From ceaec15d49f29de1f8864e31ff4007c34b031bff Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 17 Sep 2013 08:07:11 -0400
Subject: [PATCH 001/101] 9p: make v9fs_cache_inode_{get,put,set}_cookie empty
 inlines for !9P_CACHEFS

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/cache.h          | 12 ++++++++++++
 fs/9p/vfs_file.c       |  2 --
 fs/9p/vfs_inode.c      |  6 ------
 fs/9p/vfs_inode_dotl.c |  4 ----
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/fs/9p/cache.h b/fs/9p/cache.h
index 40cc54ced5d9..2f9675491095 100644
--- a/fs/9p/cache.h
+++ b/fs/9p/cache.h
@@ -101,6 +101,18 @@ static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
 
 #else /* CONFIG_9P_FSCACHE */
 
+static inline void v9fs_cache_inode_get_cookie(struct inode *inode)
+{
+}
+
+static inline void v9fs_cache_inode_put_cookie(struct inode *inode)
+{
+}
+
+static inline void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *file)
+{
+}
+
 static inline int v9fs_fscache_release_page(struct page *page,
 					    gfp_t gfp) {
 	return 1;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index aa5ecf479a57..a0df3e73c2b1 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -105,10 +105,8 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 		v9inode->writeback_fid = (void *) fid;
 	}
 	mutex_unlock(&v9inode->v_mutex);
-#ifdef CONFIG_9P_FSCACHE
 	if (v9ses->cache)
 		v9fs_cache_inode_set_cookie(inode, file);
-#endif
 	return 0;
 out_error:
 	p9_client_clunk(file->private_data);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 94de6d1482e2..4e65aa903345 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -448,9 +448,7 @@ void v9fs_evict_inode(struct inode *inode)
 	clear_inode(inode);
 	filemap_fdatawrite(inode->i_mapping);
 
-#ifdef CONFIG_9P_FSCACHE
 	v9fs_cache_inode_put_cookie(inode);
-#endif
 	/* clunk the fid stashed in writeback_fid */
 	if (v9inode->writeback_fid) {
 		p9_client_clunk(v9inode->writeback_fid);
@@ -531,9 +529,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
 		goto error;
 
 	v9fs_stat2inode(st, inode, sb);
-#ifdef CONFIG_9P_FSCACHE
 	v9fs_cache_inode_get_cookie(inode);
-#endif
 	unlock_new_inode(inode);
 	return inode;
 error:
@@ -905,10 +901,8 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		goto error;
 
 	file->private_data = fid;
-#ifdef CONFIG_9P_FSCACHE
 	if (v9ses->cache)
 		v9fs_cache_inode_set_cookie(dentry->d_inode, file);
-#endif
 
 	*opened |= FILE_CREATED;
 out:
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index a7c481402c46..4c10edec26a0 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -141,9 +141,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
 		goto error;
 
 	v9fs_stat2inode_dotl(st, inode);
-#ifdef CONFIG_9P_FSCACHE
 	v9fs_cache_inode_get_cookie(inode);
-#endif
 	retval = v9fs_get_acl(inode, fid);
 	if (retval)
 		goto error;
@@ -355,10 +353,8 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 	if (err)
 		goto err_clunk_old_fid;
 	file->private_data = ofid;
-#ifdef CONFIG_9P_FSCACHE
 	if (v9ses->cache)
 		v9fs_cache_inode_set_cookie(inode, file);
-#endif
 	*opened |= FILE_CREATED;
 out:
 	v9fs_put_acl(dacl, pacl);

From 3a93e17cf68b01fa29c7a2e861ce508dcd3401ca Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 15 Sep 2013 19:41:16 -0400
Subject: [PATCH 002/101] ecryptfs: check DCACHE_OP_REVALIDATE instead of
 ->d_op

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ecryptfs/dentry.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index bf12ba5dd223..46e31c92e574 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -51,7 +51,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
 		return -ECHILD;
 
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
-	if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
+	if (!(lower_dentry->d_flags & DCACHE_OP_REVALIDATE))
 		goto out;
 	rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
 	if (dentry->d_inode) {

From 2edbfbf1c1ab0aeb58ce8abfb69b6f6b63542848 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 15 Sep 2013 20:45:11 -0400
Subject: [PATCH 003/101] ecryptfs: don't leave RCU pathwalk immediately

If the underlying dentry doesn't have ->d_revalidate(), there's no need to
force dropping out of RCU mode.  All we need for that is to make freeing
ecryptfs_dentry_info RCU-delayed.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ecryptfs/dentry.c          | 30 ++++++++++++++++--------------
 fs/ecryptfs/ecryptfs_kernel.h |  5 ++++-
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 46e31c92e574..8cefa8e13bcc 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -44,15 +44,15 @@
  */
 static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
-	struct dentry *lower_dentry;
-	int rc = 1;
+	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+	int rc;
+
+	if (!(lower_dentry->d_flags & DCACHE_OP_REVALIDATE))
+		return 1;
 
 	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
-	lower_dentry = ecryptfs_dentry_to_lower(dentry);
-	if (!(lower_dentry->d_flags & DCACHE_OP_REVALIDATE))
-		goto out;
 	rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
 	if (dentry->d_inode) {
 		struct inode *lower_inode =
@@ -60,12 +60,17 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
 
 		fsstack_copy_attr_all(dentry->d_inode, lower_inode);
 	}
-out:
 	return rc;
 }
 
 struct kmem_cache *ecryptfs_dentry_info_cache;
 
+static void ecryptfs_dentry_free_rcu(struct rcu_head *head)
+{
+	kmem_cache_free(ecryptfs_dentry_info_cache,
+		container_of(head, struct ecryptfs_dentry_info, rcu));
+}
+
 /**
  * ecryptfs_d_release
  * @dentry: The ecryptfs dentry
@@ -74,15 +79,12 @@ struct kmem_cache *ecryptfs_dentry_info_cache;
  */
 static void ecryptfs_d_release(struct dentry *dentry)
 {
-	if (ecryptfs_dentry_to_private(dentry)) {
-		if (ecryptfs_dentry_to_lower(dentry)) {
-			dput(ecryptfs_dentry_to_lower(dentry));
-			mntput(ecryptfs_dentry_to_lower_mnt(dentry));
-		}
-		kmem_cache_free(ecryptfs_dentry_info_cache,
-				ecryptfs_dentry_to_private(dentry));
+	struct ecryptfs_dentry_info *p = dentry->d_fsdata;
+	if (p) {
+		if (p->lower_path.dentry)
+			path_put(&p->lower_path);
+		call_rcu(&p->rcu, ecryptfs_dentry_free_rcu);
 	}
-	return;
 }
 
 const struct dentry_operations ecryptfs_dops = {
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index df19d34a033b..e7ad2b3f24a7 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -261,7 +261,10 @@ struct ecryptfs_inode_info {
  * vfsmount too. */
 struct ecryptfs_dentry_info {
 	struct path lower_path;
-	struct ecryptfs_crypt_stat *crypt_stat;
+	union {
+		struct ecryptfs_crypt_stat *crypt_stat;
+		struct rcu_head rcu;
+	};
 };
 
 /**

From 92dd123033d50051d1352fada1bad91c22a119cf Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 15 Sep 2013 20:50:13 -0400
Subject: [PATCH 004/101] ecryptfs: get rid of ecryptfs_set_dentry_lower{,_mnt}

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ecryptfs/ecryptfs_kernel.h | 14 --------------
 fs/ecryptfs/inode.c           |  4 ++--
 fs/ecryptfs/main.c            |  3 +--
 3 files changed, 3 insertions(+), 18 deletions(-)

diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index e7ad2b3f24a7..90d1882b306f 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -515,13 +515,6 @@ ecryptfs_dentry_to_lower(struct dentry *dentry)
 	return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry;
 }
 
-static inline void
-ecryptfs_set_dentry_lower(struct dentry *dentry, struct dentry *lower_dentry)
-{
-	((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry =
-		lower_dentry;
-}
-
 static inline struct vfsmount *
 ecryptfs_dentry_to_lower_mnt(struct dentry *dentry)
 {
@@ -534,13 +527,6 @@ ecryptfs_dentry_to_lower_path(struct dentry *dentry)
 	return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path;
 }
 
-static inline void
-ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
-{
-	((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.mnt =
-		lower_mnt;
-}
-
 #define ecryptfs_printk(type, fmt, arg...) \
         __ecryptfs_printk(type "%s: " fmt, __func__, ## arg);
 __printf(1, 2)
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 67e9b6339691..2ca62c40c6c2 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -361,8 +361,8 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
 	BUG_ON(!d_count(lower_dentry));
 
 	ecryptfs_set_dentry_private(dentry, dentry_info);
-	ecryptfs_set_dentry_lower(dentry, lower_dentry);
-	ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt);
+	dentry_info->lower_path.mnt = lower_mnt;
+	dentry_info->lower_path.dentry = lower_dentry;
 
 	if (!lower_dentry->d_inode) {
 		/* We want to add because we couldn't find in lower */
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index eb1c5979ecaf..1b119d3bf924 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -585,8 +585,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 
 	/* ->kill_sb() will take care of root_info */
 	ecryptfs_set_dentry_private(s->s_root, root_info);
-	ecryptfs_set_dentry_lower(s->s_root, path.dentry);
-	ecryptfs_set_dentry_lower_mnt(s->s_root, path.mnt);
+	root_info->lower_path = path;
 
 	s->s_flags |= MS_ACTIVE;
 	return dget(s->s_root);

From cbe9c08524a68a1bfc1ba8d84dabb985d18ff08d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 15 Sep 2013 20:54:18 -0400
Subject: [PATCH 005/101] ecryptfs: ->lower_path.dentry is never NULL

... on anything found via ->d_fsdata

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ecryptfs/dentry.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 8cefa8e13bcc..4000f6b3a750 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -81,8 +81,7 @@ static void ecryptfs_d_release(struct dentry *dentry)
 {
 	struct ecryptfs_dentry_info *p = dentry->d_fsdata;
 	if (p) {
-		if (p->lower_path.dentry)
-			path_put(&p->lower_path);
+		path_put(&p->lower_path);
 		call_rcu(&p->rcu, ecryptfs_dentry_free_rcu);
 	}
 }

From 12f38872224542c4683ab72a88394fc89b87ca32 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 15 Sep 2013 21:20:49 -0400
Subject: [PATCH 006/101] libfs: get exports to definitions of objects being
 exported...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/libfs.c | 69 +++++++++++++++++++++++++++---------------------------
 1 file changed, 34 insertions(+), 35 deletions(-)

diff --git a/fs/libfs.c b/fs/libfs.c
index 3a3a9b53bf5a..828622a31d30 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -31,6 +31,7 @@ int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	stat->blocks = inode->i_mapping->nrpages << (PAGE_CACHE_SHIFT - 9);
 	return 0;
 }
+EXPORT_SYMBOL(simple_getattr);
 
 int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
@@ -39,6 +40,7 @@ int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_namelen = NAME_MAX;
 	return 0;
 }
+EXPORT_SYMBOL(simple_statfs);
 
 /*
  * Retaining negative dentries for an in-memory filesystem just wastes
@@ -66,6 +68,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned
 	d_add(dentry, NULL);
 	return NULL;
 }
+EXPORT_SYMBOL(simple_lookup);
 
 int dcache_dir_open(struct inode *inode, struct file *file)
 {
@@ -75,12 +78,14 @@ int dcache_dir_open(struct inode *inode, struct file *file)
 
 	return file->private_data ? 0 : -ENOMEM;
 }
+EXPORT_SYMBOL(dcache_dir_open);
 
 int dcache_dir_close(struct inode *inode, struct file *file)
 {
 	dput(file->private_data);
 	return 0;
 }
+EXPORT_SYMBOL(dcache_dir_close);
 
 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
 {
@@ -123,6 +128,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
 	mutex_unlock(&dentry->d_inode->i_mutex);
 	return offset;
 }
+EXPORT_SYMBOL(dcache_dir_lseek);
 
 /* Relationship between i_mode and the DT_xxx types */
 static inline unsigned char dt_type(struct inode *inode)
@@ -172,11 +178,13 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
 	spin_unlock(&dentry->d_lock);
 	return 0;
 }
+EXPORT_SYMBOL(dcache_readdir);
 
 ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
 {
 	return -EISDIR;
 }
+EXPORT_SYMBOL(generic_read_dir);
 
 const struct file_operations simple_dir_operations = {
 	.open		= dcache_dir_open,
@@ -186,10 +194,12 @@ const struct file_operations simple_dir_operations = {
 	.iterate	= dcache_readdir,
 	.fsync		= noop_fsync,
 };
+EXPORT_SYMBOL(simple_dir_operations);
 
 const struct inode_operations simple_dir_inode_operations = {
 	.lookup		= simple_lookup,
 };
+EXPORT_SYMBOL(simple_dir_inode_operations);
 
 static const struct super_operations simple_super_operations = {
 	.statfs		= simple_statfs,
@@ -244,6 +254,7 @@ Enomem:
 	deactivate_locked_super(s);
 	return ERR_PTR(-ENOMEM);
 }
+EXPORT_SYMBOL(mount_pseudo);
 
 int simple_open(struct inode *inode, struct file *file)
 {
@@ -251,6 +262,7 @@ int simple_open(struct inode *inode, struct file *file)
 		file->private_data = inode->i_private;
 	return 0;
 }
+EXPORT_SYMBOL(simple_open);
 
 int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 {
@@ -263,6 +275,7 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den
 	d_instantiate(dentry, inode);
 	return 0;
 }
+EXPORT_SYMBOL(simple_link);
 
 int simple_empty(struct dentry *dentry)
 {
@@ -283,6 +296,7 @@ out:
 	spin_unlock(&dentry->d_lock);
 	return ret;
 }
+EXPORT_SYMBOL(simple_empty);
 
 int simple_unlink(struct inode *dir, struct dentry *dentry)
 {
@@ -293,6 +307,7 @@ int simple_unlink(struct inode *dir, struct dentry *dentry)
 	dput(dentry);
 	return 0;
 }
+EXPORT_SYMBOL(simple_unlink);
 
 int simple_rmdir(struct inode *dir, struct dentry *dentry)
 {
@@ -304,6 +319,7 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry)
 	drop_nlink(dir);
 	return 0;
 }
+EXPORT_SYMBOL(simple_rmdir);
 
 int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
 		struct inode *new_dir, struct dentry *new_dentry)
@@ -330,6 +346,7 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	return 0;
 }
+EXPORT_SYMBOL(simple_rename);
 
 /**
  * simple_setattr - setattr for simple filesystem
@@ -370,6 +387,7 @@ int simple_readpage(struct file *file, struct page *page)
 	unlock_page(page);
 	return 0;
 }
+EXPORT_SYMBOL(simple_readpage);
 
 int simple_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
@@ -393,6 +411,7 @@ int simple_write_begin(struct file *file, struct address_space *mapping,
 	}
 	return 0;
 }
+EXPORT_SYMBOL(simple_write_begin);
 
 /**
  * simple_write_end - .write_end helper for non-block-device FSes
@@ -444,6 +463,7 @@ int simple_write_end(struct file *file, struct address_space *mapping,
 
 	return copied;
 }
+EXPORT_SYMBOL(simple_write_end);
 
 /*
  * the inodes created here are not hashed. If you use iunique to generate
@@ -512,6 +532,7 @@ out:
 	dput(root);
 	return -ENOMEM;
 }
+EXPORT_SYMBOL(simple_fill_super);
 
 static DEFINE_SPINLOCK(pin_fs_lock);
 
@@ -534,6 +555,7 @@ int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *c
 	mntput(mnt);
 	return 0;
 }
+EXPORT_SYMBOL(simple_pin_fs);
 
 void simple_release_fs(struct vfsmount **mount, int *count)
 {
@@ -545,6 +567,7 @@ void simple_release_fs(struct vfsmount **mount, int *count)
 	spin_unlock(&pin_fs_lock);
 	mntput(mnt);
 }
+EXPORT_SYMBOL(simple_release_fs);
 
 /**
  * simple_read_from_buffer - copy data from the buffer to user space
@@ -579,6 +602,7 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
 	*ppos = pos + count;
 	return count;
 }
+EXPORT_SYMBOL(simple_read_from_buffer);
 
 /**
  * simple_write_to_buffer - copy data from user space to the buffer
@@ -613,6 +637,7 @@ ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
 	*ppos = pos + count;
 	return count;
 }
+EXPORT_SYMBOL(simple_write_to_buffer);
 
 /**
  * memory_read_from_buffer - copy data from the buffer
@@ -644,6 +669,7 @@ ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
 
 	return count;
 }
+EXPORT_SYMBOL(memory_read_from_buffer);
 
 /*
  * Transaction based IO.
@@ -665,6 +691,7 @@ void simple_transaction_set(struct file *file, size_t n)
 	smp_mb();
 	ar->size = n;
 }
+EXPORT_SYMBOL(simple_transaction_set);
 
 char *simple_transaction_get(struct file *file, const char __user *buf, size_t size)
 {
@@ -696,6 +723,7 @@ char *simple_transaction_get(struct file *file, const char __user *buf, size_t s
 
 	return ar->data;
 }
+EXPORT_SYMBOL(simple_transaction_get);
 
 ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
 {
@@ -705,12 +733,14 @@ ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size
 		return 0;
 	return simple_read_from_buffer(buf, size, pos, ar->data, ar->size);
 }
+EXPORT_SYMBOL(simple_transaction_read);
 
 int simple_transaction_release(struct inode *inode, struct file *file)
 {
 	free_page((unsigned long)file->private_data);
 	return 0;
 }
+EXPORT_SYMBOL(simple_transaction_release);
 
 /* Simple attribute files */
 
@@ -746,12 +776,14 @@ int simple_attr_open(struct inode *inode, struct file *file,
 
 	return nonseekable_open(inode, file);
 }
+EXPORT_SYMBOL_GPL(simple_attr_open);
 
 int simple_attr_release(struct inode *inode, struct file *file)
 {
 	kfree(file->private_data);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(simple_attr_release);	/* GPL-only?  This?  Really? */
 
 /* read from the buffer that is filled with the get function */
 ssize_t simple_attr_read(struct file *file, char __user *buf,
@@ -787,6 +819,7 @@ out:
 	mutex_unlock(&attr->mutex);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(simple_attr_read);
 
 /* interpret the buffer as a number to call the set function with */
 ssize_t simple_attr_write(struct file *file, const char __user *buf,
@@ -819,6 +852,7 @@ out:
 	mutex_unlock(&attr->mutex);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(simple_attr_write);
 
 /**
  * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation
@@ -957,39 +991,4 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	return 0;
 }
-
-EXPORT_SYMBOL(dcache_dir_close);
-EXPORT_SYMBOL(dcache_dir_lseek);
-EXPORT_SYMBOL(dcache_dir_open);
-EXPORT_SYMBOL(dcache_readdir);
-EXPORT_SYMBOL(generic_read_dir);
-EXPORT_SYMBOL(mount_pseudo);
-EXPORT_SYMBOL(simple_write_begin);
-EXPORT_SYMBOL(simple_write_end);
-EXPORT_SYMBOL(simple_dir_inode_operations);
-EXPORT_SYMBOL(simple_dir_operations);
-EXPORT_SYMBOL(simple_empty);
-EXPORT_SYMBOL(simple_fill_super);
-EXPORT_SYMBOL(simple_getattr);
-EXPORT_SYMBOL(simple_open);
-EXPORT_SYMBOL(simple_link);
-EXPORT_SYMBOL(simple_lookup);
-EXPORT_SYMBOL(simple_pin_fs);
-EXPORT_SYMBOL(simple_readpage);
-EXPORT_SYMBOL(simple_release_fs);
-EXPORT_SYMBOL(simple_rename);
-EXPORT_SYMBOL(simple_rmdir);
-EXPORT_SYMBOL(simple_statfs);
 EXPORT_SYMBOL(noop_fsync);
-EXPORT_SYMBOL(simple_unlink);
-EXPORT_SYMBOL(simple_read_from_buffer);
-EXPORT_SYMBOL(simple_write_to_buffer);
-EXPORT_SYMBOL(memory_read_from_buffer);
-EXPORT_SYMBOL(simple_transaction_set);
-EXPORT_SYMBOL(simple_transaction_get);
-EXPORT_SYMBOL(simple_transaction_read);
-EXPORT_SYMBOL(simple_transaction_release);
-EXPORT_SYMBOL_GPL(simple_attr_open);
-EXPORT_SYMBOL_GPL(simple_attr_release);
-EXPORT_SYMBOL_GPL(simple_attr_read);
-EXPORT_SYMBOL_GPL(simple_attr_write);

From 87dc800be2499128efb3a6f059d75dc8e1e6d503 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 16 Sep 2013 10:30:04 -0400
Subject: [PATCH 007/101] new helper: kfree_put_link()

duplicated to hell and back...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/cifs/cifsfs.c    |  2 +-
 fs/cifs/cifsfs.h    |  2 --
 fs/cifs/link.c      |  7 -------
 fs/ecryptfs/inode.c | 12 +-----------
 fs/gfs2/inode.c     |  9 +--------
 fs/libfs.c          | 10 ++++++++++
 fs/proc/self.c      | 10 +---------
 include/linux/fs.h  |  1 +
 8 files changed, 15 insertions(+), 38 deletions(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a16b4e58bcc6..801975c34cf9 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -860,7 +860,7 @@ const struct inode_operations cifs_file_inode_ops = {
 const struct inode_operations cifs_symlink_inode_ops = {
 	.readlink = generic_readlink,
 	.follow_link = cifs_follow_link,
-	.put_link = cifs_put_link,
+	.put_link = kfree_put_link,
 	.permission = cifs_permission,
 	/* BB add the following two eventually */
 	/* revalidate: cifs_revalidate,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 6d0b07217ac9..26a754f49ba1 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -115,8 +115,6 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
 
 /* Functions related to symlinks */
 extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd);
-extern void cifs_put_link(struct dentry *direntry,
-			  struct nameidata *nd, void *);
 extern int cifs_readlink(struct dentry *direntry, char __user *buffer,
 			 int buflen);
 extern int cifs_symlink(struct inode *inode, struct dentry *direntry,
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 7e36ceba0c7a..cc0234710ddb 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -621,10 +621,3 @@ symlink_exit:
 	free_xid(xid);
 	return rc;
 }
-
-void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie)
-{
-	char *p = nd_get_link(nd);
-	if (!IS_ERR(p))
-		kfree(p);
-}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 2ca62c40c6c2..0f9b66eaa767 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -703,16 +703,6 @@ out:
 	return NULL;
 }
 
-static void
-ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
-{
-	char *buf = nd_get_link(nd);
-	if (!IS_ERR(buf)) {
-		/* Free the char* */
-		kfree(buf);
-	}
-}
-
 /**
  * upper_size_to_lower_size
  * @crypt_stat: Crypt_stat associated with file
@@ -1121,7 +1111,7 @@ out:
 const struct inode_operations ecryptfs_symlink_iops = {
 	.readlink = generic_readlink,
 	.follow_link = ecryptfs_follow_link,
-	.put_link = ecryptfs_put_link,
+	.put_link = kfree_put_link,
 	.permission = ecryptfs_permission,
 	.setattr = ecryptfs_setattr,
 	.getattr = ecryptfs_getattr_link,
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index ced3257f06e8..d2384f7c53e1 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1506,13 +1506,6 @@ out:
 	return NULL;
 }
 
-static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
-{
-	char *s = nd_get_link(nd);
-	if (!IS_ERR(s))
-		kfree(s);
-}
-
 /**
  * gfs2_permission -
  * @inode: The inode
@@ -1864,7 +1857,7 @@ const struct inode_operations gfs2_dir_iops = {
 const struct inode_operations gfs2_symlink_iops = {
 	.readlink = generic_readlink,
 	.follow_link = gfs2_follow_link,
-	.put_link = gfs2_put_link,
+	.put_link = kfree_put_link,
 	.permission = gfs2_permission,
 	.setattr = gfs2_setattr,
 	.getattr = gfs2_getattr,
diff --git a/fs/libfs.c b/fs/libfs.c
index 828622a31d30..8c5018493154 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -10,6 +10,7 @@
 #include <linux/vfs.h>
 #include <linux/quotaops.h>
 #include <linux/mutex.h>
+#include <linux/namei.h>
 #include <linux/exportfs.h>
 #include <linux/writeback.h>
 #include <linux/buffer_head.h> /* sync_mapping_buffers */
@@ -992,3 +993,12 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	return 0;
 }
 EXPORT_SYMBOL(noop_fsync);
+
+void kfree_put_link(struct dentry *dentry, struct nameidata *nd,
+				void *cookie)
+{
+	char *s = nd_get_link(nd);
+	if (!IS_ERR(s))
+		kfree(s);
+}
+EXPORT_SYMBOL(kfree_put_link);
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 6b6a993b5c25..ffeb202ec942 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -36,18 +36,10 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
 	return NULL;
 }
 
-static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
-				void *cookie)
-{
-	char *s = nd_get_link(nd);
-	if (!IS_ERR(s))
-		kfree(s);
-}
-
 static const struct inode_operations proc_self_inode_operations = {
 	.readlink	= proc_self_readlink,
 	.follow_link	= proc_self_follow_link,
-	.put_link	= proc_self_put_link,
+	.put_link	= kfree_put_link,
 };
 
 static unsigned self_inum;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3f40547ba191..d80c2437d624 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2502,6 +2502,7 @@ extern int __page_symlink(struct inode *inode, const char *symname, int len,
 		int nofs);
 extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern const struct inode_operations page_symlink_inode_operations;
+extern void kfree_put_link(struct dentry *, struct nameidata *, void *);
 extern int generic_readlink(struct dentry *, char __user *, int);
 extern void generic_fillattr(struct inode *, struct kstat *);
 extern int vfs_getattr(struct path *, struct kstat *);

From 48bc06e74be178968b53d339dbcb110cd2bb16ea Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 16 Sep 2013 10:35:31 -0400
Subject: [PATCH 008/101] befs: split symlink iops in two - for short and long
 symlinks resp.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/befs/linuxvfs.c | 61 +++++++++++++++++++++++-----------------------
 1 file changed, 31 insertions(+), 30 deletions(-)

diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index e9c75e20db32..daa15d6ba450 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -42,7 +42,7 @@ static void befs_destroy_inode(struct inode *inode);
 static int befs_init_inodecache(void);
 static void befs_destroy_inodecache(void);
 static void *befs_follow_link(struct dentry *, struct nameidata *);
-static void befs_put_link(struct dentry *, struct nameidata *, void *);
+static void *befs_fast_follow_link(struct dentry *, struct nameidata *);
 static int befs_utf2nls(struct super_block *sb, const char *in, int in_len,
 			char **out, int *out_len);
 static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
@@ -79,10 +79,15 @@ static const struct address_space_operations befs_aops = {
 	.bmap		= befs_bmap,
 };
 
+static const struct inode_operations befs_fast_symlink_inode_operations = {
+	.readlink	= generic_readlink,
+	.follow_link	= befs_fast_follow_link,
+};
+
 static const struct inode_operations befs_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= befs_follow_link,
-	.put_link	= befs_put_link,
+	.put_link	= kfree_put_link,
 };
 
 /* 
@@ -411,7 +416,10 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
 		inode->i_op = &befs_dir_inode_operations;
 		inode->i_fop = &befs_dir_operations;
 	} else if (S_ISLNK(inode->i_mode)) {
-		inode->i_op = &befs_symlink_inode_operations;
+		if (befs_ino->i_flags & BEFS_LONG_SYMLINK)
+			inode->i_op = &befs_symlink_inode_operations;
+		else
+			inode->i_op = &befs_fast_symlink_inode_operations;
 	} else {
 		befs_error(sb, "Inode %lu is not a regular file, "
 			   "directory or symlink. THAT IS WRONG! BeFS has no "
@@ -477,47 +485,40 @@ befs_destroy_inodecache(void)
 static void *
 befs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
+	struct super_block *sb = dentry->d_sb;
 	befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
+	befs_data_stream *data = &befs_ino->i_data.ds;
+	befs_off_t len = data->size;
 	char *link;
 
-	if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
-		struct super_block *sb = dentry->d_sb;
-		befs_data_stream *data = &befs_ino->i_data.ds;
-		befs_off_t len = data->size;
+	if (len == 0) {
+		befs_error(sb, "Long symlink with illegal length");
+		link = ERR_PTR(-EIO);
+	} else {
+		befs_debug(sb, "Follow long symlink");
 
-		if (len == 0) {
-			befs_error(sb, "Long symlink with illegal length");
+		link = kmalloc(len, GFP_NOFS);
+		if (!link) {
+			link = ERR_PTR(-ENOMEM);
+		} else if (befs_read_lsymlink(sb, data, link, len) != len) {
+			kfree(link);
+			befs_error(sb, "Failed to read entire long symlink");
 			link = ERR_PTR(-EIO);
 		} else {
-			befs_debug(sb, "Follow long symlink");
-
-			link = kmalloc(len, GFP_NOFS);
-			if (!link) {
-				link = ERR_PTR(-ENOMEM);
-			} else if (befs_read_lsymlink(sb, data, link, len) != len) {
-				kfree(link);
-				befs_error(sb, "Failed to read entire long symlink");
-				link = ERR_PTR(-EIO);
-			} else {
-				link[len - 1] = '\0';
-			}
+			link[len - 1] = '\0';
 		}
-	} else {
-		link = befs_ino->i_data.symlink;
 	}
-
 	nd_set_link(nd, link);
 	return NULL;
 }
 
-static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
+
+static void *
+befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
-	if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
-		char *link = nd_get_link(nd);
-		if (!IS_ERR(link))
-			kfree(link);
-	}
+	nd_set_link(nd, befs_ino->i_data.symlink);
+	return NULL;
 }
 
 /*

From 6de1472f1a4a3bd912f515f29d3cf52a65a4c718 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 16 Sep 2013 10:53:17 -0400
Subject: [PATCH 009/101] nfs: use %p[dD] instead of open-coded (and often
 racy) equivalents

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfs/dir.c           | 119 +++++++++++++++++------------------------
 fs/nfs/direct.c        |  17 +++---
 fs/nfs/file.c          | 117 +++++++++++++++-------------------------
 fs/nfs/namespace.c     |   5 +-
 fs/nfs/nfs3proc.c      |   8 +--
 fs/nfs/nfs4file.c      |   4 +-
 fs/nfs/nfs4namespace.c |   7 ++-
 fs/nfs/nfs4proc.c      |   5 +-
 fs/nfs/proc.c          |   8 +--
 fs/nfs/unlink.c        |   9 ++--
 fs/nfs/write.c         |   6 +--
 11 files changed, 119 insertions(+), 186 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 02b0df769e2d..8b3dd7174fa2 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -98,9 +98,7 @@ nfs_opendir(struct inode *inode, struct file *filp)
 	struct nfs_open_dir_context *ctx;
 	struct rpc_cred *cred;
 
-	dfprintk(FILE, "NFS: open dir(%s/%s)\n",
-			filp->f_path.dentry->d_parent->d_name.name,
-			filp->f_path.dentry->d_name.name);
+	dfprintk(FILE, "NFS: open dir(%pD2)\n", filp);
 
 	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
 
@@ -297,11 +295,10 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
 				if (ctx->duped > 0
 				    && ctx->dup_cookie == *desc->dir_cookie) {
 					if (printk_ratelimit()) {
-						pr_notice("NFS: directory %s/%s contains a readdir loop."
+						pr_notice("NFS: directory %pD2 contains a readdir loop."
 								"Please contact your server vendor.  "
 								"The file: %s has duplicate cookie %llu\n",
-								desc->file->f_dentry->d_parent->d_name.name,
-								desc->file->f_dentry->d_name.name,
+								desc->file,
 								array->array[i].string.name,
 								*desc->dir_cookie);
 					}
@@ -822,9 +819,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	struct nfs_open_dir_context *dir_ctx = file->private_data;
 	int res = 0;
 
-	dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
-			dentry->d_parent->d_name.name, dentry->d_name.name,
-			(long long)ctx->pos);
+	dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
+			file, (long long)ctx->pos);
 	nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
 
 	/*
@@ -880,22 +876,17 @@ out:
 	nfs_unblock_sillyrename(dentry);
 	if (res > 0)
 		res = 0;
-	dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n",
-			dentry->d_parent->d_name.name, dentry->d_name.name,
-			res);
+	dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
 	return res;
 }
 
 static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
 {
-	struct dentry *dentry = filp->f_path.dentry;
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct nfs_open_dir_context *dir_ctx = filp->private_data;
 
-	dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n",
-			dentry->d_parent->d_name.name,
-			dentry->d_name.name,
-			offset, whence);
+	dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
+			filp, offset, whence);
 
 	mutex_lock(&inode->i_mutex);
 	switch (whence) {
@@ -925,15 +916,12 @@ out:
 static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
 			 int datasync)
 {
-	struct dentry *dentry = filp->f_path.dentry;
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 
-	dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n",
-			dentry->d_parent->d_name.name, dentry->d_name.name,
-			datasync);
+	dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
 
 	mutex_lock(&inode->i_mutex);
-	nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC);
+	nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
 	mutex_unlock(&inode->i_mutex);
 	return 0;
 }
@@ -1073,9 +1061,8 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 	}
 
 	if (is_bad_inode(inode)) {
-		dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
-				__func__, dentry->d_parent->d_name.name,
-				dentry->d_name.name);
+		dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
+				__func__, dentry);
 		goto out_bad;
 	}
 
@@ -1125,9 +1112,8 @@ out_set_verifier:
 	nfs_advise_use_readdirplus(dir);
  out_valid_noent:
 	dput(parent);
-	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
-			__func__, dentry->d_parent->d_name.name,
-			dentry->d_name.name);
+	dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
+			__func__, dentry);
 	return 1;
 out_zap_parent:
 	nfs_zap_caches(dir);
@@ -1147,18 +1133,16 @@ out_zap_parent:
 		goto out_valid;
 
 	dput(parent);
-	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
-			__func__, dentry->d_parent->d_name.name,
-			dentry->d_name.name);
+	dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
+			__func__, dentry);
 	return 0;
 out_error:
 	nfs_free_fattr(fattr);
 	nfs_free_fhandle(fhandle);
 	nfs4_label_free(label);
 	dput(parent);
-	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) lookup returned error %d\n",
-			__func__, dentry->d_parent->d_name.name,
-			dentry->d_name.name, error);
+	dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
+			__func__, dentry, error);
 	return error;
 }
 
@@ -1182,16 +1166,14 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
 	 * eventually need to do something more here.
 	 */
 	if (!inode) {
-		dfprintk(LOOKUPCACHE, "%s: %s/%s has negative inode\n",
-				__func__, dentry->d_parent->d_name.name,
-				dentry->d_name.name);
+		dfprintk(LOOKUPCACHE, "%s: %pd2 has negative inode\n",
+				__func__, dentry);
 		return 1;
 	}
 
 	if (is_bad_inode(inode)) {
-		dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
-				__func__, dentry->d_parent->d_name.name,
-				dentry->d_name.name);
+		dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
+				__func__, dentry);
 		return 0;
 	}
 
@@ -1206,9 +1188,8 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
  */
 static int nfs_dentry_delete(const struct dentry *dentry)
 {
-	dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name,
-		dentry->d_flags);
+	dfprintk(VFS, "NFS: dentry_delete(%pd2, %x)\n",
+		dentry, dentry->d_flags);
 
 	/* Unhash any dentry with a stale inode */
 	if (dentry->d_inode != NULL && NFS_STALE(dentry->d_inode))
@@ -1286,8 +1267,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
 	struct nfs4_label *label = NULL;
 	int error;
 
-	dfprintk(VFS, "NFS: lookup(%s/%s)\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name);
+	dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry);
 	nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
 
 	res = ERR_PTR(-ENAMETOOLONG);
@@ -1418,8 +1398,8 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	/* Expect a negative dentry */
 	BUG_ON(dentry->d_inode);
 
-	dfprintk(VFS, "NFS: atomic_open(%s/%ld), %s\n",
-			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+	dfprintk(VFS, "NFS: atomic_open(%s/%ld), %pd\n",
+			dir->i_sb->s_id, dir->i_ino, dentry);
 
 	err = nfs_check_flags(open_flags);
 	if (err)
@@ -1608,8 +1588,8 @@ int nfs_create(struct inode *dir, struct dentry *dentry,
 	int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
 	int error;
 
-	dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
-			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+	dfprintk(VFS, "NFS: create(%s/%ld), %pd\n",
+			dir->i_sb->s_id, dir->i_ino, dentry);
 
 	attr.ia_mode = mode;
 	attr.ia_valid = ATTR_MODE;
@@ -1635,8 +1615,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
 	struct iattr attr;
 	int status;
 
-	dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n",
-			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+	dfprintk(VFS, "NFS: mknod(%s/%ld), %pd\n",
+			dir->i_sb->s_id, dir->i_ino, dentry);
 
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
@@ -1664,8 +1644,8 @@ int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	struct iattr attr;
 	int error;
 
-	dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n",
-			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+	dfprintk(VFS, "NFS: mkdir(%s/%ld), %pd\n",
+			dir->i_sb->s_id, dir->i_ino, dentry);
 
 	attr.ia_valid = ATTR_MODE;
 	attr.ia_mode = mode | S_IFDIR;
@@ -1692,8 +1672,8 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	int error;
 
-	dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n",
-			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+	dfprintk(VFS, "NFS: rmdir(%s/%ld), %pd\n",
+			dir->i_sb->s_id, dir->i_ino, dentry);
 
 	trace_nfs_rmdir_enter(dir, dentry);
 	if (dentry->d_inode) {
@@ -1728,8 +1708,7 @@ static int nfs_safe_remove(struct dentry *dentry)
 	struct inode *inode = dentry->d_inode;
 	int error = -EBUSY;
 		
-	dfprintk(VFS, "NFS: safe_remove(%s/%s)\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name);
+	dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry);
 
 	/* If the dentry was sillyrenamed, we simply call d_delete() */
 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
@@ -1762,8 +1741,8 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
 	int error;
 	int need_rehash = 0;
 
-	dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id,
-		dir->i_ino, dentry->d_name.name);
+	dfprintk(VFS, "NFS: unlink(%s/%ld, %pd)\n", dir->i_sb->s_id,
+		dir->i_ino, dentry);
 
 	trace_nfs_unlink_enter(dir, dentry);
 	spin_lock(&dentry->d_lock);
@@ -1813,8 +1792,8 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 	unsigned int pathlen = strlen(symname);
 	int error;
 
-	dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id,
-		dir->i_ino, dentry->d_name.name, symname);
+	dfprintk(VFS, "NFS: symlink(%s/%ld, %pd, %s)\n", dir->i_sb->s_id,
+		dir->i_ino, dentry, symname);
 
 	if (pathlen > PAGE_SIZE)
 		return -ENAMETOOLONG;
@@ -1836,9 +1815,9 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 	error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
 	trace_nfs_symlink_exit(dir, dentry, error);
 	if (error != 0) {
-		dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n",
+		dfprintk(VFS, "NFS: symlink(%s/%ld, %pd, %s) error %d\n",
 			dir->i_sb->s_id, dir->i_ino,
-			dentry->d_name.name, symname, error);
+			dentry, symname, error);
 		d_drop(dentry);
 		__free_page(page);
 		return error;
@@ -1865,9 +1844,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 	struct inode *inode = old_dentry->d_inode;
 	int error;
 
-	dfprintk(VFS, "NFS: link(%s/%s -> %s/%s)\n",
-		old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
-		dentry->d_parent->d_name.name, dentry->d_name.name);
+	dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n",
+		old_dentry, dentry);
 
 	trace_nfs_link_enter(inode, dir, dentry);
 	NFS_PROTO(inode)->return_delegation(inode);
@@ -1915,9 +1893,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct dentry *dentry = NULL, *rehash = NULL;
 	int error = -EBUSY;
 
-	dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
-		 old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
-		 new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
+	dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n",
+		 old_dentry, new_dentry,
 		 d_count(new_dentry));
 
 	trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 91ff089d3412..d71d66c9e0a1 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -124,9 +124,8 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
 ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
 {
 #ifndef CONFIG_NFS_SWAP
-	dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
-			iocb->ki_filp->f_path.dentry->d_name.name,
-			(long long) pos, nr_segs);
+	dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n",
+			iocb->ki_filp, (long long) pos, nr_segs);
 
 	return -EINVAL;
 #else
@@ -909,10 +908,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
 	count = iov_length(iov, nr_segs);
 	nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
 
-	dfprintk(FILE, "NFS: direct read(%s/%s, %zd@%Ld)\n",
-		file->f_path.dentry->d_parent->d_name.name,
-		file->f_path.dentry->d_name.name,
-		count, (long long) pos);
+	dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
+		file, count, (long long) pos);
 
 	retval = 0;
 	if (!count)
@@ -965,10 +962,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	count = iov_length(iov, nr_segs);
 	nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
 
-	dfprintk(FILE, "NFS: direct write(%s/%s, %zd@%Ld)\n",
-		file->f_path.dentry->d_parent->d_name.name,
-		file->f_path.dentry->d_name.name,
-		count, (long long) pos);
+	dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
+		file, count, (long long) pos);
 
 	retval = generic_write_checks(file, &pos, &count, 0);
 	if (retval)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 1e6bfdbc1aff..e2fcacf07de3 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -65,9 +65,7 @@ nfs_file_open(struct inode *inode, struct file *filp)
 {
 	int res;
 
-	dprintk("NFS: open file(%s/%s)\n",
-			filp->f_path.dentry->d_parent->d_name.name,
-			filp->f_path.dentry->d_name.name);
+	dprintk("NFS: open file(%pD2)\n", filp);
 
 	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
 	res = nfs_check_flags(filp->f_flags);
@@ -81,9 +79,7 @@ nfs_file_open(struct inode *inode, struct file *filp)
 int
 nfs_file_release(struct inode *inode, struct file *filp)
 {
-	dprintk("NFS: release(%s/%s)\n",
-			filp->f_path.dentry->d_parent->d_name.name,
-			filp->f_path.dentry->d_name.name);
+	dprintk("NFS: release(%pD2)\n", filp);
 
 	nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
 	return nfs_release(inode, filp);
@@ -123,10 +119,8 @@ force_reval:
 
 loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence)
 {
-	dprintk("NFS: llseek file(%s/%s, %lld, %d)\n",
-			filp->f_path.dentry->d_parent->d_name.name,
-			filp->f_path.dentry->d_name.name,
-			offset, whence);
+	dprintk("NFS: llseek file(%pD2, %lld, %d)\n",
+			filp, offset, whence);
 
 	/*
 	 * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
@@ -150,12 +144,9 @@ EXPORT_SYMBOL_GPL(nfs_file_llseek);
 int
 nfs_file_flush(struct file *file, fl_owner_t id)
 {
-	struct dentry	*dentry = file->f_path.dentry;
-	struct inode	*inode = dentry->d_inode;
+	struct inode	*inode = file_inode(file);
 
-	dprintk("NFS: flush(%s/%s)\n",
-			dentry->d_parent->d_name.name,
-			dentry->d_name.name);
+	dprintk("NFS: flush(%pD2)\n", file);
 
 	nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
 	if ((file->f_mode & FMODE_WRITE) == 0)
@@ -177,15 +168,14 @@ ssize_t
 nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
 		unsigned long nr_segs, loff_t pos)
 {
-	struct dentry * dentry = iocb->ki_filp->f_path.dentry;
-	struct inode * inode = dentry->d_inode;
+	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t result;
 
 	if (iocb->ki_filp->f_flags & O_DIRECT)
 		return nfs_file_direct_read(iocb, iov, nr_segs, pos, true);
 
-	dprintk("NFS: read(%s/%s, %lu@%lu)\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name,
+	dprintk("NFS: read(%pD2, %lu@%lu)\n",
+		iocb->ki_filp,
 		(unsigned long) iov_length(iov, nr_segs), (unsigned long) pos);
 
 	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
@@ -203,13 +193,11 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,
 		     struct pipe_inode_info *pipe, size_t count,
 		     unsigned int flags)
 {
-	struct dentry *dentry = filp->f_path.dentry;
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	ssize_t res;
 
-	dprintk("NFS: splice_read(%s/%s, %lu@%Lu)\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name,
-		(unsigned long) count, (unsigned long long) *ppos);
+	dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
+		filp, (unsigned long) count, (unsigned long long) *ppos);
 
 	res = nfs_revalidate_mapping(inode, filp->f_mapping);
 	if (!res) {
@@ -224,12 +212,10 @@ EXPORT_SYMBOL_GPL(nfs_file_splice_read);
 int
 nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
 {
-	struct dentry *dentry = file->f_path.dentry;
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	int	status;
 
-	dprintk("NFS: mmap(%s/%s)\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name);
+	dprintk("NFS: mmap(%pD2)\n", file);
 
 	/* Note: generic_file_mmap() returns ENOSYS on nommu systems
 	 *       so we call that before revalidating the mapping
@@ -258,15 +244,12 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap);
 int
 nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync)
 {
-	struct dentry *dentry = file->f_path.dentry;
 	struct nfs_open_context *ctx = nfs_file_open_context(file);
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	int have_error, do_resend, status;
 	int ret = 0;
 
-	dprintk("NFS: fsync file(%s/%s) datasync %d\n",
-			dentry->d_parent->d_name.name, dentry->d_name.name,
-			datasync);
+	dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync);
 
 	nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
 	do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
@@ -371,10 +354,8 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
 	struct page *page;
 	int once_thru = 0;
 
-	dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n",
-		file->f_path.dentry->d_parent->d_name.name,
-		file->f_path.dentry->d_name.name,
-		mapping->host->i_ino, len, (long long) pos);
+	dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%ld), %u@%lld)\n",
+		file, mapping->host->i_ino, len, (long long) pos);
 
 start:
 	/*
@@ -414,10 +395,8 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
 	struct nfs_open_context *ctx = nfs_file_open_context(file);
 	int status;
 
-	dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n",
-		file->f_path.dentry->d_parent->d_name.name,
-		file->f_path.dentry->d_name.name,
-		mapping->host->i_ino, len, (long long) pos);
+	dfprintk(PAGECACHE, "NFS: write_end(%pD2(%ld), %u@%lld)\n",
+		file, mapping->host->i_ino, len, (long long) pos);
 
 	/*
 	 * Zero any uninitialised parts of the page, and then mark the page
@@ -601,22 +580,21 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct page *page = vmf->page;
 	struct file *filp = vma->vm_file;
-	struct dentry *dentry = filp->f_path.dentry;
+	struct inode *inode = file_inode(filp);
 	unsigned pagelen;
 	int ret = VM_FAULT_NOPAGE;
 	struct address_space *mapping;
 
-	dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name,
-		filp->f_mapping->host->i_ino,
+	dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%ld), offset %lld)\n",
+		filp, filp->f_mapping->host->i_ino,
 		(long long)page_offset(page));
 
 	/* make sure the cache has finished storing the page */
-	nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page);
+	nfs_fscache_wait_on_page_write(NFS_I(inode), page);
 
 	lock_page(page);
 	mapping = page_file_mapping(page);
-	if (mapping != dentry->d_inode->i_mapping)
+	if (mapping != inode->i_mapping)
 		goto out_unlock;
 
 	wait_on_page_writeback(page);
@@ -659,22 +637,21 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode)
 ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
 		       unsigned long nr_segs, loff_t pos)
 {
-	struct dentry * dentry = iocb->ki_filp->f_path.dentry;
-	struct inode * inode = dentry->d_inode;
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file_inode(file);
 	unsigned long written = 0;
 	ssize_t result;
 	size_t count = iov_length(iov, nr_segs);
 
-	result = nfs_key_timeout_notify(iocb->ki_filp, inode);
+	result = nfs_key_timeout_notify(file, inode);
 	if (result)
 		return result;
 
-	if (iocb->ki_filp->f_flags & O_DIRECT)
+	if (file->f_flags & O_DIRECT)
 		return nfs_file_direct_write(iocb, iov, nr_segs, pos, true);
 
-	dprintk("NFS: write(%s/%s, %lu@%Ld)\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name,
-		(unsigned long) count, (long long) pos);
+	dprintk("NFS: write(%pD2, %lu@%Ld)\n",
+		file, (unsigned long) count, (long long) pos);
 
 	result = -EBUSY;
 	if (IS_SWAPFILE(inode))
@@ -682,8 +659,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
 	/*
 	 * O_APPEND implies that we must revalidate the file length.
 	 */
-	if (iocb->ki_filp->f_flags & O_APPEND) {
-		result = nfs_revalidate_file_size(inode, iocb->ki_filp);
+	if (file->f_flags & O_APPEND) {
+		result = nfs_revalidate_file_size(inode, file);
 		if (result)
 			goto out;
 	}
@@ -697,8 +674,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
 		written = result;
 
 	/* Return error values for O_DSYNC and IS_SYNC() */
-	if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
-		int err = vfs_fsync(iocb->ki_filp, 0);
+	if (result >= 0 && nfs_need_sync_write(file, inode)) {
+		int err = vfs_fsync(file, 0);
 		if (err < 0)
 			result = err;
 	}
@@ -717,14 +694,12 @@ ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
 			      struct file *filp, loff_t *ppos,
 			      size_t count, unsigned int flags)
 {
-	struct dentry *dentry = filp->f_path.dentry;
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	unsigned long written = 0;
 	ssize_t ret;
 
-	dprintk("NFS splice_write(%s/%s, %lu@%llu)\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name,
-		(unsigned long) count, (unsigned long long) *ppos);
+	dprintk("NFS splice_write(%pD2, %lu@%llu)\n",
+		filp, (unsigned long) count, (unsigned long long) *ppos);
 
 	/*
 	 * The combination of splice and an O_APPEND destination is disallowed.
@@ -883,10 +858,8 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
 	int ret = -ENOLCK;
 	int is_local = 0;
 
-	dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n",
-			filp->f_path.dentry->d_parent->d_name.name,
-			filp->f_path.dentry->d_name.name,
-			fl->fl_type, fl->fl_flags,
+	dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n",
+			filp, fl->fl_type, fl->fl_flags,
 			(long long)fl->fl_start, (long long)fl->fl_end);
 
 	nfs_inc_stats(inode, NFSIOS_VFSLOCK);
@@ -923,10 +896,8 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
 	struct inode *inode = filp->f_mapping->host;
 	int is_local = 0;
 
-	dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n",
-			filp->f_path.dentry->d_parent->d_name.name,
-			filp->f_path.dentry->d_name.name,
-			fl->fl_type, fl->fl_flags);
+	dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n",
+			filp, fl->fl_type, fl->fl_flags);
 
 	if (!(fl->fl_flags & FL_FLOCK))
 		return -ENOLCK;
@@ -960,9 +931,7 @@ EXPORT_SYMBOL_GPL(nfs_flock);
  */
 int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
 {
-	dprintk("NFS: setlease(%s/%s, arg=%ld)\n",
-			file->f_path.dentry->d_parent->d_name.name,
-			file->f_path.dentry->d_name.name, arg);
+	dprintk("NFS: setlease(%pD2, arg=%ld)\n", file, arg);
 	return -EINVAL;
 }
 EXPORT_SYMBOL_GPL(nfs_setlease);
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 348b535cd786..b5a0afc3ee10 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -253,9 +253,8 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
 
 	dprintk("--> nfs_do_submount()\n");
 
-	dprintk("%s: submounting on %s/%s\n", __func__,
-			dentry->d_parent->d_name.name,
-			dentry->d_name.name);
+	dprintk("%s: submounting on %pd2\n", __func__,
+			dentry);
 	if (page == NULL)
 		goto out;
 	devname = nfs_devname(dentry, page, PAGE_SIZE);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 90cb10d7b693..01b6f6a49d16 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -321,7 +321,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	umode_t mode = sattr->ia_mode;
 	int status = -ENOMEM;
 
-	dprintk("NFS call  create %s\n", dentry->d_name.name);
+	dprintk("NFS call  create %pd\n", dentry);
 
 	data = nfs3_alloc_createdata();
 	if (data == NULL)
@@ -548,7 +548,7 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
 	if (len > NFS3_MAXPATHLEN)
 		return -ENAMETOOLONG;
 
-	dprintk("NFS call  symlink %s\n", dentry->d_name.name);
+	dprintk("NFS call  symlink %pd\n", dentry);
 
 	data = nfs3_alloc_createdata();
 	if (data == NULL)
@@ -576,7 +576,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 	umode_t mode = sattr->ia_mode;
 	int status = -ENOMEM;
 
-	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
+	dprintk("NFS call  mkdir %pd\n", dentry);
 
 	sattr->ia_mode &= ~current_umask();
 
@@ -695,7 +695,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	umode_t mode = sattr->ia_mode;
 	int status = -ENOMEM;
 
-	dprintk("NFS call  mknod %s %u:%u\n", dentry->d_name.name,
+	dprintk("NFS call  mknod %pd %u:%u\n", dentry,
 			MAJOR(rdev), MINOR(rdev));
 
 	sattr->ia_mode &= ~current_umask();
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 77efaf15ec90..9c8f09a2156f 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -31,9 +31,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
 	 * -EOPENSTALE.  The VFS will retry the lookup/create/open.
 	 */
 
-	dprintk("NFS: open file(%s/%s)\n",
-		dentry->d_parent->d_name.name,
-		dentry->d_name.name);
+	dprintk("NFS: open file(%pd2)\n", dentry);
 
 	if ((openflags & O_ACCMODE) == 3)
 		openflags--;
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 2288cd3c9278..049b9fb0d2c9 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -283,8 +283,7 @@ static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
 	if (locations == NULL || locations->nlocations <= 0)
 		goto out;
 
-	dprintk("%s: referral at %s/%s\n", __func__,
-		dentry->d_parent->d_name.name, dentry->d_name.name);
+	dprintk("%s: referral at %pd2\n", __func__, dentry);
 
 	page = (char *) __get_free_page(GFP_USER);
 	if (!page)
@@ -348,8 +347,8 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *
 	mnt = ERR_PTR(-ENOENT);
 
 	parent = dget_parent(dentry);
-	dprintk("%s: getting locations for %s/%s\n",
-		__func__, parent->d_name.name, dentry->d_name.name);
+	dprintk("%s: getting locations for %pd2\n",
+		__func__, dentry);
 
 	err = nfs4_proc_fs_locations(client, parent->d_inode, &dentry->d_name, fs_locations, page);
 	dput(parent);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d53d6785cba2..75e46bbf7f41 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3738,9 +3738,8 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 	};
 	int			status;
 
-	dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __func__,
-			dentry->d_parent->d_name.name,
-			dentry->d_name.name,
+	dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__,
+			dentry,
 			(unsigned long long)cookie);
 	nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args);
 	res.pgbase = args.pgbase;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index a8f57c728df5..fddbba2d9eff 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -235,7 +235,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	};
 	int status = -ENOMEM;
 
-	dprintk("NFS call  create %s\n", dentry->d_name.name);
+	dprintk("NFS call  create %pd\n", dentry);
 	data = nfs_alloc_createdata(dir, dentry, sattr);
 	if (data == NULL)
 		goto out;
@@ -265,7 +265,7 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	umode_t mode;
 	int status = -ENOMEM;
 
-	dprintk("NFS call  mknod %s\n", dentry->d_name.name);
+	dprintk("NFS call  mknod %pd\n", dentry);
 
 	mode = sattr->ia_mode;
 	if (S_ISFIFO(mode)) {
@@ -423,7 +423,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
 	};
 	int status = -ENAMETOOLONG;
 
-	dprintk("NFS call  symlink %s\n", dentry->d_name.name);
+	dprintk("NFS call  symlink %pd\n", dentry);
 
 	if (len > NFS2_MAXPATHLEN)
 		goto out;
@@ -462,7 +462,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 	};
 	int status = -ENOMEM;
 
-	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
+	dprintk("NFS call  mkdir %pd\n", dentry);
 	data = nfs_alloc_createdata(dir, dentry, sattr);
 	if (data == NULL)
 		goto out;
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index bb939edd4c99..8285de9eaad2 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -495,9 +495,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 	struct rpc_task *task;
 	int            error = -EIO;
 
-	dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name,
-		d_count(dentry));
+	dfprintk(VFS, "NFS: silly-rename(%pd2, ct=%d)\n",
+		dentry, d_count(dentry));
 	nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
 
 	/*
@@ -522,8 +521,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 				SILLYNAME_FILEID_LEN, fileid,
 				SILLYNAME_COUNTER_LEN, sillycounter);
 
-		dfprintk(VFS, "NFS: trying to rename %s to %s\n",
-				dentry->d_name.name, silly);
+		dfprintk(VFS, "NFS: trying to rename %pd to %s\n",
+				dentry, silly);
 
 		sdentry = lookup_one_len(silly, dentry->d_parent, slen);
 		/*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ac1dc331ba31..c1d548211c31 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -954,10 +954,8 @@ int nfs_updatepage(struct file *file, struct page *page,
 
 	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
 
-	dprintk("NFS:       nfs_updatepage(%s/%s %d@%lld)\n",
-		file->f_path.dentry->d_parent->d_name.name,
-		file->f_path.dentry->d_name.name, count,
-		(long long)(page_file_offset(page) + offset));
+	dprintk("NFS:       nfs_updatepage(%pD2 %d@%lld)\n",
+		file, count, (long long)(page_file_offset(page) + offset));
 
 	if (nfs_can_extend_write(file, page, inode)) {
 		count = max(count + offset, nfs_page_length(page));

From a6a9f18f0a9f943ada095753bdc4346aee67b1aa Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 16 Sep 2013 10:57:01 -0400
Subject: [PATCH 010/101] nfsd: switch to %p[dD]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfsd/nfs4recover.c | 12 ++++++------
 fs/nfsd/nfs4state.c   | 15 ++++++---------
 fs/nfsd/nfsfh.c       | 28 +++++++++++++---------------
 fs/nfsd/nfsfh.h       |  4 ++--
 fs/nfsd/vfs.c         |  9 ++++-----
 5 files changed, 31 insertions(+), 37 deletions(-)

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index e0a65a9e37e9..9c271f42604a 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -385,8 +385,8 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
 
 	status = vfs_rmdir(parent->d_inode, child);
 	if (status)
-		printk("failed to remove client recovery directory %s\n",
-				child->d_name.name);
+		printk("failed to remove client recovery directory %pd\n",
+				child);
 	/* Keep trying, success or failure: */
 	return 0;
 }
@@ -410,15 +410,15 @@ out:
 	nfs4_release_reclaim(nn);
 	if (status)
 		printk("nfsd4: failed to purge old clients from recovery"
-			" directory %s\n", nn->rec_file->f_path.dentry->d_name.name);
+			" directory %pD\n", nn->rec_file);
 }
 
 static int
 load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
 {
 	if (child->d_name.len != HEXDIR_LEN - 1) {
-		printk("nfsd4: illegal name %s in recovery directory\n",
-				child->d_name.name);
+		printk("nfsd4: illegal name %pd in recovery directory\n",
+				child);
 		/* Keep trying; maybe the others are OK: */
 		return 0;
 	}
@@ -437,7 +437,7 @@ nfsd4_recdir_load(struct net *net) {
 	status = nfsd4_list_rec_dir(load_recdir, nn);
 	if (status)
 		printk("nfsd4: failed loading clients from recovery"
-			" directory %s\n", nn->rec_file->f_path.dentry->d_name.name);
+			" directory %pD\n", nn->rec_file);
 	return status;
 }
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0874998a49cd..a601fd49f997 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3843,9 +3843,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	struct nfs4_ol_stateid *stp;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
-	dprintk("NFSD: nfsd4_open_confirm on file %.*s\n",
-			(int)cstate->current_fh.fh_dentry->d_name.len,
-			cstate->current_fh.fh_dentry->d_name.name);
+	dprintk("NFSD: nfsd4_open_confirm on file %pd\n",
+			cstate->current_fh.fh_dentry);
 
 	status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
 	if (status)
@@ -3922,9 +3921,8 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
 	struct nfs4_ol_stateid *stp;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
-	dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", 
-			(int)cstate->current_fh.fh_dentry->d_name.len,
-			cstate->current_fh.fh_dentry->d_name.name);
+	dprintk("NFSD: nfsd4_open_downgrade on file %pd\n", 
+			cstate->current_fh.fh_dentry);
 
 	/* We don't yet support WANT bits: */
 	if (od->od_deleg_want)
@@ -3980,9 +3978,8 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	struct net *net = SVC_NET(rqstp);
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
-	dprintk("NFSD: nfsd4_close on file %.*s\n", 
-			(int)cstate->current_fh.fh_dentry->d_name.len,
-			cstate->current_fh.fh_dentry->d_name.name);
+	dprintk("NFSD: nfsd4_close on file %pd\n", 
+			cstate->current_fh.fh_dentry);
 
 	nfs4_lock_state();
 	status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 814afaa4458a..3d0e15ae6f72 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -47,7 +47,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
 		tdentry = parent;
 	}
 	if (tdentry != exp->ex_path.dentry)
-		dprintk("nfsd_acceptable failed at %p %s\n", tdentry, tdentry->d_name.name);
+		dprintk("nfsd_acceptable failed at %p %pd\n", tdentry, tdentry);
 	rv = (tdentry == exp->ex_path.dentry);
 	dput(tdentry);
 	return rv;
@@ -253,8 +253,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
 
 	if (S_ISDIR(dentry->d_inode->i_mode) &&
 			(dentry->d_flags & DCACHE_DISCONNECTED)) {
-		printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
-				dentry->d_parent->d_name.name, dentry->d_name.name);
+		printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %pd2\n",
+				dentry);
 	}
 
 	fhp->fh_dentry = dentry;
@@ -361,10 +361,9 @@ skip_pseudoflavor_check:
 	error = nfsd_permission(rqstp, exp, dentry, access);
 
 	if (error) {
-		dprintk("fh_verify: %s/%s permission failure, "
+		dprintk("fh_verify: %pd2 permission failure, "
 			"acc=%x, error=%d\n",
-			dentry->d_parent->d_name.name,
-			dentry->d_name.name,
+			dentry,
 			access, ntohl(error));
 	}
 out:
@@ -514,14 +513,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 	 */
 
 	struct inode * inode = dentry->d_inode;
-	struct dentry *parent = dentry->d_parent;
 	__u32 *datap;
 	dev_t ex_dev = exp_sb(exp)->s_dev;
 
-	dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n",
+	dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n",
 		MAJOR(ex_dev), MINOR(ex_dev),
 		(long) exp->ex_path.dentry->d_inode->i_ino,
-		parent->d_name.name, dentry->d_name.name,
+		dentry,
 		(inode ? inode->i_ino : 0));
 
 	/* Choose filehandle version and fsid type based on
@@ -534,13 +532,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 		fh_put(ref_fh);
 
 	if (fhp->fh_locked || fhp->fh_dentry) {
-		printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n",
-		       parent->d_name.name, dentry->d_name.name);
+		printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n",
+		       dentry);
 	}
 	if (fhp->fh_maxsize < NFS_FHSIZE)
-		printk(KERN_ERR "fh_compose: called with maxsize %d! %s/%s\n",
+		printk(KERN_ERR "fh_compose: called with maxsize %d! %pd2\n",
 		       fhp->fh_maxsize,
-		       parent->d_name.name, dentry->d_name.name);
+		       dentry);
 
 	fhp->fh_dentry = dget(dentry); /* our internal copy */
 	fhp->fh_export = exp;
@@ -613,8 +611,8 @@ out_bad:
 	printk(KERN_ERR "fh_update: fh not verified!\n");
 	goto out;
 out_negative:
-	printk(KERN_ERR "fh_update: %s/%s still negative!\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name);
+	printk(KERN_ERR "fh_update: %pd2 still negative!\n",
+		dentry);
 	goto out;
 }
 
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index e5e6707ba687..4775bc4896c8 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -173,8 +173,8 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
 	BUG_ON(!dentry);
 
 	if (fhp->fh_locked) {
-		printk(KERN_WARNING "fh_lock: %s/%s already locked!\n",
-			dentry->d_parent->d_name.name, dentry->d_name.name);
+		printk(KERN_WARNING "fh_lock: %pd2 already locked!\n",
+			dentry);
 		return;
 	}
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c827acb0e943..13886f7f40d5 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1317,9 +1317,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		if (!fhp->fh_locked) {
 			/* not actually possible */
 			printk(KERN_ERR
-				"nfsd_create: parent %s/%s not locked!\n",
-				dentry->d_parent->d_name.name,
-				dentry->d_name.name);
+				"nfsd_create: parent %pd2 not locked!\n",
+				dentry);
 			err = nfserr_io;
 			goto out;
 		}
@@ -1329,8 +1328,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	 */
 	err = nfserr_exist;
 	if (dchild->d_inode) {
-		dprintk("nfsd_create: dentry %s/%s not negative!\n",
-			dentry->d_name.name, dchild->d_name.name);
+		dprintk("nfsd_create: dentry %pd/%pd not negative!\n",
+			dentry, dchild);
 		goto out; 
 	}
 

From 1e903edadf3981ac22bba5587fa18d91908f066f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 16 Sep 2013 10:57:41 -0400
Subject: [PATCH 011/101] sunrpc: switch to %pd

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/sunrpc/rpc_pipe.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index f94567b45bb3..d0d14a04dce1 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -519,8 +519,8 @@ static int __rpc_create_common(struct inode *dir, struct dentry *dentry,
 	d_add(dentry, inode);
 	return 0;
 out_err:
-	printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
-			__FILE__, __func__, dentry->d_name.name);
+	printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %pd\n",
+			__FILE__, __func__, dentry);
 	dput(dentry);
 	return -ENOMEM;
 }
@@ -755,8 +755,8 @@ static int rpc_populate(struct dentry *parent,
 out_bad:
 	__rpc_depopulate(parent, files, start, eof);
 	mutex_unlock(&dir->i_mutex);
-	printk(KERN_WARNING "%s: %s failed to populate directory %s\n",
-			__FILE__, __func__, parent->d_name.name);
+	printk(KERN_WARNING "%s: %s failed to populate directory %pd\n",
+			__FILE__, __func__, parent);
 	return err;
 }
 
@@ -852,8 +852,8 @@ out:
 	return dentry;
 out_err:
 	dentry = ERR_PTR(err);
-	printk(KERN_WARNING "%s: %s() failed to create pipe %s/%s (errno = %d)\n",
-			__FILE__, __func__, parent->d_name.name, name,
+	printk(KERN_WARNING "%s: %s() failed to create pipe %pd/%s (errno = %d)\n",
+			__FILE__, __func__, parent, name,
 			err);
 	goto out;
 }

From 4cb2a01d8c4bd18a0bb292f32e9876b0c4461e12 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 16 Sep 2013 10:58:53 -0400
Subject: [PATCH 012/101] ubifs: switch to %pd

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ubifs/dir.c     | 41 +++++++++++++++++++----------------------
 fs/ubifs/journal.c |  6 ++----
 fs/ubifs/xattr.c   | 16 ++++++++--------
 3 files changed, 29 insertions(+), 34 deletions(-)

diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 6b4947f75af7..ea41649e4ca5 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -192,8 +192,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
 	struct ubifs_dent_node *dent;
 	struct ubifs_info *c = dir->i_sb->s_fs_info;
 
-	dbg_gen("'%.*s' in dir ino %lu",
-		dentry->d_name.len, dentry->d_name.name, dir->i_ino);
+	dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino);
 
 	if (dentry->d_name.len > UBIFS_MAX_NLEN)
 		return ERR_PTR(-ENAMETOOLONG);
@@ -225,8 +224,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
 		 * checking.
 		 */
 		err = PTR_ERR(inode);
-		ubifs_err("dead directory entry '%.*s', error %d",
-			  dentry->d_name.len, dentry->d_name.name, err);
+		ubifs_err("dead directory entry '%pd', error %d",
+			  dentry, err);
 		ubifs_ro_mode(c, err);
 		goto out;
 	}
@@ -260,8 +259,8 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 	 * parent directory inode.
 	 */
 
-	dbg_gen("dent '%.*s', mode %#hx in dir ino %lu",
-		dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
+	dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
+		dentry, mode, dir->i_ino);
 
 	err = ubifs_budget_space(c, &req);
 	if (err)
@@ -509,8 +508,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
 	 * changing the parent inode.
 	 */
 
-	dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu",
-		dentry->d_name.len, dentry->d_name.name, inode->i_ino,
+	dbg_gen("dent '%pd' to ino %lu (nlink %d) in dir ino %lu",
+		dentry, inode->i_ino,
 		inode->i_nlink, dir->i_ino);
 	ubifs_assert(mutex_is_locked(&dir->i_mutex));
 	ubifs_assert(mutex_is_locked(&inode->i_mutex));
@@ -566,8 +565,8 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
 	 * deletions.
 	 */
 
-	dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu",
-		dentry->d_name.len, dentry->d_name.name, inode->i_ino,
+	dbg_gen("dent '%pd' from ino %lu (nlink %d) in dir ino %lu",
+		dentry, inode->i_ino,
 		inode->i_nlink, dir->i_ino);
 	ubifs_assert(mutex_is_locked(&dir->i_mutex));
 	ubifs_assert(mutex_is_locked(&inode->i_mutex));
@@ -656,8 +655,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
 	 * because we have extra space reserved for deletions.
 	 */
 
-	dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len,
-		dentry->d_name.name, inode->i_ino, dir->i_ino);
+	dbg_gen("directory '%pd', ino %lu in dir ino %lu", dentry,
+		inode->i_ino, dir->i_ino);
 	ubifs_assert(mutex_is_locked(&dir->i_mutex));
 	ubifs_assert(mutex_is_locked(&inode->i_mutex));
 	err = check_dir_empty(c, dentry->d_inode);
@@ -716,8 +715,8 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	 * directory inode.
 	 */
 
-	dbg_gen("dent '%.*s', mode %#hx in dir ino %lu",
-		dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
+	dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
+		dentry, mode, dir->i_ino);
 
 	err = ubifs_budget_space(c, &req);
 	if (err)
@@ -778,8 +777,7 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
 	 * directory inode.
 	 */
 
-	dbg_gen("dent '%.*s' in dir ino %lu",
-		dentry->d_name.len, dentry->d_name.name, dir->i_ino);
+	dbg_gen("dent '%pd' in dir ino %lu", dentry, dir->i_ino);
 
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
@@ -853,8 +851,8 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
 	 * directory inode.
 	 */
 
-	dbg_gen("dent '%.*s', target '%s' in dir ino %lu", dentry->d_name.len,
-		dentry->d_name.name, symname, dir->i_ino);
+	dbg_gen("dent '%pd', target '%s' in dir ino %lu", dentry,
+		symname, dir->i_ino);
 
 	if (len > UBIFS_MAX_INO_DATA)
 		return -ENAMETOOLONG;
@@ -979,10 +977,9 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 * separately.
 	 */
 
-	dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in dir ino %lu",
-		old_dentry->d_name.len, old_dentry->d_name.name,
-		old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len,
-		new_dentry->d_name.name, new_dir->i_ino);
+	dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu",
+		old_dentry, old_inode->i_ino, old_dir->i_ino,
+		new_dentry, new_dir->i_ino);
 	ubifs_assert(mutex_is_locked(&old_dir->i_mutex));
 	ubifs_assert(mutex_is_locked(&new_dir->i_mutex));
 	if (unlink)
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index afaad07f3b29..0e045e75abd8 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -933,10 +933,8 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
 	int move = (old_dir != new_dir);
 	struct ubifs_inode *uninitialized_var(new_ui);
 
-	dbg_jnl("dent '%.*s' in dir ino %lu to dent '%.*s' in dir ino %lu",
-		old_dentry->d_name.len, old_dentry->d_name.name,
-		old_dir->i_ino, new_dentry->d_name.len,
-		new_dentry->d_name.name, new_dir->i_ino);
+	dbg_jnl("dent '%pd' in dir ino %lu to dent '%pd' in dir ino %lu",
+		old_dentry, old_dir->i_ino, new_dentry, new_dir->i_ino);
 	ubifs_assert(ubifs_inode(old_dir)->data_len == 0);
 	ubifs_assert(ubifs_inode(new_dir)->data_len == 0);
 	ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex));
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 0f7139bdb2c2..5e0a63b1b0d5 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -303,8 +303,8 @@ int ubifs_setxattr(struct dentry *dentry, const char *name,
 	union ubifs_key key;
 	int err, type;
 
-	dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name,
-		host->i_ino, dentry->d_name.len, dentry->d_name.name, size);
+	dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd", name,
+		host->i_ino, dentry, size);
 	ubifs_assert(mutex_is_locked(&host->i_mutex));
 
 	if (size > UBIFS_MAX_INO_DATA)
@@ -367,8 +367,8 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
 	union ubifs_key key;
 	int err;
 
-	dbg_gen("xattr '%s', ino %lu ('%.*s'), buf size %zd", name,
-		host->i_ino, dentry->d_name.len, dentry->d_name.name, size);
+	dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name,
+		host->i_ino, dentry, size);
 
 	err = check_namespace(&nm);
 	if (err < 0)
@@ -426,8 +426,8 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 	int err, len, written = 0;
 	struct qstr nm = { .name = NULL };
 
-	dbg_gen("ino %lu ('%.*s'), buffer size %zd", host->i_ino,
-		dentry->d_name.len, dentry->d_name.name, size);
+	dbg_gen("ino %lu ('%pd'), buffer size %zd", host->i_ino,
+		dentry, size);
 
 	len = host_ui->xattr_names + host_ui->xattr_cnt;
 	if (!buffer)
@@ -529,8 +529,8 @@ int ubifs_removexattr(struct dentry *dentry, const char *name)
 	union ubifs_key key;
 	int err;
 
-	dbg_gen("xattr '%s', ino %lu ('%.*s')", name,
-		host->i_ino, dentry->d_name.len, dentry->d_name.name);
+	dbg_gen("xattr '%s', ino %lu ('%pd')", name,
+		host->i_ino, dentry);
 	ubifs_assert(mutex_is_locked(&host->i_mutex));
 
 	err = check_namespace(&nm);

From 84eb3532b574bb6a65c0aad9591b28b2adcd47d2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 16 Sep 2013 10:59:55 -0400
Subject: [PATCH 013/101] ncpfs: switch to %p[dD]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ncpfs/dir.c  | 55 ++++++++++++++++++-------------------------------
 fs/ncpfs/file.c | 12 ++++-------
 2 files changed, 24 insertions(+), 43 deletions(-)

diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 3be047474bfc..c320ac52353e 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -339,9 +339,8 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
 	if (val)
 		goto finished;
 
-	DDPRINTK("ncp_lookup_validate: %s/%s not valid, age=%ld, server lookup\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name,
-		NCP_GET_AGE(dentry));
+	DDPRINTK("ncp_lookup_validate: %pd2 not valid, age=%ld, server lookup\n",
+		dentry, NCP_GET_AGE(dentry));
 
 	len = sizeof(__name);
 	if (ncp_is_server_root(dir)) {
@@ -359,8 +358,8 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
 			res = ncp_obtain_info(server, dir, __name, &(finfo.i));
 	}
 	finfo.volume = finfo.i.volNumber;
-	DDPRINTK("ncp_lookup_validate: looked for %s/%s, res=%d\n",
-		dentry->d_parent->d_name.name, __name, res);
+	DDPRINTK("ncp_lookup_validate: looked for %pd/%s, res=%d\n",
+		dentry->d_parent, __name, res);
 	/*
 	 * If we didn't find it, or if it has a different dirEntNum to
 	 * what we remember, it's not valid any more.
@@ -454,8 +453,7 @@ static int ncp_readdir(struct file *file, struct dir_context *ctx)
 	ctl.page  = NULL;
 	ctl.cache = NULL;
 
-	DDPRINTK("ncp_readdir: reading %s/%s, pos=%d\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name,
+	DDPRINTK("ncp_readdir: reading %pD2, pos=%d\n", file,
 		(int) ctx->pos);
 
 	result = -EIO;
@@ -740,12 +738,10 @@ ncp_do_readdir(struct file *file, struct dir_context *ctx,
 	int more;
 	size_t bufsize;
 
-	DPRINTK("ncp_do_readdir: %s/%s, fpos=%ld\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name,
+	DPRINTK("ncp_do_readdir: %pD2, fpos=%ld\n", file,
 		(unsigned long) ctx->pos);
-	PPRINTK("ncp_do_readdir: init %s, volnum=%d, dirent=%u\n",
-		dentry->d_name.name, NCP_FINFO(dir)->volNumber,
-		NCP_FINFO(dir)->dirEntNum);
+	PPRINTK("ncp_do_readdir: init %pD, volnum=%d, dirent=%u\n",
+		file, NCP_FINFO(dir)->volNumber, NCP_FINFO(dir)->dirEntNum);
 
 	err = ncp_initialize_search(server, dir, &seq);
 	if (err) {
@@ -850,8 +846,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig
 	if (!ncp_conn_valid(server))
 		goto finished;
 
-	PPRINTK("ncp_lookup: server lookup for %s/%s\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name);
+	PPRINTK("ncp_lookup: server lookup for %pd2\n", dentry);
 
 	len = sizeof(__name);
 	if (ncp_is_server_root(dir)) {
@@ -867,8 +862,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig
 		if (!res)
 			res = ncp_obtain_info(server, dir, __name, &(finfo.i));
 	}
-	PPRINTK("ncp_lookup: looked for %s/%s, res=%d\n",
-		dentry->d_parent->d_name.name, __name, res);
+	PPRINTK("ncp_lookup: looked for %pd2, res=%d\n", dentry, res);
 	/*
 	 * If we didn't find an entry, make a negative dentry.
 	 */
@@ -915,8 +909,7 @@ out:
 	return error;
 
 out_close:
-	PPRINTK("ncp_instantiate: %s/%s failed, closing file\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name);
+	PPRINTK("ncp_instantiate: %pd2 failed, closing file\n", dentry);
 	ncp_close_file(NCP_SERVER(dir), finfo->file_handle);
 	goto out;
 }
@@ -930,8 +923,7 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode,
 	int opmode;
 	__u8 __name[NCP_MAXPATHLEN + 1];
 	
-	PPRINTK("ncp_create_new: creating %s/%s, mode=%hx\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name, mode);
+	PPRINTK("ncp_create_new: creating %pd2, mode=%hx\n", dentry, mode);
 
 	ncp_age_dentry(server, dentry);
 	len = sizeof(__name);
@@ -960,8 +952,7 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode,
 				error = -ENAMETOOLONG;
 			else if (result < 0)
 				error = result;
-			DPRINTK("ncp_create: %s/%s failed\n",
-				dentry->d_parent->d_name.name, dentry->d_name.name);
+			DPRINTK("ncp_create: %pd2 failed\n", dentry);
 			goto out;
 		}
 		opmode = O_WRONLY;
@@ -994,8 +985,7 @@ static int ncp_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	int error, len;
 	__u8 __name[NCP_MAXPATHLEN + 1];
 
-	DPRINTK("ncp_mkdir: making %s/%s\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name);
+	DPRINTK("ncp_mkdir: making %pd2\n", dentry);
 
 	ncp_age_dentry(server, dentry);
 	len = sizeof(__name);
@@ -1032,8 +1022,7 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry)
 	int error, result, len;
 	__u8 __name[NCP_MAXPATHLEN + 1];
 
-	DPRINTK("ncp_rmdir: removing %s/%s\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name);
+	DPRINTK("ncp_rmdir: removing %pd2\n", dentry);
 
 	len = sizeof(__name);
 	error = ncp_io2vol(server, __name, &len, dentry->d_name.name,
@@ -1078,8 +1067,7 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry)
 	int error;
 
 	server = NCP_SERVER(dir);
-	DPRINTK("ncp_unlink: unlinking %s/%s\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name);
+	DPRINTK("ncp_unlink: unlinking %pd2\n", dentry);
 	
 	/*
 	 * Check whether to close the file ...
@@ -1099,8 +1087,7 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry)
 #endif
 	switch (error) {
 		case 0x00:
-			DPRINTK("ncp: removed %s/%s\n",
-				dentry->d_parent->d_name.name, dentry->d_name.name);
+			DPRINTK("ncp: removed %pd2\n", dentry);
 			break;
 		case 0x85:
 		case 0x8A:
@@ -1133,9 +1120,7 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry,
 	int old_len, new_len;
 	__u8 __old_name[NCP_MAXPATHLEN + 1], __new_name[NCP_MAXPATHLEN + 1];
 
-	DPRINTK("ncp_rename: %s/%s to %s/%s\n",
-		old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
-		new_dentry->d_parent->d_name.name, new_dentry->d_name.name);
+	DPRINTK("ncp_rename: %pd2 to %pd2\n", old_dentry, new_dentry);
 
 	ncp_age_dentry(server, old_dentry);
 	ncp_age_dentry(server, new_dentry);
@@ -1165,8 +1150,8 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry,
 #endif
 	switch (error) {
 		case 0x00:
-               	        DPRINTK("ncp renamed %s -> %s.\n",
-                                old_dentry->d_name.name,new_dentry->d_name.name);
+               	        DPRINTK("ncp renamed %pd -> %pd.\n",
+                                old_dentry, new_dentry);
 			break;
 		case 0x9E:
 			error = -ENAMETOOLONG;
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 122e260247f5..8f5074e1ecb9 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -107,8 +107,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 	void* freepage;
 	size_t freelen;
 
-	DPRINTK("ncp_file_read: enter %s/%s\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name);
+	DPRINTK("ncp_file_read: enter %pd2\n", dentry);
 
 	pos = *ppos;
 
@@ -166,8 +165,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 
 	file_accessed(file);
 
-	DPRINTK("ncp_file_read: exit %s/%s\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name);
+	DPRINTK("ncp_file_read: exit %pd2\n", dentry);
 outrel:
 	ncp_inode_close(inode);		
 	return already_read ? already_read : error;
@@ -184,8 +182,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
 	int errno;
 	void* bouncebuffer;
 
-	DPRINTK("ncp_file_write: enter %s/%s\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name);
+	DPRINTK("ncp_file_write: enter %pd2\n", dentry);
 	if ((ssize_t) count < 0)
 		return -EINVAL;
 	pos = *ppos;
@@ -264,8 +261,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
 			i_size_write(inode, pos);
 		mutex_unlock(&inode->i_mutex);
 	}
-	DPRINTK("ncp_file_write: exit %s/%s\n",
-		dentry->d_parent->d_name.name, dentry->d_name.name);
+	DPRINTK("ncp_file_write: exit %pd2\n", dentry);
 outrel:
 	ncp_inode_close(inode);		
 	return already_written ? already_written : errno;

From 7b00ed6fe6321547b9d51f5bfb30e7b5932e6889 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 16 Sep 2013 21:19:20 -0400
Subject: [PATCH 014/101] put_mnt_ns(): use drop_collected_mounts()

... rather than open-coding it

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index da5c49483430..5918fc31a636 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2802,11 +2802,7 @@ void put_mnt_ns(struct mnt_namespace *ns)
 {
 	if (!atomic_dec_and_test(&ns->count))
 		return;
-	namespace_lock();
-	br_write_lock(&vfsmount_lock);
-	umount_tree(ns->root, 0);
-	br_write_unlock(&vfsmount_lock);
-	namespace_unlock();
+	drop_collected_mounts(&ns->root->mnt);
 	free_mnt_ns(ns);
 }
 

From e84f9e57b90ca89664d733a7cef19aa7ccd832f3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 22 Sep 2013 14:17:15 -0400
Subject: [PATCH 015/101] consolidate the reassignments of ->f_op in ->open()
 instances

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/char/misc.c             | 12 +++---------
 drivers/gpu/drm/drm_fops.c      | 17 ++++++-----------
 drivers/media/dvb-core/dvbdev.c | 19 ++++++-------------
 drivers/usb/core/file.c         | 16 ++++------------
 fs/char_dev.c                   |  6 ++++--
 include/linux/fs.h              | 11 +++++++++++
 sound/core/sound.c              | 22 ++++++----------------
 sound/sound_core.c              | 17 +++--------------
 8 files changed, 43 insertions(+), 77 deletions(-)

diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index 190d4423653f..fd0f7c4b4df9 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -114,7 +114,7 @@ static int misc_open(struct inode * inode, struct file * file)
 	int minor = iminor(inode);
 	struct miscdevice *c;
 	int err = -ENODEV;
-	const struct file_operations *old_fops, *new_fops = NULL;
+	const struct file_operations *new_fops = NULL;
 
 	mutex_lock(&misc_mtx);
 	
@@ -141,17 +141,11 @@ static int misc_open(struct inode * inode, struct file * file)
 	}
 
 	err = 0;
-	old_fops = file->f_op;
-	file->f_op = new_fops;
+	replace_fops(file, new_fops);
 	if (file->f_op->open) {
 		file->private_data = c;
-		err=file->f_op->open(inode,file);
-		if (err) {
-			fops_put(file->f_op);
-			file->f_op = fops_get(old_fops);
-		}
+		err = file->f_op->open(inode,file);
 	}
-	fops_put(old_fops);
 fail:
 	mutex_unlock(&misc_mtx);
 	return err;
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index 3f84277d7036..22d14ecbd3ec 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -148,7 +148,7 @@ int drm_stub_open(struct inode *inode, struct file *filp)
 	struct drm_minor *minor;
 	int minor_id = iminor(inode);
 	int err = -ENODEV;
-	const struct file_operations *old_fops;
+	const struct file_operations *new_fops;
 
 	DRM_DEBUG("\n");
 
@@ -163,18 +163,13 @@ int drm_stub_open(struct inode *inode, struct file *filp)
 	if (drm_device_is_unplugged(dev))
 		goto out;
 
-	old_fops = filp->f_op;
-	filp->f_op = fops_get(dev->driver->fops);
-	if (filp->f_op == NULL) {
-		filp->f_op = old_fops;
+	new_fops = fops_get(dev->driver->fops);
+	if (!new_fops)
 		goto out;
-	}
-	if (filp->f_op->open && (err = filp->f_op->open(inode, filp))) {
-		fops_put(filp->f_op);
-		filp->f_op = fops_get(old_fops);
-	}
-	fops_put(old_fops);
 
+	replace_fops(filp, new_fops);
+	if (filp->f_op->open)
+		err = filp->f_op->open(inode, filp);
 out:
 	mutex_unlock(&drm_global_mutex);
 	return err;
diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c
index 401ef64f92c6..983db75de350 100644
--- a/drivers/media/dvb-core/dvbdev.c
+++ b/drivers/media/dvb-core/dvbdev.c
@@ -74,22 +74,15 @@ static int dvb_device_open(struct inode *inode, struct file *file)
 
 	if (dvbdev && dvbdev->fops) {
 		int err = 0;
-		const struct file_operations *old_fops;
+		const struct file_operations *new_fops;
 
-		file->private_data = dvbdev;
-		old_fops = file->f_op;
-		file->f_op = fops_get(dvbdev->fops);
-		if (file->f_op == NULL) {
-			file->f_op = old_fops;
+		new_fops = fops_get(dvbdev->fops);
+		if (!new_fops)
 			goto fail;
-		}
-		if(file->f_op->open)
+		file->private_data = dvbdev;
+		replace_fops(file, new_fops);
+		if (file->f_op->open)
 			err = file->f_op->open(inode,file);
-		if (err) {
-			fops_put(file->f_op);
-			file->f_op = fops_get(old_fops);
-		}
-		fops_put(old_fops);
 		up_read(&minor_rwsem);
 		mutex_unlock(&dvbdev_mutex);
 		return err;
diff --git a/drivers/usb/core/file.c b/drivers/usb/core/file.c
index 7421888087a3..fe0d8365411a 100644
--- a/drivers/usb/core/file.c
+++ b/drivers/usb/core/file.c
@@ -29,27 +29,19 @@ static DECLARE_RWSEM(minor_rwsem);
 
 static int usb_open(struct inode * inode, struct file * file)
 {
-	int minor = iminor(inode);
-	const struct file_operations *c;
 	int err = -ENODEV;
-	const struct file_operations *old_fops, *new_fops = NULL;
+	const struct file_operations *new_fops;
 
 	down_read(&minor_rwsem);
-	c = usb_minors[minor];
+	new_fops = fops_get(usb_minors[iminor(inode)]);
 
-	if (!c || !(new_fops = fops_get(c)))
+	if (!new_fops)
 		goto done;
 
-	old_fops = file->f_op;
-	file->f_op = new_fops;
+	replace_fops(file, new_fops);
 	/* Curiouser and curiouser... NULL ->open() as "no device" ? */
 	if (file->f_op->open)
 		err = file->f_op->open(inode,file);
-	if (err) {
-		fops_put(file->f_op);
-		file->f_op = fops_get(old_fops);
-	}
-	fops_put(old_fops);
  done:
 	up_read(&minor_rwsem);
 	return err;
diff --git a/fs/char_dev.c b/fs/char_dev.c
index afc2bb691780..94b5f60076da 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -368,6 +368,7 @@ void cdev_put(struct cdev *p)
  */
 static int chrdev_open(struct inode *inode, struct file *filp)
 {
+	const struct file_operations *fops;
 	struct cdev *p;
 	struct cdev *new = NULL;
 	int ret = 0;
@@ -400,10 +401,11 @@ static int chrdev_open(struct inode *inode, struct file *filp)
 		return ret;
 
 	ret = -ENXIO;
-	filp->f_op = fops_get(p->ops);
-	if (!filp->f_op)
+	fops = fops_get(p->ops);
+	if (!fops)
 		goto out_cdev_put;
 
+	replace_fops(filp, fops);
 	if (filp->f_op->open) {
 		ret = filp->f_op->open(inode, filp);
 		if (ret)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d80c2437d624..b09e4e1d747a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1875,6 +1875,17 @@ extern struct dentry *mount_pseudo(struct file_system_type *, char *,
 	(((fops) && try_module_get((fops)->owner) ? (fops) : NULL))
 #define fops_put(fops) \
 	do { if (fops) module_put((fops)->owner); } while(0)
+/*
+ * This one is to be used *ONLY* from ->open() instances.
+ * fops must be non-NULL, pinned down *and* module dependencies
+ * should be sufficient to pin the caller down as well.
+ */
+#define replace_fops(f, fops) \
+	do {	\
+		struct file *__file = (f); \
+		fops_put(__file->f_op); \
+		BUG_ON(!(__file->f_op = (fops))); \
+	} while(0)
 
 extern int register_filesystem(struct file_system_type *);
 extern int unregister_filesystem(struct file_system_type *);
diff --git a/sound/core/sound.c b/sound/core/sound.c
index f002bd911dae..437c25ea6403 100644
--- a/sound/core/sound.c
+++ b/sound/core/sound.c
@@ -153,7 +153,7 @@ static int snd_open(struct inode *inode, struct file *file)
 {
 	unsigned int minor = iminor(inode);
 	struct snd_minor *mptr = NULL;
-	const struct file_operations *old_fops;
+	const struct file_operations *new_fops;
 	int err = 0;
 
 	if (minor >= ARRAY_SIZE(snd_minors))
@@ -167,24 +167,14 @@ static int snd_open(struct inode *inode, struct file *file)
 			return -ENODEV;
 		}
 	}
-	old_fops = file->f_op;
-	file->f_op = fops_get(mptr->f_ops);
-	if (file->f_op == NULL) {
-		file->f_op = old_fops;
-		err = -ENODEV;
-	}
+	new_fops = fops_get(mptr->f_ops);
 	mutex_unlock(&sound_mutex);
-	if (err < 0)
-		return err;
+	if (!new_fops)
+		return -ENODEV;
+	replace_fops(file, new_fops);
 
-	if (file->f_op->open) {
+	if (file->f_op->open)
 		err = file->f_op->open(inode, file);
-		if (err) {
-			fops_put(file->f_op);
-			file->f_op = fops_get(old_fops);
-		}
-	}
-	fops_put(old_fops);
 	return err;
 }
 
diff --git a/sound/sound_core.c b/sound/sound_core.c
index 45759f4cca75..11e953a1fa45 100644
--- a/sound/sound_core.c
+++ b/sound/sound_core.c
@@ -626,31 +626,20 @@ static int soundcore_open(struct inode *inode, struct file *file)
 		if (s)
 			new_fops = fops_get(s->unit_fops);
 	}
+	spin_unlock(&sound_loader_lock);
 	if (new_fops) {
 		/*
 		 * We rely upon the fact that we can't be unloaded while the
-		 * subdriver is there, so if ->open() is successful we can
-		 * safely drop the reference counter and if it is not we can
-		 * revert to old ->f_op. Ugly, indeed, but that's the cost of
-		 * switching ->f_op in the first place.
+		 * subdriver is there.
 		 */
 		int err = 0;
-		const struct file_operations *old_fops = file->f_op;
-		file->f_op = new_fops;
-		spin_unlock(&sound_loader_lock);
+		replace_fops(file, new_fops);
 
 		if (file->f_op->open)
 			err = file->f_op->open(inode,file);
 
-		if (err) {
-			fops_put(file->f_op);
-			file->f_op = fops_get(old_fops);
-		}
-
-		fops_put(old_fops);
 		return err;
 	}
-	spin_unlock(&sound_loader_lock);
 	return -ENODEV;
 }
 

From 79b185dd1a40cf360c9d88ada7ed21f79d818613 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 22 Sep 2013 14:33:32 -0400
Subject: [PATCH 016/101] dmxdev: get rid of pointless clearing ->f_op

nobody else will see that struct file after return from ->release()
anyway; just leave ->f_op as is and let __fput() do that fops_put().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/media/dvb-core/dmxdev.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c
index 0b4616b87195..c0363f1b6c90 100644
--- a/drivers/media/dvb-core/dmxdev.c
+++ b/drivers/media/dvb-core/dmxdev.c
@@ -206,8 +206,6 @@ static int dvb_dvr_release(struct inode *inode, struct file *file)
 	/* TODO */
 	dvbdev->users--;
 	if (dvbdev->users == 1 && dmxdev->exit == 1) {
-		fops_put(file->f_op);
-		file->f_op = NULL;
 		mutex_unlock(&dmxdev->mutex);
 		wake_up(&dvbdev->wait_queue);
 	} else
@@ -1120,8 +1118,6 @@ static int dvb_demux_release(struct inode *inode, struct file *file)
 	mutex_lock(&dmxdev->mutex);
 	dmxdev->dvbdev->users--;
 	if(dmxdev->dvbdev->users==1 && dmxdev->exit==1) {
-		fops_put(file->f_op);
-		file->f_op = NULL;
 		mutex_unlock(&dmxdev->mutex);
 		wake_up(&dmxdev->dvbdev->wait_queue);
 	} else

From 22bd002ee76aa7d7a3393f39d977f6c106153c60 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 22 Sep 2013 14:42:05 -0400
Subject: [PATCH 017/101] rtl8188eu: remove dead code

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 .../staging/rtl8188eu/include/osdep_service.h |   5 -
 .../staging/rtl8188eu/os_dep/osdep_service.c  | 208 ------------------
 2 files changed, 213 deletions(-)

diff --git a/drivers/staging/rtl8188eu/include/osdep_service.h b/drivers/staging/rtl8188eu/include/osdep_service.h
index 44f24fa31a34..36523edf6a71 100644
--- a/drivers/staging/rtl8188eu/include/osdep_service.h
+++ b/drivers/staging/rtl8188eu/include/osdep_service.h
@@ -430,11 +430,6 @@ int ATOMIC_SUB_RETURN(ATOMIC_T *v, int i);
 int ATOMIC_INC_RETURN(ATOMIC_T *v);
 int ATOMIC_DEC_RETURN(ATOMIC_T *v);
 
-/* File operation APIs, just for linux now */
-int rtw_is_file_readable(char *path);
-int rtw_retrive_from_file(char *path, u8 __user *buf, u32 sz);
-int rtw_store_to_file(char *path, u8 __user *buf, u32 sz);
-
 struct rtw_netdev_priv_indicator {
 	void *priv;
 	u32 sizeof_priv;
diff --git a/drivers/staging/rtl8188eu/os_dep/osdep_service.c b/drivers/staging/rtl8188eu/os_dep/osdep_service.c
index 4e0bfb7e153b..4aedb85a3527 100644
--- a/drivers/staging/rtl8188eu/os_dep/osdep_service.c
+++ b/drivers/staging/rtl8188eu/os_dep/osdep_service.c
@@ -356,214 +356,6 @@ inline int ATOMIC_DEC_RETURN(ATOMIC_T *v)
 	return atomic_dec_return(v);
 }
 
-/* Open a file with the specific @param path, @param flag, @param mode
- * @param fpp the pointer of struct file pointer to get struct file pointer while file opening is success
- * @param path the path of the file to open
- * @param flag file operation flags, please refer to linux document
- * @param mode please refer to linux document
- * @return Linux specific error code
- */
-static int openfile(struct file **fpp, char *path, int flag, int mode)
-{
-	struct file *fp;
-
-	fp = filp_open(path, flag, mode);
-	if (IS_ERR(fp)) {
-		*fpp = NULL;
-		return PTR_ERR(fp);
-	} else {
-		*fpp = fp;
-		return 0;
-	}
-}
-
-/* Close the file with the specific @param fp
- * @param fp the pointer of struct file to close
- * @return always 0
- */
-static int closefile(struct file *fp)
-{
-	filp_close(fp, NULL);
-	return 0;
-}
-
-static int readfile(struct file *fp, char __user *buf, int len)
-{
-	int rlen = 0, sum = 0;
-
-	if (!fp->f_op || !fp->f_op->read)
-		return -EPERM;
-
-	while (sum < len) {
-		rlen = fp->f_op->read(fp, buf+sum, len-sum, &fp->f_pos);
-		if (rlen > 0)
-			sum += rlen;
-		else if (0 != rlen)
-			return rlen;
-		else
-			break;
-	}
-	return  sum;
-}
-
-static int writefile(struct file *fp, char __user *buf, int len)
-{
-	int wlen = 0, sum = 0;
-
-	if (!fp->f_op || !fp->f_op->write)
-		return -EPERM;
-
-	while (sum < len) {
-		wlen = fp->f_op->write(fp, buf+sum, len-sum, &fp->f_pos);
-		if (wlen > 0)
-			sum += wlen;
-		else if (0 != wlen)
-			return wlen;
-		else
-			break;
-	}
-	return sum;
-}
-
-/* Test if the specifi @param path is a file and readable
- * @param path the path of the file to test
- * @return Linux specific error code
- */
-static int isfilereadable(char *path)
-{
-	struct file *fp;
-	int ret = 0;
-	mm_segment_t oldfs;
-	char __user buf;
-
-	fp = filp_open(path, O_RDONLY, 0);
-	if (IS_ERR(fp)) {
-		ret = PTR_ERR(fp);
-	} else {
-		oldfs = get_fs(); set_fs(get_ds());
-
-		if (1 != readfile(fp, &buf, 1))
-			ret = PTR_ERR(fp);
-
-		set_fs(oldfs);
-		filp_close(fp, NULL);
-	}
-	return ret;
-}
-
-/* Open the file with @param path and retrive the file content into
- * memory starting from @param buf for @param sz at most
- * @param path the path of the file to open and read
- * @param buf the starting address of the buffer to store file content
- * @param sz how many bytes to read at most
- * @return the byte we've read, or Linux specific error code
- */
-static int retrievefromfile(char *path, u8 __user *buf, u32 sz)
-{
-	int ret = -1;
-	mm_segment_t oldfs;
-	struct file *fp;
-
-	if (path && buf) {
-		ret = openfile(&fp, path, O_RDONLY, 0);
-		if (0 == ret) {
-			DBG_88E("%s openfile path:%s fp =%p\n", __func__,
-				path, fp);
-
-			oldfs = get_fs(); set_fs(get_ds());
-			ret = readfile(fp, buf, sz);
-			set_fs(oldfs);
-			closefile(fp);
-
-			DBG_88E("%s readfile, ret:%d\n", __func__, ret);
-
-		} else {
-			DBG_88E("%s openfile path:%s Fail, ret:%d\n", __func__,
-				path, ret);
-		}
-	} else {
-		DBG_88E("%s NULL pointer\n", __func__);
-		ret =  -EINVAL;
-	}
-	return ret;
-}
-
-/*
-* Open the file with @param path and wirte @param sz byte of data starting from @param buf into the file
-* @param path the path of the file to open and write
-* @param buf the starting address of the data to write into file
-* @param sz how many bytes to write at most
-* @return the byte we've written, or Linux specific error code
-*/
-static int storetofile(char *path, u8 __user *buf, u32 sz)
-{
-	int ret = 0;
-	mm_segment_t oldfs;
-	struct file *fp;
-
-	if (path && buf) {
-		ret = openfile(&fp, path, O_CREAT|O_WRONLY, 0666);
-		if (0 == ret) {
-			DBG_88E("%s openfile path:%s fp =%p\n", __func__, path, fp);
-
-			oldfs = get_fs(); set_fs(get_ds());
-			ret = writefile(fp, buf, sz);
-			set_fs(oldfs);
-			closefile(fp);
-
-			DBG_88E("%s writefile, ret:%d\n", __func__, ret);
-
-		} else {
-			DBG_88E("%s openfile path:%s Fail, ret:%d\n", __func__, path, ret);
-		}
-	} else {
-		DBG_88E("%s NULL pointer\n", __func__);
-		ret =  -EINVAL;
-	}
-	return ret;
-}
-
-/*
-* Test if the specifi @param path is a file and readable
-* @param path the path of the file to test
-* @return true or false
-*/
-int rtw_is_file_readable(char *path)
-{
-	if (isfilereadable(path) == 0)
-		return true;
-	else
-		return false;
-}
-
-/*
-* Open the file with @param path and retrive the file content into memory starting from @param buf for @param sz at most
-* @param path the path of the file to open and read
-* @param buf the starting address of the buffer to store file content
-* @param sz how many bytes to read at most
-* @return the byte we've read
-*/
-int rtw_retrive_from_file(char *path, u8 __user *buf, u32 sz)
-{
-	int ret = retrievefromfile(path, buf, sz);
-
-	return ret >= 0 ? ret : 0;
-}
-
-/*
- * Open the file with @param path and wirte @param sz byte of data
- * starting from @param buf into the file
- * @param path the path of the file to open and write
- * @param buf the starting address of the data to write into file
- * @param sz how many bytes to write at most
- * @return the byte we've written
- */
-int rtw_store_to_file(char *path, u8 __user *buf, u32 sz)
-{
-	int ret = storetofile(path, buf, sz);
-	return ret >= 0 ? ret : 0;
-}
-
 struct net_device *rtw_alloc_etherdev_with_old_priv(int sizeof_priv,
 						    void *old_priv)
 {

From 72c2d53192004845cbc19cd8a30b3212a9288140 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 22 Sep 2013 16:27:52 -0400
Subject: [PATCH 018/101] file->f_op is never NULL...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/mtd/nand/nandsim.c               |  2 +-
 drivers/staging/comedi/comedi_compat32.c |  3 --
 fs/autofs4/autofs_i.h                    |  2 +-
 fs/autofs4/dev-ioctl.c                   |  6 ----
 fs/binfmt_aout.c                         |  4 +--
 fs/binfmt_elf.c                          |  6 ++--
 fs/binfmt_elf_fdpic.c                    |  2 +-
 fs/binfmt_em86.c                         |  2 +-
 fs/coda/dir.c                            |  3 --
 fs/coda/file.c                           |  6 ++--
 fs/compat_ioctl.c                        |  4 +--
 fs/coredump.c                            |  2 +-
 fs/ecryptfs/file.c                       |  8 +++---
 fs/eventpoll.c                           |  2 +-
 fs/exec.c                                | 35 ++++++++++--------------
 fs/fcntl.c                               |  5 ++--
 fs/file_table.c                          |  4 +--
 fs/ioctl.c                               |  4 +--
 fs/locks.c                               | 12 ++++----
 fs/open.c                                |  8 ++++--
 fs/read_write.c                          | 25 ++++++-----------
 fs/readdir.c                             |  2 +-
 fs/select.c                              |  4 +--
 fs/splice.c                              |  6 ++--
 fs/sync.c                                |  2 +-
 mm/memory.c                              |  2 +-
 mm/mmap.c                                |  4 +--
 mm/nommu.c                               |  2 +-
 net/9p/trans_fd.c                        |  4 +--
 29 files changed, 74 insertions(+), 97 deletions(-)

diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index bdc1d15369f8..d1c7de92cfdf 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -575,7 +575,7 @@ static int alloc_device(struct nandsim *ns)
 		cfile = filp_open(cache_file, O_CREAT | O_RDWR | O_LARGEFILE, 0600);
 		if (IS_ERR(cfile))
 			return PTR_ERR(cfile);
-		if (!cfile->f_op || (!cfile->f_op->read && !cfile->f_op->aio_read)) {
+		if (!cfile->f_op->read && !cfile->f_op->aio_read) {
 			NS_ERR("alloc_device: cache file not readable\n");
 			err = -EINVAL;
 			goto err_close;
diff --git a/drivers/staging/comedi/comedi_compat32.c b/drivers/staging/comedi/comedi_compat32.c
index 2dfb06aedb15..1e9da405d833 100644
--- a/drivers/staging/comedi/comedi_compat32.c
+++ b/drivers/staging/comedi/comedi_compat32.c
@@ -86,9 +86,6 @@ struct comedi32_insnlist_struct {
 static int translated_ioctl(struct file *file, unsigned int cmd,
 			    unsigned long arg)
 {
-	if (!file->f_op)
-		return -ENOTTY;
-
 	if (file->f_op->unlocked_ioctl)
 		return file->f_op->unlocked_ioctl(file, cmd, arg);
 
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 3f1128b37e46..ebab147e9700 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -271,7 +271,7 @@ void autofs4_clean_ino(struct autofs_info *);
 
 static inline int autofs_prepare_pipe(struct file *pipe)
 {
-	if (!pipe->f_op || !pipe->f_op->write)
+	if (!pipe->f_op->write)
 		return -EINVAL;
 	if (!S_ISFIFO(file_inode(pipe)->i_mode))
 		return -EINVAL;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 0f00da329e71..1818ce7f5a06 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -658,12 +658,6 @@ static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __use
 			goto out;
 		}
 
-		if (!fp->f_op) {
-			err = -ENOTTY;
-			fput(fp);
-			goto out;
-		}
-
 		sbi = autofs_dev_ioctl_sbi(fp);
 		if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) {
 			err = -EINVAL;
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 89dec7f789a4..faaa819f70fa 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -221,7 +221,7 @@ static int load_aout_binary(struct linux_binprm * bprm)
 	 * Requires a mmap handler. This prevents people from using a.out
 	 * as part of an exploit attack against /proc-related vulnerabilities.
 	 */
-	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
+	if (!bprm->file->f_op->mmap)
 		return -ENOEXEC;
 
 	fd_offset = N_TXTOFF(ex);
@@ -374,7 +374,7 @@ static int load_aout_library(struct file *file)
 	 * Requires a mmap handler. This prevents people from using a.out
 	 * as part of an exploit attack against /proc-related vulnerabilities.
 	 */
-	if (!file->f_op || !file->f_op->mmap)
+	if (!file->f_op->mmap)
 		goto out;
 
 	if (N_FLAGS(ex))
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 4c94a79991bb..501c8a4d6eb1 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -406,7 +406,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 		goto out;
 	if (!elf_check_arch(interp_elf_ex))
 		goto out;
-	if (!interpreter->f_op || !interpreter->f_op->mmap)
+	if (!interpreter->f_op->mmap)
 		goto out;
 
 	/*
@@ -607,7 +607,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
 		goto out;
 	if (!elf_check_arch(&loc->elf_ex))
 		goto out;
-	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
+	if (!bprm->file->f_op->mmap)
 		goto out;
 
 	/* Now read in all of the header information */
@@ -1028,7 +1028,7 @@ static int load_elf_library(struct file *file)
 
 	/* First of all, some simple consistency checks */
 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
-	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
+	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
 		goto out;
 
 	/* Now read in all of the header information */
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index c166f325a183..ea4c6273b4a5 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -111,7 +111,7 @@ static int is_elf_fdpic(struct elfhdr *hdr, struct file *file)
 		return 0;
 	if (!elf_check_arch(hdr) || !elf_check_fdpic(hdr))
 		return 0;
-	if (!file->f_op || !file->f_op->mmap)
+	if (!file->f_op->mmap)
 		return 0;
 	return 1;
 }
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index 037a3e2b045b..f37b08cea1f7 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -38,7 +38,7 @@ static int load_em86(struct linux_binprm *bprm)
 	/* First of all, some simple consistency checks */
 	if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) ||
 		(!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) ||
-		(!bprm->file->f_op || !bprm->file->f_op->mmap)) {
+		!bprm->file->f_op->mmap) {
 			return -ENOEXEC;
 	}
 
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 190effc6a6fa..8c8d469c9283 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -387,9 +387,6 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx)
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
 	host_file = cfi->cfi_container;
 
-	if (!host_file->f_op)
-		return -ENOTDIR;
-
 	if (host_file->f_op->iterate) {
 		struct inode *host_inode = file_inode(host_file);
 		mutex_lock(&host_inode->i_mutex);
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 380b798f8443..9e83b7790212 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -36,7 +36,7 @@ coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *p
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
 	host_file = cfi->cfi_container;
 
-	if (!host_file->f_op || !host_file->f_op->read)
+	if (!host_file->f_op->read)
 		return -EINVAL;
 
 	return host_file->f_op->read(host_file, buf, count, ppos);
@@ -75,7 +75,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
 	host_file = cfi->cfi_container;
 
-	if (!host_file->f_op || !host_file->f_op->write)
+	if (!host_file->f_op->write)
 		return -EINVAL;
 
 	host_inode = file_inode(host_file);
@@ -105,7 +105,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma)
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
 	host_file = cfi->cfi_container;
 
-	if (!host_file->f_op || !host_file->f_op->mmap)
+	if (!host_file->f_op->mmap)
 		return -ENODEV;
 
 	coda_inode = file_inode(coda_file);
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 5d19acfa7c6c..dc52e13d58e0 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1583,13 +1583,13 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
 		/*FALL THROUGH*/
 
 	default:
-		if (f.file->f_op && f.file->f_op->compat_ioctl) {
+		if (f.file->f_op->compat_ioctl) {
 			error = f.file->f_op->compat_ioctl(f.file, cmd, arg);
 			if (error != -ENOIOCTLCMD)
 				goto out_fput;
 		}
 
-		if (!f.file->f_op || !f.file->f_op->unlocked_ioctl)
+		if (!f.file->f_op->unlocked_ioctl)
 			goto do_ioctl;
 		break;
 	}
diff --git a/fs/coredump.c b/fs/coredump.c
index 9bdeca12ae0e..42c3b8423669 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -645,7 +645,7 @@ void do_coredump(siginfo_t *siginfo)
 		 */
 		if (!uid_eq(inode->i_uid, current_fsuid()))
 			goto close_fail;
-		if (!cprm.file->f_op || !cprm.file->f_op->write)
+		if (!cprm.file->f_op->write)
 			goto close_fail;
 		if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
 			goto close_fail;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 992cf95830b5..6b1cd2b0c751 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -271,7 +271,7 @@ static int ecryptfs_flush(struct file *file, fl_owner_t td)
 {
 	struct file *lower_file = ecryptfs_file_to_lower(file);
 
-	if (lower_file->f_op && lower_file->f_op->flush) {
+	if (lower_file->f_op->flush) {
 		filemap_write_and_wait(file->f_mapping);
 		return lower_file->f_op->flush(lower_file, td);
 	}
@@ -305,7 +305,7 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
 	struct file *lower_file = NULL;
 
 	lower_file = ecryptfs_file_to_lower(file);
-	if (lower_file->f_op && lower_file->f_op->fasync)
+	if (lower_file->f_op->fasync)
 		rc = lower_file->f_op->fasync(fd, lower_file, flag);
 	return rc;
 }
@@ -318,7 +318,7 @@ ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 	if (ecryptfs_file_to_private(file))
 		lower_file = ecryptfs_file_to_lower(file);
-	if (lower_file && lower_file->f_op && lower_file->f_op->unlocked_ioctl)
+	if (lower_file->f_op && lower_file->f_op->unlocked_ioctl)
 		rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
 	return rc;
 }
@@ -332,7 +332,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 	if (ecryptfs_file_to_private(file))
 		lower_file = ecryptfs_file_to_lower(file);
-	if (lower_file && lower_file->f_op && lower_file->f_op->compat_ioctl)
+	if (lower_file->f_op && lower_file->f_op->compat_ioctl)
 		rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
 	return rc;
 }
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 473e09da7d02..31fd77cd81eb 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1816,7 +1816,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 
 	/* The target file descriptor must support poll */
 	error = -EPERM;
-	if (!tf.file->f_op || !tf.file->f_op->poll)
+	if (!tf.file->f_op->poll)
 		goto error_tgt_fput;
 
 	/* Check if EPOLLWAKEUP is allowed */
diff --git a/fs/exec.c b/fs/exec.c
index 8875dd10ae7a..351440fbf1ee 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -106,6 +106,7 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
  */
 SYSCALL_DEFINE1(uselib, const char __user *, library)
 {
+	struct linux_binfmt *fmt;
 	struct file *file;
 	struct filename *tmp = getname(library);
 	int error = PTR_ERR(tmp);
@@ -136,24 +137,21 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
 	fsnotify_open(file);
 
 	error = -ENOEXEC;
-	if(file->f_op) {
-		struct linux_binfmt * fmt;
 
-		read_lock(&binfmt_lock);
-		list_for_each_entry(fmt, &formats, lh) {
-			if (!fmt->load_shlib)
-				continue;
-			if (!try_module_get(fmt->module))
-				continue;
-			read_unlock(&binfmt_lock);
-			error = fmt->load_shlib(file);
-			read_lock(&binfmt_lock);
-			put_binfmt(fmt);
-			if (error != -ENOEXEC)
-				break;
-		}
+	read_lock(&binfmt_lock);
+	list_for_each_entry(fmt, &formats, lh) {
+		if (!fmt->load_shlib)
+			continue;
+		if (!try_module_get(fmt->module))
+			continue;
 		read_unlock(&binfmt_lock);
+		error = fmt->load_shlib(file);
+		read_lock(&binfmt_lock);
+		put_binfmt(fmt);
+		if (error != -ENOEXEC)
+			break;
 	}
+	read_unlock(&binfmt_lock);
 exit:
 	fput(file);
 out:
@@ -1277,13 +1275,10 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
  */
 int prepare_binprm(struct linux_binprm *bprm)
 {
-	umode_t mode;
-	struct inode * inode = file_inode(bprm->file);
+	struct inode *inode = file_inode(bprm->file);
+	umode_t mode = inode->i_mode;
 	int retval;
 
-	mode = inode->i_mode;
-	if (bprm->file->f_op == NULL)
-		return -EACCES;
 
 	/* clear any previous set[ug]id data from a previous binary */
 	bprm->cred->euid = current_euid();
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 65343c3741ff..ef6866592a0f 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -56,7 +56,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
 				return -EINVAL;
 	}
 
-	if (filp->f_op && filp->f_op->check_flags)
+	if (filp->f_op->check_flags)
 		error = filp->f_op->check_flags(arg);
 	if (error)
 		return error;
@@ -64,8 +64,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
 	/*
 	 * ->fasync() is responsible for setting the FASYNC bit.
 	 */
-	if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op &&
-			filp->f_op->fasync) {
+	if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
 		error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
 		if (error < 0)
 			goto out;
diff --git a/fs/file_table.c b/fs/file_table.c
index abdd15ad13c9..e61e5529fa9d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -240,11 +240,11 @@ static void __fput(struct file *file)
 	locks_remove_flock(file);
 
 	if (unlikely(file->f_flags & FASYNC)) {
-		if (file->f_op && file->f_op->fasync)
+		if (file->f_op->fasync)
 			file->f_op->fasync(-1, file, 0);
 	}
 	ima_file_free(file);
-	if (file->f_op && file->f_op->release)
+	if (file->f_op->release)
 		file->f_op->release(inode, file);
 	security_file_free(file);
 	if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
diff --git a/fs/ioctl.c b/fs/ioctl.c
index fd507fb460f8..8ac3fad36192 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -37,7 +37,7 @@ static long vfs_ioctl(struct file *filp, unsigned int cmd,
 {
 	int error = -ENOTTY;
 
-	if (!filp->f_op || !filp->f_op->unlocked_ioctl)
+	if (!filp->f_op->unlocked_ioctl)
 		goto out;
 
 	error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
@@ -501,7 +501,7 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp,
 
 	/* Did FASYNC state change ? */
 	if ((flag ^ filp->f_flags) & FASYNC) {
-		if (filp->f_op && filp->f_op->fasync)
+		if (filp->f_op->fasync)
 			/* fasync() adjusts filp->f_flags */
 			error = filp->f_op->fasync(fd, filp, on);
 		else
diff --git a/fs/locks.c b/fs/locks.c
index b27a3005d78d..f155ffd648bb 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1579,7 +1579,7 @@ EXPORT_SYMBOL(generic_setlease);
 
 static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
 {
-	if (filp->f_op && filp->f_op->setlease)
+	if (filp->f_op->setlease)
 		return filp->f_op->setlease(filp, arg, lease);
 	else
 		return generic_setlease(filp, arg, lease);
@@ -1771,7 +1771,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
 	if (error)
 		goto out_free;
 
-	if (f.file->f_op && f.file->f_op->flock)
+	if (f.file->f_op->flock)
 		error = f.file->f_op->flock(f.file,
 					  (can_sleep) ? F_SETLKW : F_SETLK,
 					  lock);
@@ -1797,7 +1797,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
  */
 int vfs_test_lock(struct file *filp, struct file_lock *fl)
 {
-	if (filp->f_op && filp->f_op->lock)
+	if (filp->f_op->lock)
 		return filp->f_op->lock(filp, F_GETLK, fl);
 	posix_test_lock(filp, fl);
 	return 0;
@@ -1909,7 +1909,7 @@ out:
  */
 int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
 {
-	if (filp->f_op && filp->f_op->lock)
+	if (filp->f_op->lock)
 		return filp->f_op->lock(filp, cmd, fl);
 	else
 		return posix_lock_file(filp, fl, conf);
@@ -2182,7 +2182,7 @@ void locks_remove_flock(struct file *filp)
 	if (!inode->i_flock)
 		return;
 
-	if (filp->f_op && filp->f_op->flock) {
+	if (filp->f_op->flock) {
 		struct file_lock fl = {
 			.fl_pid = current->tgid,
 			.fl_file = filp,
@@ -2246,7 +2246,7 @@ EXPORT_SYMBOL(posix_unblock_lock);
  */
 int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
 {
-	if (filp->f_op && filp->f_op->lock)
+	if (filp->f_op->lock)
 		return filp->f_op->lock(filp, F_CANCELLK, fl);
 	return 0;
 }
diff --git a/fs/open.c b/fs/open.c
index d420331ca32a..a1465b1ec8c7 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -693,6 +693,10 @@ static int do_dentry_open(struct file *f,
 	}
 
 	f->f_op = fops_get(inode->i_fop);
+	if (unlikely(WARN_ON(!f->f_op))) {
+		error = -ENODEV;
+		goto cleanup_all;
+	}
 
 	error = security_file_open(f, cred);
 	if (error)
@@ -702,7 +706,7 @@ static int do_dentry_open(struct file *f,
 	if (error)
 		goto cleanup_all;
 
-	if (!open && f->f_op)
+	if (!open)
 		open = f->f_op->open;
 	if (open) {
 		error = open(inode, f);
@@ -1023,7 +1027,7 @@ int filp_close(struct file *filp, fl_owner_t id)
 		return 0;
 	}
 
-	if (filp->f_op && filp->f_op->flush)
+	if (filp->f_op->flush)
 		retval = filp->f_op->flush(filp, id);
 
 	if (likely(!(filp->f_mode & FMODE_PATH))) {
diff --git a/fs/read_write.c b/fs/read_write.c
index e3cd280b158c..58e440df1bc6 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -257,7 +257,7 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
 
 	fn = no_llseek;
 	if (file->f_mode & FMODE_LSEEK) {
-		if (file->f_op && file->f_op->llseek)
+		if (file->f_op->llseek)
 			fn = file->f_op->llseek;
 	}
 	return fn(file, offset, whence);
@@ -384,7 +384,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 
 	if (!(file->f_mode & FMODE_READ))
 		return -EBADF;
-	if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
+	if (!file->f_op->read && !file->f_op->aio_read)
 		return -EINVAL;
 	if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
 		return -EFAULT;
@@ -433,7 +433,7 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t
 	const char __user *p;
 	ssize_t ret;
 
-	if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
+	if (!file->f_op->write && !file->f_op->aio_write)
 		return -EINVAL;
 
 	old_fs = get_fs();
@@ -460,7 +460,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
 
 	if (!(file->f_mode & FMODE_WRITE))
 		return -EBADF;
-	if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
+	if (!file->f_op->write && !file->f_op->aio_write)
 		return -EINVAL;
 	if (unlikely(!access_ok(VERIFY_READ, buf, count)))
 		return -EFAULT;
@@ -727,11 +727,6 @@ static ssize_t do_readv_writev(int type, struct file *file,
 	io_fn_t fn;
 	iov_fn_t fnv;
 
-	if (!file->f_op) {
-		ret = -EINVAL;
-		goto out;
-	}
-
 	ret = rw_copy_check_uvector(type, uvector, nr_segs,
 				    ARRAY_SIZE(iovstack), iovstack, &iov);
 	if (ret <= 0)
@@ -778,7 +773,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
 {
 	if (!(file->f_mode & FMODE_READ))
 		return -EBADF;
-	if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
+	if (!file->f_op->aio_read && !file->f_op->read)
 		return -EINVAL;
 
 	return do_readv_writev(READ, file, vec, vlen, pos);
@@ -791,7 +786,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
 {
 	if (!(file->f_mode & FMODE_WRITE))
 		return -EBADF;
-	if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
+	if (!file->f_op->aio_write && !file->f_op->write)
 		return -EINVAL;
 
 	return do_readv_writev(WRITE, file, vec, vlen, pos);
@@ -906,10 +901,6 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 	io_fn_t fn;
 	iov_fn_t fnv;
 
-	ret = -EINVAL;
-	if (!file->f_op)
-		goto out;
-
 	ret = -EFAULT;
 	if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
 		goto out;
@@ -965,7 +956,7 @@ static size_t compat_readv(struct file *file,
 		goto out;
 
 	ret = -EINVAL;
-	if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
+	if (!file->f_op->aio_read && !file->f_op->read)
 		goto out;
 
 	ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
@@ -1032,7 +1023,7 @@ static size_t compat_writev(struct file *file,
 		goto out;
 
 	ret = -EINVAL;
-	if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
+	if (!file->f_op->aio_write && !file->f_op->write)
 		goto out;
 
 	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
diff --git a/fs/readdir.c b/fs/readdir.c
index 93d71e574310..5b53d995cae6 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -24,7 +24,7 @@ int iterate_dir(struct file *file, struct dir_context *ctx)
 {
 	struct inode *inode = file_inode(file);
 	int res = -ENOTDIR;
-	if (!file->f_op || !file->f_op->iterate)
+	if (!file->f_op->iterate)
 		goto out;
 
 	res = security_file_permission(file, MAY_READ);
diff --git a/fs/select.c b/fs/select.c
index 35d4adc749d9..55278a36a658 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -455,7 +455,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
 					const struct file_operations *f_op;
 					f_op = f.file->f_op;
 					mask = DEFAULT_POLLMASK;
-					if (f_op && f_op->poll) {
+					if (f_op->poll) {
 						wait_key_set(wait, in, out,
 							     bit, busy_flag);
 						mask = (*f_op->poll)(f.file, wait);
@@ -762,7 +762,7 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait,
 		mask = POLLNVAL;
 		if (f.file) {
 			mask = DEFAULT_POLLMASK;
-			if (f.file->f_op && f.file->f_op->poll) {
+			if (f.file->f_op->poll) {
 				pwait->_key = pollfd->events|POLLERR|POLLHUP;
 				pwait->_key |= busy_flag;
 				mask = f.file->f_op->poll(f.file, pwait);
diff --git a/fs/splice.c b/fs/splice.c
index 3b7ee656f3aa..46a08f772d7d 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -695,7 +695,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
 	loff_t pos = sd->pos;
 	int more;
 
-	if (!likely(file->f_op && file->f_op->sendpage))
+	if (!likely(file->f_op->sendpage))
 		return -EINVAL;
 
 	more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
@@ -1099,7 +1099,7 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
 				loff_t *, size_t, unsigned int);
 
-	if (out->f_op && out->f_op->splice_write)
+	if (out->f_op->splice_write)
 		splice_write = out->f_op->splice_write;
 	else
 		splice_write = default_file_splice_write;
@@ -1125,7 +1125,7 @@ static long do_splice_to(struct file *in, loff_t *ppos,
 	if (unlikely(ret < 0))
 		return ret;
 
-	if (in->f_op && in->f_op->splice_read)
+	if (in->f_op->splice_read)
 		splice_read = in->f_op->splice_read;
 	else
 		splice_read = default_file_splice_read;
diff --git a/fs/sync.c b/fs/sync.c
index 905f3f6b3d85..6c0ca3b75758 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -177,7 +177,7 @@ SYSCALL_DEFINE1(syncfs, int, fd)
  */
 int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
 {
-	if (!file->f_op || !file->f_op->fsync)
+	if (!file->f_op->fsync)
 		return -EINVAL;
 	return file->f_op->fsync(file, start, end, datasync);
 }
diff --git a/mm/memory.c b/mm/memory.c
index ca0003947115..e2bbba42604c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -681,7 +681,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
 	if (vma->vm_ops)
 		printk(KERN_ALERT "vma->vm_ops->fault: %pSR\n",
 		       vma->vm_ops->fault);
-	if (vma->vm_file && vma->vm_file->f_op)
+	if (vma->vm_file)
 		printk(KERN_ALERT "vma->vm_file->f_op->mmap: %pSR\n",
 		       vma->vm_file->f_op->mmap);
 	dump_stack();
diff --git a/mm/mmap.c b/mm/mmap.c
index 9d548512ff8a..ab199dfc9e26 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1299,7 +1299,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 				vm_flags &= ~VM_MAYEXEC;
 			}
 
-			if (!file->f_op || !file->f_op->mmap)
+			if (!file->f_op->mmap)
 				return -ENODEV;
 			if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
 				return -EINVAL;
@@ -1951,7 +1951,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 		return -ENOMEM;
 
 	get_area = current->mm->get_unmapped_area;
-	if (file && file->f_op && file->f_op->get_unmapped_area)
+	if (file && file->f_op->get_unmapped_area)
 		get_area = file->f_op->get_unmapped_area;
 	addr = get_area(file, addr, len, pgoff, flags);
 	if (IS_ERR_VALUE(addr))
diff --git a/mm/nommu.c b/mm/nommu.c
index ecd1f158548e..9e6cb02cba64 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -937,7 +937,7 @@ static int validate_mmap_request(struct file *file,
 		struct address_space *mapping;
 
 		/* files must support mmap */
-		if (!file->f_op || !file->f_op->mmap)
+		if (!file->f_op->mmap)
 			return -ENODEV;
 
 		/* work out if what we've got could possibly be shared
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 3ffda1b3799b..9321a7763067 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -244,10 +244,10 @@ p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
 	if (!ts)
 		return -EREMOTEIO;
 
-	if (!ts->rd->f_op || !ts->rd->f_op->poll)
+	if (!ts->rd->f_op->poll)
 		return -EIO;
 
-	if (!ts->wr->f_op || !ts->wr->f_op->poll)
+	if (!ts->wr->f_op->poll)
 		return -EIO;
 
 	ret = ts->rd->f_op->poll(ts->rd, pt);

From 59aa0da8e232f990be793d0ea23ca74675b236b3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 16 Sep 2013 21:34:53 -0400
Subject: [PATCH 019/101] initialize namespace_sem statically

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 5918fc31a636..827aea4c3242 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -39,7 +39,7 @@ static int mnt_group_start = 1;
 static struct list_head *mount_hashtable __read_mostly;
 static struct list_head *mountpoint_hashtable __read_mostly;
 static struct kmem_cache *mnt_cache __read_mostly;
-static struct rw_semaphore namespace_sem;
+static DECLARE_RWSEM(namespace_sem);
 
 /* /sys/fs */
 struct kobject *fs_kobj;
@@ -2767,8 +2767,6 @@ void __init mnt_init(void)
 	unsigned u;
 	int err;
 
-	init_rwsem(&namespace_sem);
-
 	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
 			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
 

From 44bb4385ce1cfb231d37cbc7bb6faf70f98b7b17 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 16 Sep 2013 21:37:36 -0400
Subject: [PATCH 020/101] fs_is_visible only needs namespace_sem held shared

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 827aea4c3242..0e54ad6ccd8a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2869,7 +2869,7 @@ bool fs_fully_visible(struct file_system_type *type)
 	if (unlikely(!ns))
 		return false;
 
-	namespace_lock();
+	down_read(&namespace_sem);
 	list_for_each_entry(mnt, &ns->list, mnt_list) {
 		struct mount *child;
 		if (mnt->mnt.mnt_sb->s_type != type)
@@ -2890,7 +2890,7 @@ bool fs_fully_visible(struct file_system_type *type)
 	next:	;
 	}
 found:
-	namespace_unlock();
+	up_read(&namespace_sem);
 	return visible;
 }
 

From aa7a574d0c54cc5a0aceb7357b5097342c0844ee Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 16 Sep 2013 22:22:16 -0400
Subject: [PATCH 021/101] dup_mnt_ns(): get rid of pointless grabbing of
 vfsmount_lock

mnt_list is protected by namespace_sem, not vfsmount_lock

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 0e54ad6ccd8a..17ff1470b405 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2444,9 +2444,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 		return ERR_CAST(new);
 	}
 	new_ns->root = new;
-	br_write_lock(&vfsmount_lock);
 	list_add_tail(&new_ns->list, &new->mnt_list);
-	br_write_unlock(&vfsmount_lock);
 
 	/*
 	 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts

From 6339dab869e07a81abd22970696dde6febee094a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 16 Sep 2013 22:41:01 -0400
Subject: [PATCH 022/101] do_remount(): pull touch_mnt_namespace() up

... and don't bother with dropping and regaining vfsmount_lock

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 17ff1470b405..3ee6e59ead55 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1849,14 +1849,10 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 		br_write_lock(&vfsmount_lock);
 		mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK;
 		mnt->mnt.mnt_flags = mnt_flags;
-		br_write_unlock(&vfsmount_lock);
-	}
-	up_write(&sb->s_umount);
-	if (!err) {
-		br_write_lock(&vfsmount_lock);
 		touch_mnt_namespace(mnt->mnt_ns);
 		br_write_unlock(&vfsmount_lock);
 	}
+	up_write(&sb->s_umount);
 	return err;
 }
 

From 649a795affac8507657a72e761ce987d6967016b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 28 Sep 2013 12:41:25 -0400
Subject: [PATCH 023/101] fold mntfree() into mntput_no_expire()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 39 ++++++++++++++++-----------------------
 1 file changed, 16 insertions(+), 23 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 3ee6e59ead55..691cd7c83275 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -872,28 +872,6 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 	return ERR_PTR(err);
 }
 
-static inline void mntfree(struct mount *mnt)
-{
-	struct vfsmount *m = &mnt->mnt;
-	struct super_block *sb = m->mnt_sb;
-
-	/*
-	 * This probably indicates that somebody messed
-	 * up a mnt_want/drop_write() pair.  If this
-	 * happens, the filesystem was probably unable
-	 * to make r/w->r/o transitions.
-	 */
-	/*
-	 * The locking used to deal with mnt_count decrement provides barriers,
-	 * so mnt_get_writers() below is safe.
-	 */
-	WARN_ON(mnt_get_writers(mnt));
-	fsnotify_vfsmount_delete(m);
-	dput(m->mnt_root);
-	free_vfsmnt(mnt);
-	deactivate_super(sb);
-}
-
 static void mntput_no_expire(struct mount *mnt)
 {
 put_again:
@@ -929,7 +907,22 @@ put_again:
 
 	list_del(&mnt->mnt_instance);
 	br_write_unlock(&vfsmount_lock);
-	mntfree(mnt);
+
+	/*
+	 * This probably indicates that somebody messed
+	 * up a mnt_want/drop_write() pair.  If this
+	 * happens, the filesystem was probably unable
+	 * to make r/w->r/o transitions.
+	 */
+	/*
+	 * The locking used to deal with mnt_count decrement provides barriers,
+	 * so mnt_get_writers() below is safe.
+	 */
+	WARN_ON(mnt_get_writers(mnt));
+	fsnotify_vfsmount_delete(&mnt->mnt);
+	dput(mnt->mnt.mnt_root);
+	deactivate_super(mnt->mnt.mnt_sb);
+	free_vfsmnt(mnt);
 }
 
 void mntput(struct vfsmount *mnt)

From 085e83ff0c26842e6b14cd4b471625f6dd8a7bfd Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 28 Sep 2013 12:54:06 -0400
Subject: [PATCH 024/101] fs/namespace.c: bury long-dead define

MNT_WRITER_UNDERFLOW_LIMIT has been missed 4 years ago when it became unused.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 691cd7c83275..cd7dbabf0c6c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -63,8 +63,6 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
 	return tmp & (HASH_SIZE - 1);
 }
 
-#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
-
 /*
  * allocation is serialized by namespace_sem, but we need the spinlock to
  * serialize with freeing.

From 22a7919299c58b768fa86200e5ac651d3e33a37f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 28 Sep 2013 20:29:00 -0400
Subject: [PATCH 025/101] finish_automount() doesn't need vfsmount_lock for
 removal from expiry list

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index cd7dbabf0c6c..f57117d9f0ee 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2064,9 +2064,7 @@ fail:
 	/* remove m from any expiration list it may be on */
 	if (!list_empty(&mnt->mnt_expire)) {
 		namespace_lock();
-		br_write_lock(&vfsmount_lock);
 		list_del_init(&mnt->mnt_expire);
-		br_write_unlock(&vfsmount_lock);
 		namespace_unlock();
 	}
 	mntput(m);

From f6b742d8697ae0aeacb025e6e0ab3c61a6918846 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 28 Sep 2013 20:30:00 -0400
Subject: [PATCH 026/101] mnt_set_expiry() doesn't need vfsmount_lock

->mnt_expire is protected by namespace_sem

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index f57117d9f0ee..e45503d62fed 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2080,11 +2080,9 @@ fail:
 void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
 {
 	namespace_lock();
-	br_write_lock(&vfsmount_lock);
 
 	list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
 
-	br_write_unlock(&vfsmount_lock);
 	namespace_unlock();
 }
 EXPORT_SYMBOL(mnt_set_expiry);

From 9559f68915024ece8be3781003f25aef3677e574 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 28 Sep 2013 20:47:57 -0400
Subject: [PATCH 027/101] fold dup_mnt_ns() into its only surviving caller

should've been done 6 years ago...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 48 ++++++++++++++++++------------------------------
 1 file changed, 18 insertions(+), 30 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index e45503d62fed..c8d214ce30c6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2397,20 +2397,25 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 	return new_ns;
 }
 
-/*
- * Allocate a new namespace structure and populate it with contents
- * copied from the namespace of the passed in task structure.
- */
-static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
-		struct user_namespace *user_ns, struct fs_struct *fs)
+struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
+		struct user_namespace *user_ns, struct fs_struct *new_fs)
 {
 	struct mnt_namespace *new_ns;
 	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
 	struct mount *p, *q;
-	struct mount *old = mnt_ns->root;
+	struct mount *old;
 	struct mount *new;
 	int copy_flags;
 
+	BUG_ON(!ns);
+
+	if (likely(!(flags & CLONE_NEWNS))) {
+		get_mnt_ns(ns);
+		return ns;
+	}
+
+	old = ns->root;
+
 	new_ns = alloc_mnt_ns(user_ns);
 	if (IS_ERR(new_ns))
 		return new_ns;
@@ -2418,7 +2423,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	namespace_lock();
 	/* First pass: copy the tree topology */
 	copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
-	if (user_ns != mnt_ns->user_ns)
+	if (user_ns != ns->user_ns)
 		copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
 	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
 	if (IS_ERR(new)) {
@@ -2438,13 +2443,13 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	q = new;
 	while (p) {
 		q->mnt_ns = new_ns;
-		if (fs) {
-			if (&p->mnt == fs->root.mnt) {
-				fs->root.mnt = mntget(&q->mnt);
+		if (new_fs) {
+			if (&p->mnt == new_fs->root.mnt) {
+				new_fs->root.mnt = mntget(&q->mnt);
 				rootmnt = &p->mnt;
 			}
-			if (&p->mnt == fs->pwd.mnt) {
-				fs->pwd.mnt = mntget(&q->mnt);
+			if (&p->mnt == new_fs->pwd.mnt) {
+				new_fs->pwd.mnt = mntget(&q->mnt);
 				pwdmnt = &p->mnt;
 			}
 		}
@@ -2465,23 +2470,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	return new_ns;
 }
 
-struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
-		struct user_namespace *user_ns, struct fs_struct *new_fs)
-{
-	struct mnt_namespace *new_ns;
-
-	BUG_ON(!ns);
-	get_mnt_ns(ns);
-
-	if (!(flags & CLONE_NEWNS))
-		return ns;
-
-	new_ns = dup_mnt_ns(ns, user_ns, new_fs);
-
-	put_mnt_ns(ns);
-	return new_ns;
-}
-
 /**
  * create_mnt_ns - creates a private namespace and adds a root filesystem
  * @mnt: pointer to the new root filesystem mountpoint

From aba809cf0944fdc5a83ef8b5864cdce2a99b2513 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 28 Sep 2013 23:10:55 -0400
Subject: [PATCH 028/101] namespace.c: get rid of mnt_ghosts

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/mount.h     |  2 +-
 fs/namespace.c | 22 +++++++---------------
 fs/pnode.c     |  3 +--
 3 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/fs/mount.h b/fs/mount.h
index 64a858143ff9..3168dc41bad5 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -55,7 +55,7 @@ struct mount {
 	int mnt_group_id;		/* peer group identifier */
 	int mnt_expiry_mark;		/* true if marked for expiry */
 	int mnt_pinned;
-	int mnt_ghosts;
+	struct path mnt_ex_mountpoint;
 };
 
 #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
diff --git a/fs/namespace.c b/fs/namespace.c
index c8d214ce30c6..846ea43ab0c6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1136,20 +1136,8 @@ static void namespace_unlock(void)
 	while (!list_empty(&head)) {
 		mnt = list_first_entry(&head, struct mount, mnt_hash);
 		list_del_init(&mnt->mnt_hash);
-		if (mnt_has_parent(mnt)) {
-			struct dentry *dentry;
-			struct mount *m;
-
-			br_write_lock(&vfsmount_lock);
-			dentry = mnt->mnt_mountpoint;
-			m = mnt->mnt_parent;
-			mnt->mnt_mountpoint = mnt->mnt.mnt_root;
-			mnt->mnt_parent = mnt;
-			m->mnt_ghosts--;
-			br_write_unlock(&vfsmount_lock);
-			dput(dentry);
-			mntput(&m->mnt);
-		}
+		if (mnt->mnt_ex_mountpoint.mnt)
+			path_put(&mnt->mnt_ex_mountpoint);
 		mntput(&mnt->mnt);
 	}
 }
@@ -1181,8 +1169,12 @@ void umount_tree(struct mount *mnt, int propagate)
 		p->mnt_ns = NULL;
 		list_del_init(&p->mnt_child);
 		if (mnt_has_parent(p)) {
-			p->mnt_parent->mnt_ghosts++;
 			put_mountpoint(p->mnt_mp);
+			/* move the reference to mountpoint into ->mnt_ex_mountpoint */
+			p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint;
+			p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt;
+			p->mnt_mountpoint = p->mnt.mnt_root;
+			p->mnt_parent = p;
 			p->mnt_mp = NULL;
 		}
 		change_mnt_propagation(p, MS_PRIVATE);
diff --git a/fs/pnode.c b/fs/pnode.c
index 9af0df15256e..3cfd48cf887e 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -278,8 +278,7 @@ out:
  */
 static inline int do_refcount_check(struct mount *mnt, int count)
 {
-	int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts;
-	return (mycount > count);
+	return mnt_get_count(mnt) > count;
 }
 
 /*

From aab407fc5c0ce63e9fd4a34a790d7290d3e116a1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 29 Sep 2013 10:59:59 -0400
Subject: [PATCH 029/101] don't bother with vfsmount_lock in mounts_poll()

wake_up_interruptible/poll_wait provide sufficient barriers;
just use ACCESS_ONCE() to fetch ns->event and that's it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc_namespace.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 5fe34c355e85..439406e081af 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -20,15 +20,15 @@ static unsigned mounts_poll(struct file *file, poll_table *wait)
 	struct proc_mounts *p = proc_mounts(file->private_data);
 	struct mnt_namespace *ns = p->ns;
 	unsigned res = POLLIN | POLLRDNORM;
+	int event;
 
 	poll_wait(file, &p->ns->poll, wait);
 
-	br_read_lock(&vfsmount_lock);
-	if (p->m.poll_event != ns->event) {
-		p->m.poll_event = ns->event;
+	event = ACCESS_ONCE(ns->event);
+	if (p->m.poll_event != event) {
+		p->m.poll_event = event;
 		res |= POLLERR | POLLPRI;
 	}
-	br_read_unlock(&vfsmount_lock);
 
 	return res;
 }

From 719ea2fbb553ab3f61a174a4b5861289dcc46cb1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 29 Sep 2013 11:24:49 -0400
Subject: [PATCH 030/101] new helpers: lock_mount_hash/unlock_mount_hash

aka br_write_{lock,unlock} of vfsmount_lock.  Inlines in fs/mount.h,
vfsmount_lock extern moved over there as well.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fs-writeback.c |  1 +
 fs/internal.h     |  4 --
 fs/mount.h        | 13 +++++++
 fs/namespace.c    | 96 +++++++++++++++++++++++------------------------
 fs/pnode.c        |  4 +-
 5 files changed, 64 insertions(+), 54 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 9f4935b8f208..09c11329a17c 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -26,6 +26,7 @@
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/tracepoint.h>
+#include <linux/device.h>
 #include "internal.h"
 
 /*
diff --git a/fs/internal.h b/fs/internal.h
index 513e0d859a6c..4a11e75ce14d 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -9,8 +9,6 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/lglock.h>
-
 struct super_block;
 struct file_system_type;
 struct linux_binprm;
@@ -62,8 +60,6 @@ extern int sb_prepare_remount_readonly(struct super_block *);
 
 extern void __init mnt_init(void);
 
-extern struct lglock vfsmount_lock;
-
 extern int __mnt_want_write(struct vfsmount *);
 extern int __mnt_want_write_file(struct file *);
 extern void __mnt_drop_write(struct vfsmount *);
diff --git a/fs/mount.h b/fs/mount.h
index 3168dc41bad5..7076f25af35d 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -1,6 +1,7 @@
 #include <linux/mount.h>
 #include <linux/seq_file.h>
 #include <linux/poll.h>
+#include <linux/lglock.h>
 
 struct mnt_namespace {
 	atomic_t		count;
@@ -83,6 +84,18 @@ static inline void get_mnt_ns(struct mnt_namespace *ns)
 	atomic_inc(&ns->count);
 }
 
+extern struct lglock vfsmount_lock;
+
+static inline void lock_mount_hash(void)
+{
+	br_write_lock(&vfsmount_lock);
+}
+
+static inline void unlock_mount_hash(void)
+{
+	br_write_unlock(&vfsmount_lock);
+}
+
 struct proc_mounts {
 	struct seq_file m;
 	struct mnt_namespace *ns;
diff --git a/fs/namespace.c b/fs/namespace.c
index 846ea43ab0c6..5cbe8cefadb5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -456,7 +456,7 @@ static int mnt_make_readonly(struct mount *mnt)
 {
 	int ret = 0;
 
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
 	/*
 	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
@@ -490,15 +490,15 @@ static int mnt_make_readonly(struct mount *mnt)
 	 */
 	smp_wmb();
 	mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 	return ret;
 }
 
 static void __mnt_unmake_readonly(struct mount *mnt)
 {
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	mnt->mnt.mnt_flags &= ~MNT_READONLY;
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 }
 
 int sb_prepare_remount_readonly(struct super_block *sb)
@@ -510,7 +510,7 @@ int sb_prepare_remount_readonly(struct super_block *sb)
 	if (atomic_long_read(&sb->s_remove_count))
 		return -EBUSY;
 
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
 		if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
 			mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
@@ -532,7 +532,7 @@ int sb_prepare_remount_readonly(struct super_block *sb)
 		if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
 			mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
 	}
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 
 	return err;
 }
@@ -794,9 +794,9 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
 	mnt->mnt.mnt_sb = root->d_sb;
 	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
 	mnt->mnt_parent = mnt;
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 	return &mnt->mnt;
 }
 EXPORT_SYMBOL_GPL(vfs_kern_mount);
@@ -837,9 +837,9 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 	mnt->mnt.mnt_root = dget(root);
 	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
 	mnt->mnt_parent = mnt;
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 
 	if ((flag & CL_SLAVE) ||
 	    ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
@@ -883,28 +883,28 @@ put_again:
 	}
 	br_read_unlock(&vfsmount_lock);
 
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	mnt_add_count(mnt, -1);
 	if (mnt_get_count(mnt)) {
-		br_write_unlock(&vfsmount_lock);
+		unlock_mount_hash();
 		return;
 	}
 #else
 	mnt_add_count(mnt, -1);
 	if (likely(mnt_get_count(mnt)))
 		return;
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 #endif
 	if (unlikely(mnt->mnt_pinned)) {
 		mnt_add_count(mnt, mnt->mnt_pinned + 1);
 		mnt->mnt_pinned = 0;
-		br_write_unlock(&vfsmount_lock);
+		unlock_mount_hash();
 		acct_auto_close_mnt(&mnt->mnt);
 		goto put_again;
 	}
 
 	list_del(&mnt->mnt_instance);
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 
 	/*
 	 * This probably indicates that somebody messed
@@ -945,21 +945,21 @@ EXPORT_SYMBOL(mntget);
 
 void mnt_pin(struct vfsmount *mnt)
 {
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	real_mount(mnt)->mnt_pinned++;
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 }
 EXPORT_SYMBOL(mnt_pin);
 
 void mnt_unpin(struct vfsmount *m)
 {
 	struct mount *mnt = real_mount(m);
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	if (mnt->mnt_pinned) {
 		mnt_add_count(mnt, 1);
 		mnt->mnt_pinned--;
 	}
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 }
 EXPORT_SYMBOL(mnt_unpin);
 
@@ -1076,12 +1076,12 @@ int may_umount_tree(struct vfsmount *m)
 	BUG_ON(!m);
 
 	/* write lock needed for mnt_get_count */
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	for (p = mnt; p; p = next_mnt(p, mnt)) {
 		actual_refs += mnt_get_count(p);
 		minimum_refs += 2;
 	}
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 
 	if (actual_refs > minimum_refs)
 		return 0;
@@ -1108,10 +1108,10 @@ int may_umount(struct vfsmount *mnt)
 {
 	int ret = 1;
 	down_read(&namespace_sem);
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	if (propagate_mount_busy(real_mount(mnt), 2))
 		ret = 0;
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 	up_read(&namespace_sem);
 	return ret;
 }
@@ -1208,12 +1208,12 @@ static int do_umount(struct mount *mnt, int flags)
 		 * probably don't strictly need the lock here if we examined
 		 * all race cases, but it's a slowpath.
 		 */
-		br_write_lock(&vfsmount_lock);
+		lock_mount_hash();
 		if (mnt_get_count(mnt) != 2) {
-			br_write_unlock(&vfsmount_lock);
+			unlock_mount_hash();
 			return -EBUSY;
 		}
-		br_write_unlock(&vfsmount_lock);
+		unlock_mount_hash();
 
 		if (!xchg(&mnt->mnt_expiry_mark, 1))
 			return -EAGAIN;
@@ -1255,7 +1255,7 @@ static int do_umount(struct mount *mnt, int flags)
 	}
 
 	namespace_lock();
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	event++;
 
 	if (!(flags & MNT_DETACH))
@@ -1267,7 +1267,7 @@ static int do_umount(struct mount *mnt, int flags)
 			umount_tree(mnt, 1);
 		retval = 0;
 	}
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 	namespace_unlock();
 	return retval;
 }
@@ -1410,18 +1410,18 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 			q = clone_mnt(p, p->mnt.mnt_root, flag);
 			if (IS_ERR(q))
 				goto out;
-			br_write_lock(&vfsmount_lock);
+			lock_mount_hash();
 			list_add_tail(&q->mnt_list, &res->mnt_list);
 			attach_mnt(q, parent, p->mnt_mp);
-			br_write_unlock(&vfsmount_lock);
+			unlock_mount_hash();
 		}
 	}
 	return res;
 out:
 	if (res) {
-		br_write_lock(&vfsmount_lock);
+		lock_mount_hash();
 		umount_tree(res, 0);
-		br_write_unlock(&vfsmount_lock);
+		unlock_mount_hash();
 	}
 	return q;
 }
@@ -1443,9 +1443,9 @@ struct vfsmount *collect_mounts(struct path *path)
 void drop_collected_mounts(struct vfsmount *mnt)
 {
 	namespace_lock();
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	umount_tree(real_mount(mnt), 0);
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 	namespace_unlock();
 }
 
@@ -1572,7 +1572,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 	if (err)
 		goto out_cleanup_ids;
 
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 
 	if (IS_MNT_SHARED(dest_mnt)) {
 		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
@@ -1591,7 +1591,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 		list_del_init(&child->mnt_hash);
 		commit_tree(child);
 	}
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 
 	return 0;
 
@@ -1693,10 +1693,10 @@ static int do_change_type(struct path *path, int flag)
 			goto out_unlock;
 	}
 
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
 		change_mnt_propagation(m, type);
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 
  out_unlock:
 	namespace_unlock();
@@ -1768,9 +1768,9 @@ static int do_loopback(struct path *path, const char *old_name,
 
 	err = graft_tree(mnt, parent, mp);
 	if (err) {
-		br_write_lock(&vfsmount_lock);
+		lock_mount_hash();
 		umount_tree(mnt, 0);
-		br_write_unlock(&vfsmount_lock);
+		unlock_mount_hash();
 	}
 out2:
 	unlock_mount(mp);
@@ -1829,11 +1829,11 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 	else
 		err = do_remount_sb(sb, flags, data, 0);
 	if (!err) {
-		br_write_lock(&vfsmount_lock);
+		lock_mount_hash();
 		mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK;
 		mnt->mnt.mnt_flags = mnt_flags;
 		touch_mnt_namespace(mnt->mnt_ns);
-		br_write_unlock(&vfsmount_lock);
+		unlock_mount_hash();
 	}
 	up_write(&sb->s_umount);
 	return err;
@@ -2093,7 +2093,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 		return;
 
 	namespace_lock();
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 
 	/* extract from the expiration list every vfsmount that matches the
 	 * following criteria:
@@ -2112,7 +2112,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 		touch_mnt_namespace(mnt->mnt_ns);
 		umount_tree(mnt, 1);
 	}
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 	namespace_unlock();
 }
 
@@ -2662,7 +2662,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	if (!is_path_reachable(old_mnt, old.dentry, &new))
 		goto out4;
 	root_mp->m_count++; /* pin it so it won't go away */
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	detach_mnt(new_mnt, &parent_path);
 	detach_mnt(root_mnt, &root_parent);
 	if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
@@ -2674,7 +2674,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	/* mount new_root on / */
 	attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
 	touch_mnt_namespace(current->nsproxy->mnt_ns);
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 	chroot_fs_refs(&root, &new);
 	put_mountpoint(root_mp);
 	error = 0;
@@ -2784,9 +2784,9 @@ void kern_unmount(struct vfsmount *mnt)
 {
 	/* release long term mount so mount point can be released */
 	if (!IS_ERR_OR_NULL(mnt)) {
-		br_write_lock(&vfsmount_lock);
+		lock_mount_hash();
 		real_mount(mnt)->mnt_ns = NULL;
-		br_write_unlock(&vfsmount_lock);
+		unlock_mount_hash();
 		mntput(mnt);
 	}
 }
diff --git a/fs/pnode.c b/fs/pnode.c
index 3cfd48cf887e..58933fd149ad 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -264,12 +264,12 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
 		prev_src_mnt  = child;
 	}
 out:
-	br_write_lock(&vfsmount_lock);
+	lock_mount_hash();
 	while (!list_empty(&tmp_list)) {
 		child = list_first_entry(&tmp_list, struct mount, mnt_hash);
 		umount_tree(child, 0);
 	}
-	br_write_unlock(&vfsmount_lock);
+	unlock_mount_hash();
 	return ret;
 }
 

From 966c1f75f8e1e8e44d8277f7cc3598f6abbdda2f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 29 Sep 2013 18:09:05 -0400
Subject: [PATCH 031/101] isofs: don't pass dentry to isofs_hash{i,}_common()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/isofs/inode.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index e5d408a7ea4a..4a9e10ea13f2 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -181,7 +181,7 @@ struct iso9660_options{
  * Compute the hash for the isofs name corresponding to the dentry.
  */
 static int
-isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms)
+isofs_hash_common(struct qstr *qstr, int ms)
 {
 	const char *name;
 	int len;
@@ -202,7 +202,7 @@ isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms)
  * Compute the hash for the isofs name corresponding to the dentry.
  */
 static int
-isofs_hashi_common(const struct dentry *dentry, struct qstr *qstr, int ms)
+isofs_hashi_common(struct qstr *qstr, int ms)
 {
 	const char *name;
 	int len;
@@ -259,13 +259,13 @@ static int isofs_dentry_cmp_common(
 static int
 isofs_hash(const struct dentry *dentry, struct qstr *qstr)
 {
-	return isofs_hash_common(dentry, qstr, 0);
+	return isofs_hash_common(qstr, 0);
 }
 
 static int
 isofs_hashi(const struct dentry *dentry, struct qstr *qstr)
 {
-	return isofs_hashi_common(dentry, qstr, 0);
+	return isofs_hashi_common(qstr, 0);
 }
 
 static int
@@ -286,13 +286,13 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry,
 static int
 isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr)
 {
-	return isofs_hash_common(dentry, qstr, 1);
+	return isofs_hash_common(qstr, 1);
 }
 
 static int
 isofs_hashi_ms(const struct dentry *dentry, struct qstr *qstr)
 {
-	return isofs_hashi_common(dentry, qstr, 1);
+	return isofs_hashi_common(qstr, 1);
 }
 
 static int

From 7eb5e8826911f2792179f99e77e75fbb7ef53a4a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 1 Oct 2013 15:09:58 -0400
Subject: [PATCH 032/101] uninline destroy_super(), consolidate alloc_super()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/super.c | 206 ++++++++++++++++++++++-------------------------------
 1 file changed, 86 insertions(+), 120 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index 0225c20f8770..efa6e488a95c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -129,33 +129,27 @@ static unsigned long super_cache_count(struct shrinker *shrink,
 	return total_objects;
 }
 
-static int init_sb_writers(struct super_block *s, struct file_system_type *type)
-{
-	int err;
-	int i;
-
-	for (i = 0; i < SB_FREEZE_LEVELS; i++) {
-		err = percpu_counter_init(&s->s_writers.counter[i], 0);
-		if (err < 0)
-			goto err_out;
-		lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
-				 &type->s_writers_key[i], 0);
-	}
-	init_waitqueue_head(&s->s_writers.wait);
-	init_waitqueue_head(&s->s_writers.wait_unfrozen);
-	return 0;
-err_out:
-	while (--i >= 0)
-		percpu_counter_destroy(&s->s_writers.counter[i]);
-	return err;
-}
-
-static void destroy_sb_writers(struct super_block *s)
+/**
+ *	destroy_super	-	frees a superblock
+ *	@s: superblock to free
+ *
+ *	Frees a superblock.
+ */
+static void destroy_super(struct super_block *s)
 {
 	int i;
-
+	list_lru_destroy(&s->s_dentry_lru);
+	list_lru_destroy(&s->s_inode_lru);
+#ifdef CONFIG_SMP
+	free_percpu(s->s_files);
+#endif
 	for (i = 0; i < SB_FREEZE_LEVELS; i++)
 		percpu_counter_destroy(&s->s_writers.counter[i]);
+	security_sb_free(s);
+	WARN_ON(!list_empty(&s->s_mounts));
+	kfree(s->s_subtype);
+	kfree(s->s_options);
+	kfree(s);
 }
 
 /**
@@ -170,111 +164,83 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
 {
 	struct super_block *s = kzalloc(sizeof(struct super_block),  GFP_USER);
 	static const struct super_operations default_op;
+	int i;
 
-	if (s) {
-		if (security_sb_alloc(s))
-			goto out_free_sb;
+	if (!s)
+		return NULL;
+
+	if (security_sb_alloc(s))
+		goto fail;
 
 #ifdef CONFIG_SMP
-		s->s_files = alloc_percpu(struct list_head);
-		if (!s->s_files)
-			goto err_out;
-		else {
-			int i;
-
-			for_each_possible_cpu(i)
-				INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i));
-		}
+	s->s_files = alloc_percpu(struct list_head);
+	if (!s->s_files)
+		goto fail;
+	for_each_possible_cpu(i)
+		INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i));
 #else
-		INIT_LIST_HEAD(&s->s_files);
+	INIT_LIST_HEAD(&s->s_files);
 #endif
-		if (init_sb_writers(s, type))
-			goto err_out;
-		s->s_flags = flags;
-		s->s_bdi = &default_backing_dev_info;
-		INIT_HLIST_NODE(&s->s_instances);
-		INIT_HLIST_BL_HEAD(&s->s_anon);
-		INIT_LIST_HEAD(&s->s_inodes);
-
-		if (list_lru_init(&s->s_dentry_lru))
-			goto err_out;
-		if (list_lru_init(&s->s_inode_lru))
-			goto err_out_dentry_lru;
-
-		INIT_LIST_HEAD(&s->s_mounts);
-		init_rwsem(&s->s_umount);
-		lockdep_set_class(&s->s_umount, &type->s_umount_key);
-		/*
-		 * sget() can have s_umount recursion.
-		 *
-		 * When it cannot find a suitable sb, it allocates a new
-		 * one (this one), and tries again to find a suitable old
-		 * one.
-		 *
-		 * In case that succeeds, it will acquire the s_umount
-		 * lock of the old one. Since these are clearly distrinct
-		 * locks, and this object isn't exposed yet, there's no
-		 * risk of deadlocks.
-		 *
-		 * Annotate this by putting this lock in a different
-		 * subclass.
-		 */
-		down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
-		s->s_count = 1;
-		atomic_set(&s->s_active, 1);
-		mutex_init(&s->s_vfs_rename_mutex);
-		lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
-		mutex_init(&s->s_dquot.dqio_mutex);
-		mutex_init(&s->s_dquot.dqonoff_mutex);
-		init_rwsem(&s->s_dquot.dqptr_sem);
-		s->s_maxbytes = MAX_NON_LFS;
-		s->s_op = &default_op;
-		s->s_time_gran = 1000000000;
-		s->cleancache_poolid = -1;
-
-		s->s_shrink.seeks = DEFAULT_SEEKS;
-		s->s_shrink.scan_objects = super_cache_scan;
-		s->s_shrink.count_objects = super_cache_count;
-		s->s_shrink.batch = 1024;
-		s->s_shrink.flags = SHRINKER_NUMA_AWARE;
+	for (i = 0; i < SB_FREEZE_LEVELS; i++) {
+		if (percpu_counter_init(&s->s_writers.counter[i], 0) < 0)
+			goto fail;
+		lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
+				 &type->s_writers_key[i], 0);
 	}
-out:
+	init_waitqueue_head(&s->s_writers.wait);
+	init_waitqueue_head(&s->s_writers.wait_unfrozen);
+	s->s_flags = flags;
+	s->s_bdi = &default_backing_dev_info;
+	INIT_HLIST_NODE(&s->s_instances);
+	INIT_HLIST_BL_HEAD(&s->s_anon);
+	INIT_LIST_HEAD(&s->s_inodes);
+
+	if (list_lru_init(&s->s_dentry_lru))
+		goto fail;
+	if (list_lru_init(&s->s_inode_lru))
+		goto fail;
+
+	INIT_LIST_HEAD(&s->s_mounts);
+	init_rwsem(&s->s_umount);
+	lockdep_set_class(&s->s_umount, &type->s_umount_key);
+	/*
+	 * sget() can have s_umount recursion.
+	 *
+	 * When it cannot find a suitable sb, it allocates a new
+	 * one (this one), and tries again to find a suitable old
+	 * one.
+	 *
+	 * In case that succeeds, it will acquire the s_umount
+	 * lock of the old one. Since these are clearly distrinct
+	 * locks, and this object isn't exposed yet, there's no
+	 * risk of deadlocks.
+	 *
+	 * Annotate this by putting this lock in a different
+	 * subclass.
+	 */
+	down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
+	s->s_count = 1;
+	atomic_set(&s->s_active, 1);
+	mutex_init(&s->s_vfs_rename_mutex);
+	lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
+	mutex_init(&s->s_dquot.dqio_mutex);
+	mutex_init(&s->s_dquot.dqonoff_mutex);
+	init_rwsem(&s->s_dquot.dqptr_sem);
+	s->s_maxbytes = MAX_NON_LFS;
+	s->s_op = &default_op;
+	s->s_time_gran = 1000000000;
+	s->cleancache_poolid = -1;
+
+	s->s_shrink.seeks = DEFAULT_SEEKS;
+	s->s_shrink.scan_objects = super_cache_scan;
+	s->s_shrink.count_objects = super_cache_count;
+	s->s_shrink.batch = 1024;
+	s->s_shrink.flags = SHRINKER_NUMA_AWARE;
 	return s;
 
-err_out_dentry_lru:
-	list_lru_destroy(&s->s_dentry_lru);
-err_out:
-	security_sb_free(s);
-#ifdef CONFIG_SMP
-	if (s->s_files)
-		free_percpu(s->s_files);
-#endif
-	destroy_sb_writers(s);
-out_free_sb:
-	kfree(s);
-	s = NULL;
-	goto out;
-}
-
-/**
- *	destroy_super	-	frees a superblock
- *	@s: superblock to free
- *
- *	Frees a superblock.
- */
-static inline void destroy_super(struct super_block *s)
-{
-	list_lru_destroy(&s->s_dentry_lru);
-	list_lru_destroy(&s->s_inode_lru);
-#ifdef CONFIG_SMP
-	free_percpu(s->s_files);
-#endif
-	destroy_sb_writers(s);
-	security_sb_free(s);
-	WARN_ON(!list_empty(&s->s_mounts));
-	kfree(s->s_subtype);
-	kfree(s->s_options);
-	kfree(s);
+fail:
+	destroy_super(s);
+	return NULL;
 }
 
 /* Superblock refcounting  */

From 474279dc0f7745124fc76b474c8dc1294f8e87ce Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 1 Oct 2013 16:11:26 -0400
Subject: [PATCH 033/101] split __lookup_mnt() in two functions

Instead of passing the direction as argument (and checking it on every
step through the hash chain), just have separate __lookup_mnt() and
__lookup_mnt_last().  And use the standard iterators...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/mount.h     |  3 ++-
 fs/namei.c     |  4 ++--
 fs/namespace.c | 42 +++++++++++++++++++++++-------------------
 fs/pnode.c     |  6 +++---
 4 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/fs/mount.h b/fs/mount.h
index 7076f25af35d..f0866076de6e 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -77,7 +77,8 @@ static inline int is_mounted(struct vfsmount *mnt)
 	return !IS_ERR_OR_NULL(real_mount(mnt));
 }
 
-extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
+extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
+extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);
 
 static inline void get_mnt_ns(struct mnt_namespace *ns)
 {
diff --git a/fs/namei.c b/fs/namei.c
index 645268f23eb6..1f844fbfce72 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1111,7 +1111,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
 		if (!d_mountpoint(path->dentry))
 			break;
 
-		mounted = __lookup_mnt(path->mnt, path->dentry, 1);
+		mounted = __lookup_mnt(path->mnt, path->dentry);
 		if (!mounted)
 			break;
 		path->mnt = &mounted->mnt;
@@ -1132,7 +1132,7 @@ static void follow_mount_rcu(struct nameidata *nd)
 {
 	while (d_mountpoint(nd->path.dentry)) {
 		struct mount *mounted;
-		mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1);
+		mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
 		if (!mounted)
 			break;
 		nd->path.mnt = &mounted->mnt;
diff --git a/fs/namespace.c b/fs/namespace.c
index 5cbe8cefadb5..500202ce10db 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -548,29 +548,33 @@ static void free_vfsmnt(struct mount *mnt)
 }
 
 /*
- * find the first or last mount at @dentry on vfsmount @mnt depending on
- * @dir. If @dir is set return the first mount else return the last mount.
+ * find the first mount at @dentry on vfsmount @mnt.
  * vfsmount_lock must be held for read or write.
  */
-struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
-			      int dir)
+struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 {
 	struct list_head *head = mount_hashtable + hash(mnt, dentry);
-	struct list_head *tmp = head;
-	struct mount *p, *found = NULL;
+	struct mount *p;
 
-	for (;;) {
-		tmp = dir ? tmp->next : tmp->prev;
-		p = NULL;
-		if (tmp == head)
-			break;
-		p = list_entry(tmp, struct mount, mnt_hash);
-		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) {
-			found = p;
-			break;
-		}
-	}
-	return found;
+	list_for_each_entry(p, head, mnt_hash)
+		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
+			return p;
+	return NULL;
+}
+
+/*
+ * find the last mount at @dentry on vfsmount @mnt.
+ * vfsmount_lock must be held for read or write.
+ */
+struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
+{
+	struct list_head *head = mount_hashtable + hash(mnt, dentry);
+	struct mount *p;
+
+	list_for_each_entry_reverse(p, head, mnt_hash)
+		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
+			return p;
+	return NULL;
 }
 
 /*
@@ -594,7 +598,7 @@ struct vfsmount *lookup_mnt(struct path *path)
 	struct mount *child_mnt;
 
 	br_read_lock(&vfsmount_lock);
-	child_mnt = __lookup_mnt(path->mnt, path->dentry, 1);
+	child_mnt = __lookup_mnt(path->mnt, path->dentry);
 	if (child_mnt) {
 		mnt_add_count(child_mnt, 1);
 		br_read_unlock(&vfsmount_lock);
diff --git a/fs/pnode.c b/fs/pnode.c
index 58933fd149ad..c7221bb19801 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -310,7 +310,7 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
 
 	for (m = propagation_next(parent, parent); m;
 	     		m = propagation_next(m, parent)) {
-		child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint, 0);
+		child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
 		if (child && list_empty(&child->mnt_mounts) &&
 		    (ret = do_refcount_check(child, 1)))
 			break;
@@ -332,8 +332,8 @@ static void __propagate_umount(struct mount *mnt)
 	for (m = propagation_next(parent, parent); m;
 			m = propagation_next(m, parent)) {
 
-		struct mount *child = __lookup_mnt(&m->mnt,
-					mnt->mnt_mountpoint, 0);
+		struct mount *child = __lookup_mnt_last(&m->mnt,
+						mnt->mnt_mountpoint);
 		/*
 		 * umount the child only if the child has no
 		 * other children

From 94e92a6e772e009e27083a7f42d49a179e5f668c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 1 Oct 2013 16:18:06 -0400
Subject: [PATCH 034/101] move taking vfsmount_lock down into prepend_path()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 41000305d716..d888223a5486 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2889,6 +2889,7 @@ static int prepend_path(const struct path *path,
 	char *bptr;
 	int blen;
 
+	br_read_lock(&vfsmount_lock);
 	rcu_read_lock();
 restart:
 	bptr = *buffer;
@@ -2935,6 +2936,7 @@ restart:
 		goto restart;
 	}
 	done_seqretry(&rename_lock, seq);
+	br_read_unlock(&vfsmount_lock);
 
 	if (error >= 0 && bptr == *buffer) {
 		if (--blen < 0)
@@ -2971,9 +2973,7 @@ char *__d_path(const struct path *path,
 	int error;
 
 	prepend(&res, &buflen, "\0", 1);
-	br_read_lock(&vfsmount_lock);
 	error = prepend_path(path, root, &res, &buflen);
-	br_read_unlock(&vfsmount_lock);
 
 	if (error < 0)
 		return ERR_PTR(error);
@@ -2990,9 +2990,7 @@ char *d_absolute_path(const struct path *path,
 	int error;
 
 	prepend(&res, &buflen, "\0", 1);
-	br_read_lock(&vfsmount_lock);
 	error = prepend_path(path, &root, &res, &buflen);
-	br_read_unlock(&vfsmount_lock);
 
 	if (error > 1)
 		error = -EINVAL;
@@ -3067,9 +3065,7 @@ char *d_path(const struct path *path, char *buf, int buflen)
 
 	rcu_read_lock();
 	get_fs_root_rcu(current->fs, &root);
-	br_read_lock(&vfsmount_lock);
 	error = path_with_deleted(path, &root, &res, &buflen);
-	br_read_unlock(&vfsmount_lock);
 	rcu_read_unlock();
 
 	if (error < 0)
@@ -3224,7 +3220,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 	get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
 
 	error = -ENOENT;
-	br_read_lock(&vfsmount_lock);
 	if (!d_unlinked(pwd.dentry)) {
 		unsigned long len;
 		char *cwd = page + PATH_MAX;
@@ -3232,7 +3227,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 
 		prepend(&cwd, &buflen, "\0", 1);
 		error = prepend_path(&pwd, &root, &cwd, &buflen);
-		br_read_unlock(&vfsmount_lock);
 		rcu_read_unlock();
 
 		if (error < 0)
@@ -3253,7 +3247,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 				error = -EFAULT;
 		}
 	} else {
-		br_read_unlock(&vfsmount_lock);
 		rcu_read_unlock();
 	}
 

From b70a80e7a133a0c86f2fa078e7c144597c516415 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Oct 2013 16:44:54 +0200
Subject: [PATCH 035/101] vfs: introduce d_instantiate_no_diralias()

...which just returns -EBUSY if a directory alias would be created.

This is to be used by fuse mkdir to make sure that a buggy or malicious
userspace filesystem doesn't do anything nasty.  Previously fuse used a
private mutex for this purpose, which can now go away.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/dcache.c            | 27 +++++++++++++++++++++++++++
 fs/fuse/dir.c          | 40 +++++-----------------------------------
 fs/fuse/fuse_i.h       |  3 ---
 fs/fuse/inode.c        |  2 --
 include/linux/dcache.h |  1 +
 5 files changed, 33 insertions(+), 40 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index d888223a5486..c8e83d0d61ac 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1801,6 +1801,33 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 
 EXPORT_SYMBOL(d_instantiate_unique);
 
+/**
+ * d_instantiate_no_diralias - instantiate a non-aliased dentry
+ * @entry: dentry to complete
+ * @inode: inode to attach to this dentry
+ *
+ * Fill in inode information in the entry.  If a directory alias is found, then
+ * return an error (and drop inode).  Together with d_materialise_unique() this
+ * guarantees that a directory inode may never have more than one alias.
+ */
+int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode)
+{
+	BUG_ON(!hlist_unhashed(&entry->d_alias));
+
+	spin_lock(&inode->i_lock);
+	if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) {
+		spin_unlock(&inode->i_lock);
+		iput(inode);
+		return -EBUSY;
+	}
+	__d_instantiate(entry, inode);
+	spin_unlock(&inode->i_lock);
+	security_d_instantiate(entry, inode);
+
+	return 0;
+}
+EXPORT_SYMBOL(d_instantiate_no_diralias);
+
 struct dentry *d_make_root(struct inode *root_inode)
 {
 	struct dentry *res = NULL;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b7989f2ab4c4..c3eb2c46c8f1 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -342,24 +342,6 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
 	return err;
 }
 
-static struct dentry *fuse_materialise_dentry(struct dentry *dentry,
-					      struct inode *inode)
-{
-	struct dentry *newent;
-
-	if (inode && S_ISDIR(inode->i_mode)) {
-		struct fuse_conn *fc = get_fuse_conn(inode);
-
-		mutex_lock(&fc->inst_mutex);
-		newent = d_materialise_unique(dentry, inode);
-		mutex_unlock(&fc->inst_mutex);
-	} else {
-		newent = d_materialise_unique(dentry, inode);
-	}
-
-	return newent;
-}
-
 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 				  unsigned int flags)
 {
@@ -382,7 +364,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
 		goto out_iput;
 
-	newent = fuse_materialise_dentry(entry, inode);
+	newent = d_materialise_unique(entry, inode);
 	err = PTR_ERR(newent);
 	if (IS_ERR(newent))
 		goto out_err;
@@ -601,21 +583,9 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
 	}
 	kfree(forget);
 
-	if (S_ISDIR(inode->i_mode)) {
-		struct dentry *alias;
-		mutex_lock(&fc->inst_mutex);
-		alias = d_find_alias(inode);
-		if (alias) {
-			/* New directory must have moved since mkdir */
-			mutex_unlock(&fc->inst_mutex);
-			dput(alias);
-			iput(inode);
-			return -EBUSY;
-		}
-		d_instantiate(entry, inode);
-		mutex_unlock(&fc->inst_mutex);
-	} else
-		d_instantiate(entry, inode);
+	err = d_instantiate_no_diralias(entry, inode);
+	if (err)
+		return err;
 
 	fuse_change_entry_timeout(entry, &outarg);
 	fuse_invalidate_attr(dir);
@@ -1284,7 +1254,7 @@ static int fuse_direntplus_link(struct file *file,
 	if (!inode)
 		goto out;
 
-	alias = fuse_materialise_dentry(dentry, inode);
+	alias = d_materialise_unique(dentry, inode);
 	err = PTR_ERR(alias);
 	if (IS_ERR(alias))
 		goto out;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 5b9e6f3b6aef..fe446c222e3f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -374,9 +374,6 @@ struct fuse_conn {
 	/** Lock protecting accessess to  members of this structure */
 	spinlock_t lock;
 
-	/** Mutex protecting against directory alias creation */
-	struct mutex inst_mutex;
-
 	/** Refcount */
 	atomic_t count;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index a8ce6dab60a0..1c15613c64f8 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -565,7 +565,6 @@ void fuse_conn_init(struct fuse_conn *fc)
 {
 	memset(fc, 0, sizeof(*fc));
 	spin_lock_init(&fc->lock);
-	mutex_init(&fc->inst_mutex);
 	init_rwsem(&fc->killsb);
 	atomic_set(&fc->count, 1);
 	init_waitqueue_head(&fc->waitq);
@@ -596,7 +595,6 @@ void fuse_conn_put(struct fuse_conn *fc)
 	if (atomic_dec_and_test(&fc->count)) {
 		if (fc->destroy_req)
 			fuse_request_free(fc->destroy_req);
-		mutex_destroy(&fc->inst_mutex);
 		fc->release(fc);
 	}
 }
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 59066e0b4ff1..716c3760ee39 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -224,6 +224,7 @@ static inline int dname_external(const struct dentry *dentry)
 extern void d_instantiate(struct dentry *, struct inode *);
 extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
 extern struct dentry * d_materialise_unique(struct dentry *, struct inode *);
+extern int d_instantiate_no_diralias(struct dentry *, struct inode *);
 extern void __d_drop(struct dentry *dentry);
 extern void d_drop(struct dentry *dentry);
 extern void d_delete(struct dentry *);

From e2fec7c35582e7bb41cccc1761faa2af4dc17627 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Oct 2013 17:06:56 -0400
Subject: [PATCH 036/101] make freeing super_block rcu-delayed

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/super.c         | 2 +-
 include/linux/fs.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/super.c b/fs/super.c
index efa6e488a95c..743bb7118053 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -149,7 +149,7 @@ static void destroy_super(struct super_block *s)
 	WARN_ON(!list_empty(&s->s_mounts));
 	kfree(s->s_subtype);
 	kfree(s->s_options);
-	kfree(s);
+	kfree_rcu(s, rcu);
 }
 
 /**
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b09e4e1d747a..2ab8a67ee054 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1330,6 +1330,7 @@ struct super_block {
 	 */
 	struct list_lru		s_dentry_lru ____cacheline_aligned_in_smp;
 	struct list_lru		s_inode_lru ____cacheline_aligned_in_smp;
+	struct rcu_head		rcu;
 };
 
 extern struct timespec current_fs_time(struct super_block *sb);

From 30687e0a47e89f56489ab73965ee88231e611986 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Oct 2013 12:25:10 -0400
Subject: [PATCH 037/101] hpfs: make freeing sbi and codetables rcu-delayed

makes ->d_hash() and ->d_compare() safety in RCU mode independent
from vfsmount_lock

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hpfs/hpfs_fn.h |  1 +
 fs/hpfs/super.c   | 28 ++++++++++++++--------------
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 1b398636e990..6797bf80f6e2 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -80,6 +80,7 @@ struct hpfs_sb_info {
 	unsigned sb_c_bitmap;		/* current bitmap */
 	unsigned sb_max_fwd_alloc;	/* max forwad allocation */
 	int sb_timeshift;
+	struct rcu_head rcu;
 };
 
 /* Four 512-byte buffers and the 2k block obtained by concatenating them */
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 4334cda8dba1..b8d01ef6f531 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -101,18 +101,24 @@ int hpfs_stop_cycles(struct super_block *s, int key, int *c1, int *c2,
 	return 0;
 }
 
+static void free_sbi(struct hpfs_sb_info *sbi)
+{
+	kfree(sbi->sb_cp_table);
+	kfree(sbi->sb_bmp_dir);
+	kfree(sbi);
+}
+
+static void lazy_free_sbi(struct rcu_head *rcu)
+{
+	free_sbi(container_of(rcu, struct hpfs_sb_info, rcu));
+}
+
 static void hpfs_put_super(struct super_block *s)
 {
-	struct hpfs_sb_info *sbi = hpfs_sb(s);
-
 	hpfs_lock(s);
 	unmark_dirty(s);
 	hpfs_unlock(s);
-
-	kfree(sbi->sb_cp_table);
-	kfree(sbi->sb_bmp_dir);
-	s->s_fs_info = NULL;
-	kfree(sbi);
+	call_rcu(&hpfs_sb(s)->rcu, lazy_free_sbi);
 }
 
 unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno)
@@ -485,9 +491,6 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
 	}
 	s->s_fs_info = sbi;
 
-	sbi->sb_bmp_dir = NULL;
-	sbi->sb_cp_table = NULL;
-
 	mutex_init(&sbi->hpfs_mutex);
 	hpfs_lock(s);
 
@@ -679,10 +682,7 @@ bail2:	brelse(bh0);
 bail1:
 bail0:
 	hpfs_unlock(s);
-	kfree(sbi->sb_bmp_dir);
-	kfree(sbi->sb_cp_table);
-	s->s_fs_info = NULL;
-	kfree(sbi);
+	free_sbi(sbi);
 	return -EINVAL;
 }
 

From 2d1d9b5b5cc2d7d528a7cbf621d924de38b1b6b6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Oct 2013 12:37:18 -0400
Subject: [PATCH 038/101] adfs: delayed freeing of sbi

makes ->d_hash() and ->d_compare() safety in RCU mode independent
from vfsmount_lock.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/adfs/adfs.h  | 9 ++++++---
 fs/adfs/super.c | 3 +--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 585adafb0cc2..c770337c4b45 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -43,9 +43,12 @@ struct adfs_dir_ops;
  * ADFS file system superblock data in memory
  */
 struct adfs_sb_info {
-	struct adfs_discmap *s_map;	/* bh list containing map		 */
-	struct adfs_dir_ops *s_dir;	/* directory operations			 */
-
+	union { struct {
+		struct adfs_discmap *s_map;	/* bh list containing map	 */
+		struct adfs_dir_ops *s_dir;	/* directory operations		 */
+		};
+		struct rcu_head rcu;		/* used only at shutdown time	 */
+	};
 	kuid_t		s_uid;		/* owner uid				 */
 	kgid_t		s_gid;		/* owner gid				 */
 	umode_t		s_owner_mask;	/* ADFS owner perm -> unix perm		 */
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 0ff4bae2c2a2..7b3003cb6f1b 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -123,8 +123,7 @@ static void adfs_put_super(struct super_block *sb)
 	for (i = 0; i < asb->s_map_size; i++)
 		brelse(asb->s_map[i].dm_bh);
 	kfree(asb->s_map);
-	kfree(asb);
-	sb->s_fs_info = NULL;
+	kfree_rcu(asb, rcu);
 }
 
 static int adfs_show_options(struct seq_file *seq, struct dentry *root)

From baa40671d3e3b590a33b2c0e022db61cbebf5c00 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Oct 2013 12:46:44 -0400
Subject: [PATCH 039/101] autofs4: make freeing sbi rcu-delayed

makes ->d_managed() safety in RCU mode independent from vfsmount_lock

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/autofs4/autofs_i.h |  1 +
 fs/autofs4/inode.c    | 13 ++++---------
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index ebab147e9700..4218e26df916 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -122,6 +122,7 @@ struct autofs_sb_info {
 	spinlock_t lookup_lock;
 	struct list_head active_list;
 	struct list_head expiring_list;
+	struct rcu_head rcu;
 };
 
 static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb)
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index b104726e2d0a..3b9cc9b973c2 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -56,18 +56,13 @@ void autofs4_kill_sb(struct super_block *sb)
 	 * just call kill_anon_super when we are called from
 	 * deactivate_super.
 	 */
-	if (!sbi)
-		goto out_kill_sb;
+	if (sbi) /* Free wait queues, close pipe */
+		autofs4_catatonic_mode(sbi);
 
-	/* Free wait queues, close pipe */
-	autofs4_catatonic_mode(sbi);
-
-	sb->s_fs_info = NULL;
-	kfree(sbi);
-
-out_kill_sb:
 	DPRINTK("shutting down");
 	kill_litter_super(sb);
+	if (sbi)
+		kfree_rcu(sbi, rcu);
 }
 
 static int autofs4_show_options(struct seq_file *m, struct dentry *root)

From 2e32cf5ef2683a196df24ccd2cd2b4ed735ad9d3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Oct 2013 12:53:37 -0400
Subject: [PATCH 040/101] cifs: rcu-delay unload_nls() and freeing sbi

makes ->d_hash(), ->d_compare() and ->permission() safety in RCU mode
independent from vfsmount_lock.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/cifs/cifs_fs_sb.h |  1 +
 fs/cifs/connect.c    | 10 ++++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 37e4a72a7d1c..9409fa10bd5c 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -65,5 +65,6 @@ struct cifs_sb_info {
 	char   *mountdata; /* options received at mount time or via DFS refs */
 	struct backing_dev_info bdi;
 	struct delayed_work prune_tlinks;
+	struct rcu_head rcu;
 };
 #endif				/* _CIFS_FS_SB_H */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a279ffc0bc29..886db8b41e35 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3755,6 +3755,13 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
 	return rc;
 }
 
+static void delayed_free(struct rcu_head *p)
+{
+	struct cifs_sb_info *sbi = container_of(p, struct cifs_sb_info, rcu);
+	unload_nls(sbi->local_nls);
+	kfree(sbi);
+}
+
 void
 cifs_umount(struct cifs_sb_info *cifs_sb)
 {
@@ -3779,8 +3786,7 @@ cifs_umount(struct cifs_sb_info *cifs_sb)
 
 	bdi_destroy(&cifs_sb->bdi);
 	kfree(cifs_sb->mountdata);
-	unload_nls(cifs_sb->local_nls);
-	kfree(cifs_sb);
+	call_rcu(&cifs_sb->rcu, delayed_free);
 }
 
 int

From cac45b062c67f86dc1d91d675128838773523243 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Oct 2013 13:16:50 -0400
Subject: [PATCH 041/101] fat: rcu-delay unloading nls and freeing sbi

makes ->d_hash() and ->d_compare() safety in RCU mode independent
from vfsmount_lock.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fat/fat.h   |  1 +
 fs/fat/inode.c | 19 +++++++++++--------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 4241e6f39e86..7c31f4bc74a9 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -102,6 +102,7 @@ struct msdos_sb_info {
 	struct hlist_head dir_hashtable[FAT_HASH_SIZE];
 
 	unsigned int dirty;           /* fs state before mount */
+	struct rcu_head rcu;
 };
 
 #define FAT_CACHE_VALID	0	/* special case for valid cache */
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 0062da21dd8b..854b578f6695 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -548,6 +548,16 @@ static void fat_set_state(struct super_block *sb,
 	brelse(bh);
 }
 
+static void delayed_free(struct rcu_head *p)
+{
+	struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu);
+	unload_nls(sbi->nls_disk);
+	unload_nls(sbi->nls_io);
+	if (sbi->options.iocharset != fat_default_iocharset)
+		kfree(sbi->options.iocharset);
+	kfree(sbi);
+}
+
 static void fat_put_super(struct super_block *sb)
 {
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
@@ -557,14 +567,7 @@ static void fat_put_super(struct super_block *sb)
 	iput(sbi->fsinfo_inode);
 	iput(sbi->fat_inode);
 
-	unload_nls(sbi->nls_disk);
-	unload_nls(sbi->nls_io);
-
-	if (sbi->options.iocharset != fat_default_iocharset)
-		kfree(sbi->options.iocharset);
-
-	sb->s_fs_info = NULL;
-	kfree(sbi);
+	call_rcu(&sbi->rcu, delayed_free);
 }
 
 static struct kmem_cache *fat_inode_cachep;

From 1dcddd4abd2c6df7f28928ad5cafa4a1cd20030b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Oct 2013 13:22:44 -0400
Subject: [PATCH 042/101] ncpfs: rcu-delay unload_nls() and freeing ncp_server

makes ->d_hash() and ->d_compare() safety in RCU mode independent
from vfsmount_lock.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ncpfs/inode.c     | 19 ++++++++++++-------
 fs/ncpfs/ncp_fs_sb.h |  2 +-
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 4659da67e7f6..2cf2ebecb55f 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -782,6 +782,17 @@ out:
 	return error;
 }
 
+static void delayed_free(struct rcu_head *p)
+{
+	struct ncp_server *server = container_of(p, struct ncp_server, rcu);
+#ifdef CONFIG_NCPFS_NLS
+	/* unload the NLS charsets */
+	unload_nls(server->nls_vol);
+	unload_nls(server->nls_io);
+#endif /* CONFIG_NCPFS_NLS */
+	kfree(server);
+}
+
 static void ncp_put_super(struct super_block *sb)
 {
 	struct ncp_server *server = NCP_SBP(sb);
@@ -792,11 +803,6 @@ static void ncp_put_super(struct super_block *sb)
 
 	ncp_stop_tasks(server);
 
-#ifdef CONFIG_NCPFS_NLS
-	/* unload the NLS charsets */
-	unload_nls(server->nls_vol);
-	unload_nls(server->nls_io);
-#endif /* CONFIG_NCPFS_NLS */
 	mutex_destroy(&server->rcv.creq_mutex);
 	mutex_destroy(&server->root_setup_lock);
 	mutex_destroy(&server->mutex);
@@ -813,8 +819,7 @@ static void ncp_put_super(struct super_block *sb)
 	vfree(server->rxbuf);
 	vfree(server->txbuf);
 	vfree(server->packet);
-	sb->s_fs_info = NULL;
-	kfree(server);
+	call_rcu(&server->rcu, delayed_free);
 }
 
 static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf)
diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h
index c51b2c543539..b81e97adc5a9 100644
--- a/fs/ncpfs/ncp_fs_sb.h
+++ b/fs/ncpfs/ncp_fs_sb.h
@@ -38,7 +38,7 @@ struct ncp_mount_data_kernel {
 };
 
 struct ncp_server {
-
+	struct rcu_head rcu;
 	struct ncp_mount_data_kernel m;	/* Nearly all of the mount data is of
 					   interest for us later, so we store
 					   it completely. */

From 1adfcb03e31ba0d6be5fddf773da4357d0792cbb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Oct 2013 13:28:06 -0400
Subject: [PATCH 043/101] pid_namespace: make freeing struct pid_namespace
 rcu-delayed

makes procfs ->premission() instances safety in RCU mode independent
from vfsmount_lock.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/pid_namespace.h | 1 +
 kernel/pid_namespace.c        | 8 +++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index e2772666f004..7246ef3d4455 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -23,6 +23,7 @@ struct bsd_acct_struct;
 struct pid_namespace {
 	struct kref kref;
 	struct pidmap pidmap[PIDMAP_ENTRIES];
+	struct rcu_head rcu;
 	int last_pid;
 	unsigned int nr_hashed;
 	struct task_struct *child_reaper;
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 42086551a24a..06c62de9c711 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -132,6 +132,12 @@ out:
 	return ERR_PTR(err);
 }
 
+static void delayed_free_pidns(struct rcu_head *p)
+{
+	kmem_cache_free(pid_ns_cachep,
+			container_of(p, struct pid_namespace, rcu));
+}
+
 static void destroy_pid_namespace(struct pid_namespace *ns)
 {
 	int i;
@@ -140,7 +146,7 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
 	for (i = 0; i < PIDMAP_ENTRIES; i++)
 		kfree(ns->pidmap[i].page);
 	put_user_ns(ns->user_ns);
-	kmem_cache_free(pid_ns_cachep, ns);
+	call_rcu(&ns->rcu, delayed_free_pidns);
 }
 
 struct pid_namespace *copy_pid_ns(unsigned long flags,

From dd3e2c55a45fe75f3213b17b74f9a32380f4e84b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Oct 2013 21:21:39 -0400
Subject: [PATCH 044/101] fuse: rcu-delay freeing fuse_conn

makes ->permission() and ->d_revalidate() safety in RCU mode independent
from vfsmount_lock.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fuse/cuse.c   | 2 +-
 fs/fuse/fuse_i.h | 2 ++
 fs/fuse/inode.c  | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index adbfd66b380f..8ca88d716dd3 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -473,7 +473,7 @@ err:
 static void cuse_fc_release(struct fuse_conn *fc)
 {
 	struct cuse_conn *cc = fc_to_cc(fc);
-	kfree(cc);
+	kfree_rcu(cc, fc.rcu);
 }
 
 /**
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index fe446c222e3f..4715674f050c 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -377,6 +377,8 @@ struct fuse_conn {
 	/** Refcount */
 	atomic_t count;
 
+	struct rcu_head rcu;
+
 	/** The user id for this mount */
 	kuid_t user_id;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1c15613c64f8..d468643a68b2 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -918,7 +918,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
 
 static void fuse_free_conn(struct fuse_conn *fc)
 {
-	kfree(fc);
+	kfree_rcu(fc, rcu);
 }
 
 static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)

From 42c326082d8a2c91506f951ace638deae1faf083 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Nov 2013 12:31:16 -0500
Subject: [PATCH 045/101] switch shrink_dcache_for_umount() to use of d_walk()

we have too many iterators in fs/dcache.c...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c | 195 +++++++++++++++++++++++-----------------------------
 1 file changed, 85 insertions(+), 110 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index c8e83d0d61ac..eb0978da1bd4 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1074,116 +1074,6 @@ void shrink_dcache_sb(struct super_block *sb)
 }
 EXPORT_SYMBOL(shrink_dcache_sb);
 
-/*
- * destroy a single subtree of dentries for unmount
- * - see the comments on shrink_dcache_for_umount() for a description of the
- *   locking
- */
-static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
-{
-	struct dentry *parent;
-
-	BUG_ON(!IS_ROOT(dentry));
-
-	for (;;) {
-		/* descend to the first leaf in the current subtree */
-		while (!list_empty(&dentry->d_subdirs))
-			dentry = list_entry(dentry->d_subdirs.next,
-					    struct dentry, d_u.d_child);
-
-		/* consume the dentries from this leaf up through its parents
-		 * until we find one with children or run out altogether */
-		do {
-			struct inode *inode;
-
-			/*
-			 * inform the fs that this dentry is about to be
-			 * unhashed and destroyed.
-			 */
-			if ((dentry->d_flags & DCACHE_OP_PRUNE) &&
-			    !d_unhashed(dentry))
-				dentry->d_op->d_prune(dentry);
-
-			dentry_lru_del(dentry);
-			__d_shrink(dentry);
-
-			if (dentry->d_lockref.count != 0) {
-				printk(KERN_ERR
-				       "BUG: Dentry %p{i=%lx,n=%s}"
-				       " still in use (%d)"
-				       " [unmount of %s %s]\n",
-				       dentry,
-				       dentry->d_inode ?
-				       dentry->d_inode->i_ino : 0UL,
-				       dentry->d_name.name,
-				       dentry->d_lockref.count,
-				       dentry->d_sb->s_type->name,
-				       dentry->d_sb->s_id);
-				BUG();
-			}
-
-			if (IS_ROOT(dentry)) {
-				parent = NULL;
-				list_del(&dentry->d_u.d_child);
-			} else {
-				parent = dentry->d_parent;
-				parent->d_lockref.count--;
-				list_del(&dentry->d_u.d_child);
-			}
-
-			inode = dentry->d_inode;
-			if (inode) {
-				dentry->d_inode = NULL;
-				hlist_del_init(&dentry->d_alias);
-				if (dentry->d_op && dentry->d_op->d_iput)
-					dentry->d_op->d_iput(dentry, inode);
-				else
-					iput(inode);
-			}
-
-			d_free(dentry);
-
-			/* finished when we fall off the top of the tree,
-			 * otherwise we ascend to the parent and move to the
-			 * next sibling if there is one */
-			if (!parent)
-				return;
-			dentry = parent;
-		} while (list_empty(&dentry->d_subdirs));
-
-		dentry = list_entry(dentry->d_subdirs.next,
-				    struct dentry, d_u.d_child);
-	}
-}
-
-/*
- * destroy the dentries attached to a superblock on unmounting
- * - we don't need to use dentry->d_lock because:
- *   - the superblock is detached from all mountings and open files, so the
- *     dentry trees will not be rearranged by the VFS
- *   - s_umount is write-locked, so the memory pressure shrinker will ignore
- *     any dentries belonging to this superblock that it comes across
- *   - the filesystem itself is no longer permitted to rearrange the dentries
- *     in this superblock
- */
-void shrink_dcache_for_umount(struct super_block *sb)
-{
-	struct dentry *dentry;
-
-	if (down_read_trylock(&sb->s_umount))
-		BUG();
-
-	dentry = sb->s_root;
-	sb->s_root = NULL;
-	dentry->d_lockref.count--;
-	shrink_dcache_for_umount_subtree(dentry);
-
-	while (!hlist_bl_empty(&sb->s_anon)) {
-		dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
-		shrink_dcache_for_umount_subtree(dentry);
-	}
-}
-
 /*
  * This tries to ascend one level of parenthood, but
  * we can race with renaming, so we need to re-check
@@ -1478,6 +1368,91 @@ void shrink_dcache_parent(struct dentry *parent)
 }
 EXPORT_SYMBOL(shrink_dcache_parent);
 
+static enum d_walk_ret umount_collect(void *_data, struct dentry *dentry)
+{
+	struct select_data *data = _data;
+	enum d_walk_ret ret = D_WALK_CONTINUE;
+
+	if (dentry->d_lockref.count) {
+		dentry_lru_del(dentry);
+		if (likely(!list_empty(&dentry->d_subdirs)))
+			goto out;
+		if (dentry == data->start && dentry->d_lockref.count == 1)
+			goto out;
+		printk(KERN_ERR
+		       "BUG: Dentry %p{i=%lx,n=%s}"
+		       " still in use (%d)"
+		       " [unmount of %s %s]\n",
+		       dentry,
+		       dentry->d_inode ?
+		       dentry->d_inode->i_ino : 0UL,
+		       dentry->d_name.name,
+		       dentry->d_lockref.count,
+		       dentry->d_sb->s_type->name,
+		       dentry->d_sb->s_id);
+		BUG();
+	} else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
+		/*
+		 * We can't use d_lru_shrink_move() because we
+		 * need to get the global LRU lock and do the
+		 * LRU accounting.
+		 */
+		if (dentry->d_flags & DCACHE_LRU_LIST)
+			d_lru_del(dentry);
+		d_shrink_add(dentry, &data->dispose);
+		data->found++;
+		ret = D_WALK_NORETRY;
+	}
+out:
+	if (data->found && need_resched())
+		ret = D_WALK_QUIT;
+	return ret;
+}
+
+/*
+ * destroy the dentries attached to a superblock on unmounting
+ */
+void shrink_dcache_for_umount(struct super_block *sb)
+{
+	struct dentry *dentry;
+
+	if (down_read_trylock(&sb->s_umount))
+		BUG();
+
+	dentry = sb->s_root;
+	sb->s_root = NULL;
+	for (;;) {
+		struct select_data data;
+
+		INIT_LIST_HEAD(&data.dispose);
+		data.start = dentry;
+		data.found = 0;
+
+		d_walk(dentry, &data, umount_collect, NULL);
+		if (!data.found)
+			break;
+
+		shrink_dentry_list(&data.dispose);
+		cond_resched();
+	}
+	d_drop(dentry);
+	dput(dentry);
+
+	while (!hlist_bl_empty(&sb->s_anon)) {
+		struct select_data data;
+		dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
+
+		INIT_LIST_HEAD(&data.dispose);
+		data.start = NULL;
+		data.found = 0;
+
+		d_walk(dentry, &data, umount_collect, NULL);
+		if (data.found)
+			shrink_dentry_list(&data.dispose);
+		cond_resched();
+	}
+}
+
 static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry)
 {
 	struct select_data *data = _data;

From 48a066e72d970a3e225a9c18690d570c736fc455 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 29 Sep 2013 22:06:07 -0400
Subject: [PATCH 046/101] RCU'd vfsmounts

* RCU-delayed freeing of vfsmounts
* vfsmount_lock replaced with a seqlock (mount_lock)
* sequence number from mount_lock is stored in nameidata->m_seq and
used when we exit RCU mode
* new vfsmount flag - MNT_SYNC_UMOUNT.  Set by umount_tree() when its
caller knows that vfsmount will have no surviving references.
* synchronize_rcu() done between unlocking namespace_sem in namespace_unlock()
and doing pending mntput().
* new helper: legitimize_mnt(mnt, seq).  Checks the mount_lock sequence
number against seq, then grabs reference to mnt.  Then it rechecks mount_lock
again to close the race and either returns success or drops the reference it
has acquired.  The subtle point is that in case of MNT_SYNC_UMOUNT we can
simply decrement the refcount and sod off - aforementioned synchronize_rcu()
makes sure that final mntput() won't come until we leave RCU mode.  We need
that, since we don't want to end up with some lazy pathwalk racing with
umount() and stealing the final mntput() from it - caller of umount() may
expect it to return only once the fs is shut down and we don't want to break
that.  In other cases (i.e. with MNT_SYNC_UMOUNT absent) we have to do
full-blown mntput() in case of mount_lock sequence number mismatch happening
just as we'd grabbed the reference, but in those cases we won't be stealing
the final mntput() from anything that would care.
* mntput_no_expire() doesn't lock anything on the fast path now.  Incidentally,
SMP and UP cases are handled the same way - no ifdefs there.
* normal pathname resolution does *not* do any writes to mount_lock.  It does,
of course, bump the refcounts of vfsmount and dentry in the very end, but that's
it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c           |  20 +++++--
 fs/mount.h            |  10 ++--
 fs/namei.c            |  50 ++++++++--------
 fs/namespace.c        | 135 +++++++++++++++++++++++++++---------------
 include/linux/mount.h |   2 +
 include/linux/namei.h |   2 +-
 6 files changed, 136 insertions(+), 83 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index eb0978da1bd4..aafa2a146434 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2887,24 +2887,28 @@ static int prepend_path(const struct path *path,
 	struct vfsmount *vfsmnt = path->mnt;
 	struct mount *mnt = real_mount(vfsmnt);
 	int error = 0;
-	unsigned seq = 0;
+	unsigned seq, m_seq = 0;
 	char *bptr;
 	int blen;
 
-	br_read_lock(&vfsmount_lock);
 	rcu_read_lock();
+restart_mnt:
+	read_seqbegin_or_lock(&mount_lock, &m_seq);
+	seq = 0;
 restart:
 	bptr = *buffer;
 	blen = *buflen;
+	error = 0;
 	read_seqbegin_or_lock(&rename_lock, &seq);
 	while (dentry != root->dentry || vfsmnt != root->mnt) {
 		struct dentry * parent;
 
 		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
+			struct mount *parent = ACCESS_ONCE(mnt->mnt_parent);
 			/* Global root? */
-			if (mnt_has_parent(mnt)) {
-				dentry = mnt->mnt_mountpoint;
-				mnt = mnt->mnt_parent;
+			if (mnt != parent) {
+				dentry = ACCESS_ONCE(mnt->mnt_mountpoint);
+				mnt = parent;
 				vfsmnt = &mnt->mnt;
 				continue;
 			}
@@ -2938,7 +2942,11 @@ restart:
 		goto restart;
 	}
 	done_seqretry(&rename_lock, seq);
-	br_read_unlock(&vfsmount_lock);
+	if (need_seqretry(&mount_lock, m_seq)) {
+		m_seq = 1;
+		goto restart_mnt;
+	}
+	done_seqretry(&mount_lock, m_seq);
 
 	if (error >= 0 && bptr == *buffer) {
 		if (--blen < 0)
diff --git a/fs/mount.h b/fs/mount.h
index f0866076de6e..d64c594be6c4 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -1,7 +1,6 @@
 #include <linux/mount.h>
 #include <linux/seq_file.h>
 #include <linux/poll.h>
-#include <linux/lglock.h>
 
 struct mnt_namespace {
 	atomic_t		count;
@@ -30,6 +29,7 @@ struct mount {
 	struct mount *mnt_parent;
 	struct dentry *mnt_mountpoint;
 	struct vfsmount mnt;
+	struct rcu_head mnt_rcu;
 #ifdef CONFIG_SMP
 	struct mnt_pcp __percpu *mnt_pcp;
 #else
@@ -80,21 +80,23 @@ static inline int is_mounted(struct vfsmount *mnt)
 extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
 extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);
 
+extern bool legitimize_mnt(struct vfsmount *, unsigned);
+
 static inline void get_mnt_ns(struct mnt_namespace *ns)
 {
 	atomic_inc(&ns->count);
 }
 
-extern struct lglock vfsmount_lock;
+extern seqlock_t mount_lock;
 
 static inline void lock_mount_hash(void)
 {
-	br_write_lock(&vfsmount_lock);
+	write_seqlock(&mount_lock);
 }
 
 static inline void unlock_mount_hash(void)
 {
-	br_write_unlock(&vfsmount_lock);
+	write_sequnlock(&mount_lock);
 }
 
 struct proc_mounts {
diff --git a/fs/namei.c b/fs/namei.c
index 1f844fbfce72..cb0ebae07e52 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -484,14 +484,12 @@ EXPORT_SYMBOL(path_put);
 
 static inline void lock_rcu_walk(void)
 {
-	br_read_lock(&vfsmount_lock);
 	rcu_read_lock();
 }
 
 static inline void unlock_rcu_walk(void)
 {
 	rcu_read_unlock();
-	br_read_unlock(&vfsmount_lock);
 }
 
 /**
@@ -512,26 +510,23 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
 	BUG_ON(!(nd->flags & LOOKUP_RCU));
 
 	/*
-	 * Get a reference to the parent first: we're
-	 * going to make "path_put(nd->path)" valid in
-	 * non-RCU context for "terminate_walk()".
-	 *
-	 * If this doesn't work, return immediately with
-	 * RCU walking still active (and then we will do
-	 * the RCU walk cleanup in terminate_walk()).
+	 * After legitimizing the bastards, terminate_walk()
+	 * will do the right thing for non-RCU mode, and all our
+	 * subsequent exit cases should rcu_read_unlock()
+	 * before returning.  Do vfsmount first; if dentry
+	 * can't be legitimized, just set nd->path.dentry to NULL
+	 * and rely on dput(NULL) being a no-op.
 	 */
-	if (!lockref_get_not_dead(&parent->d_lockref))
+	if (!legitimize_mnt(nd->path.mnt, nd->m_seq))
 		return -ECHILD;
-
-	/*
-	 * After the mntget(), we terminate_walk() will do
-	 * the right thing for non-RCU mode, and all our
-	 * subsequent exit cases should unlock_rcu_walk()
-	 * before returning.
-	 */
-	mntget(nd->path.mnt);
 	nd->flags &= ~LOOKUP_RCU;
 
+	if (!lockref_get_not_dead(&parent->d_lockref)) {
+		nd->path.dentry = NULL;	
+		unlock_rcu_walk();
+		return -ECHILD;
+	}
+
 	/*
 	 * For a negative lookup, the lookup sequence point is the parents
 	 * sequence point, and it only needs to revalidate the parent dentry.
@@ -608,16 +603,21 @@ static int complete_walk(struct nameidata *nd)
 		if (!(nd->flags & LOOKUP_ROOT))
 			nd->root.mnt = NULL;
 
+		if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) {
+			unlock_rcu_walk();
+			return -ECHILD;
+		}
 		if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
 			unlock_rcu_walk();
+			mntput(nd->path.mnt);
 			return -ECHILD;
 		}
 		if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
 			unlock_rcu_walk();
 			dput(dentry);
+			mntput(nd->path.mnt);
 			return -ECHILD;
 		}
-		mntget(nd->path.mnt);
 		unlock_rcu_walk();
 	}
 
@@ -909,15 +909,15 @@ int follow_up(struct path *path)
 	struct mount *parent;
 	struct dentry *mountpoint;
 
-	br_read_lock(&vfsmount_lock);
+	read_seqlock_excl(&mount_lock);
 	parent = mnt->mnt_parent;
 	if (parent == mnt) {
-		br_read_unlock(&vfsmount_lock);
+		read_sequnlock_excl(&mount_lock);
 		return 0;
 	}
 	mntget(&parent->mnt);
 	mountpoint = dget(mnt->mnt_mountpoint);
-	br_read_unlock(&vfsmount_lock);
+	read_sequnlock_excl(&mount_lock);
 	dput(path->dentry);
 	path->dentry = mountpoint;
 	mntput(path->mnt);
@@ -1048,8 +1048,8 @@ static int follow_managed(struct path *path, unsigned flags)
 
 			/* Something is mounted on this dentry in another
 			 * namespace and/or whatever was mounted there in this
-			 * namespace got unmounted before we managed to get the
-			 * vfsmount_lock */
+			 * namespace got unmounted before lookup_mnt() could
+			 * get it */
 		}
 
 		/* Handle an automount point */
@@ -1864,6 +1864,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 		if (flags & LOOKUP_RCU) {
 			lock_rcu_walk();
 			nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+			nd->m_seq = read_seqbegin(&mount_lock);
 		} else {
 			path_get(&nd->path);
 		}
@@ -1872,6 +1873,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 
 	nd->root.mnt = NULL;
 
+	nd->m_seq = read_seqbegin(&mount_lock);
 	if (*name=='/') {
 		if (flags & LOOKUP_RCU) {
 			lock_rcu_walk();
diff --git a/fs/namespace.c b/fs/namespace.c
index 500202ce10db..ac2ce8a766e1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -53,7 +53,7 @@ EXPORT_SYMBOL_GPL(fs_kobj);
  * It should be taken for write in all cases where the vfsmount
  * tree or hash is modified or when a vfsmount structure is modified.
  */
-DEFINE_BRLOCK(vfsmount_lock);
+__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
 
 static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
 {
@@ -547,16 +547,38 @@ static void free_vfsmnt(struct mount *mnt)
 	kmem_cache_free(mnt_cache, mnt);
 }
 
+/* call under rcu_read_lock */
+bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
+{
+	struct mount *mnt;
+	if (read_seqretry(&mount_lock, seq))
+		return false;
+	if (bastard == NULL)
+		return true;
+	mnt = real_mount(bastard);
+	mnt_add_count(mnt, 1);
+	if (likely(!read_seqretry(&mount_lock, seq)))
+		return true;
+	if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
+		mnt_add_count(mnt, -1);
+		return false;
+	}
+	rcu_read_unlock();
+	mntput(bastard);
+	rcu_read_lock();
+	return false;
+}
+
 /*
  * find the first mount at @dentry on vfsmount @mnt.
- * vfsmount_lock must be held for read or write.
+ * call under rcu_read_lock()
  */
 struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 {
 	struct list_head *head = mount_hashtable + hash(mnt, dentry);
 	struct mount *p;
 
-	list_for_each_entry(p, head, mnt_hash)
+	list_for_each_entry_rcu(p, head, mnt_hash)
 		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
 			return p;
 	return NULL;
@@ -564,7 +586,7 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 
 /*
  * find the last mount at @dentry on vfsmount @mnt.
- * vfsmount_lock must be held for read or write.
+ * mount_lock must be held.
  */
 struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
 {
@@ -596,17 +618,17 @@ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
 struct vfsmount *lookup_mnt(struct path *path)
 {
 	struct mount *child_mnt;
+	struct vfsmount *m;
+	unsigned seq;
 
-	br_read_lock(&vfsmount_lock);
-	child_mnt = __lookup_mnt(path->mnt, path->dentry);
-	if (child_mnt) {
-		mnt_add_count(child_mnt, 1);
-		br_read_unlock(&vfsmount_lock);
-		return &child_mnt->mnt;
-	} else {
-		br_read_unlock(&vfsmount_lock);
-		return NULL;
-	}
+	rcu_read_lock();
+	do {
+		seq = read_seqbegin(&mount_lock);
+		child_mnt = __lookup_mnt(path->mnt, path->dentry);
+		m = child_mnt ? &child_mnt->mnt : NULL;
+	} while (!legitimize_mnt(m, seq));
+	rcu_read_unlock();
+	return m;
 }
 
 static struct mountpoint *new_mountpoint(struct dentry *dentry)
@@ -874,38 +896,46 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 	return ERR_PTR(err);
 }
 
+static void delayed_free(struct rcu_head *head)
+{
+	struct mount *mnt = container_of(head, struct mount, mnt_rcu);
+	kfree(mnt->mnt_devname);
+#ifdef CONFIG_SMP
+	free_percpu(mnt->mnt_pcp);
+#endif
+	kmem_cache_free(mnt_cache, mnt);
+}
+
 static void mntput_no_expire(struct mount *mnt)
 {
 put_again:
-#ifdef CONFIG_SMP
-	br_read_lock(&vfsmount_lock);
-	if (likely(mnt->mnt_ns)) {
-		/* shouldn't be the last one */
-		mnt_add_count(mnt, -1);
-		br_read_unlock(&vfsmount_lock);
+	rcu_read_lock();
+	mnt_add_count(mnt, -1);
+	if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
+		rcu_read_unlock();
 		return;
 	}
-	br_read_unlock(&vfsmount_lock);
-
 	lock_mount_hash();
-	mnt_add_count(mnt, -1);
 	if (mnt_get_count(mnt)) {
+		rcu_read_unlock();
 		unlock_mount_hash();
 		return;
 	}
-#else
-	mnt_add_count(mnt, -1);
-	if (likely(mnt_get_count(mnt)))
-		return;
-	lock_mount_hash();
-#endif
 	if (unlikely(mnt->mnt_pinned)) {
 		mnt_add_count(mnt, mnt->mnt_pinned + 1);
 		mnt->mnt_pinned = 0;
+		rcu_read_unlock();
 		unlock_mount_hash();
 		acct_auto_close_mnt(&mnt->mnt);
 		goto put_again;
 	}
+	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
+		rcu_read_unlock();
+		unlock_mount_hash();
+		return;
+	}
+	mnt->mnt.mnt_flags |= MNT_DOOMED;
+	rcu_read_unlock();
 
 	list_del(&mnt->mnt_instance);
 	unlock_mount_hash();
@@ -924,7 +954,8 @@ put_again:
 	fsnotify_vfsmount_delete(&mnt->mnt);
 	dput(mnt->mnt.mnt_root);
 	deactivate_super(mnt->mnt.mnt_sb);
-	free_vfsmnt(mnt);
+	mnt_free_id(mnt);
+	call_rcu(&mnt->mnt_rcu, delayed_free);
 }
 
 void mntput(struct vfsmount *mnt)
@@ -1137,6 +1168,8 @@ static void namespace_unlock(void)
 	list_splice_init(&unmounted, &head);
 	up_write(&namespace_sem);
 
+	synchronize_rcu();
+
 	while (!list_empty(&head)) {
 		mnt = list_first_entry(&head, struct mount, mnt_hash);
 		list_del_init(&mnt->mnt_hash);
@@ -1152,10 +1185,13 @@ static inline void namespace_lock(void)
 }
 
 /*
- * vfsmount lock must be held for write
+ * mount_lock must be held
  * namespace_sem must be held for write
+ * how = 0 => just this tree, don't propagate
+ * how = 1 => propagate; we know that nobody else has reference to any victims
+ * how = 2 => lazy umount
  */
-void umount_tree(struct mount *mnt, int propagate)
+void umount_tree(struct mount *mnt, int how)
 {
 	LIST_HEAD(tmp_list);
 	struct mount *p;
@@ -1163,7 +1199,7 @@ void umount_tree(struct mount *mnt, int propagate)
 	for (p = mnt; p; p = next_mnt(p, mnt))
 		list_move(&p->mnt_hash, &tmp_list);
 
-	if (propagate)
+	if (how)
 		propagate_umount(&tmp_list);
 
 	list_for_each_entry(p, &tmp_list, mnt_hash) {
@@ -1171,6 +1207,8 @@ void umount_tree(struct mount *mnt, int propagate)
 		list_del_init(&p->mnt_list);
 		__touch_mnt_namespace(p->mnt_ns);
 		p->mnt_ns = NULL;
+		if (how < 2)
+			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
 		list_del_init(&p->mnt_child);
 		if (mnt_has_parent(p)) {
 			put_mountpoint(p->mnt_mp);
@@ -1262,14 +1300,18 @@ static int do_umount(struct mount *mnt, int flags)
 	lock_mount_hash();
 	event++;
 
-	if (!(flags & MNT_DETACH))
-		shrink_submounts(mnt);
-
-	retval = -EBUSY;
-	if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
+	if (flags & MNT_DETACH) {
 		if (!list_empty(&mnt->mnt_list))
-			umount_tree(mnt, 1);
+			umount_tree(mnt, 2);
 		retval = 0;
+	} else {
+		shrink_submounts(mnt);
+		retval = -EBUSY;
+		if (!propagate_mount_busy(mnt, 2)) {
+			if (!list_empty(&mnt->mnt_list))
+				umount_tree(mnt, 1);
+			retval = 0;
+		}
 	}
 	unlock_mount_hash();
 	namespace_unlock();
@@ -1955,7 +1997,7 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
 	struct mount *parent;
 	int err;
 
-	mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
+	mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT);
 
 	mp = lock_mount(path);
 	if (IS_ERR(mp))
@@ -2172,7 +2214,7 @@ resume:
  * process a list of expirable mountpoints with the intent of discarding any
  * submounts of a specific parent mountpoint
  *
- * vfsmount_lock must be held for write
+ * mount_lock must be held for write
  */
 static void shrink_submounts(struct mount *mnt)
 {
@@ -2558,7 +2600,7 @@ out_type:
 /*
  * Return true if path is reachable from root
  *
- * namespace_sem or vfsmount_lock is held
+ * namespace_sem or mount_lock is held
  */
 bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
 			 const struct path *root)
@@ -2573,9 +2615,9 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
 int path_is_under(struct path *path1, struct path *path2)
 {
 	int res;
-	br_read_lock(&vfsmount_lock);
+	read_seqlock_excl(&mount_lock);
 	res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
-	br_read_unlock(&vfsmount_lock);
+	read_sequnlock_excl(&mount_lock);
 	return res;
 }
 EXPORT_SYMBOL(path_is_under);
@@ -2748,8 +2790,6 @@ void __init mnt_init(void)
 	for (u = 0; u < HASH_SIZE; u++)
 		INIT_LIST_HEAD(&mountpoint_hashtable[u]);
 
-	br_lock_init(&vfsmount_lock);
-
 	err = sysfs_init();
 	if (err)
 		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
@@ -2788,9 +2828,8 @@ void kern_unmount(struct vfsmount *mnt)
 {
 	/* release long term mount so mount point can be released */
 	if (!IS_ERR_OR_NULL(mnt)) {
-		lock_mount_hash();
 		real_mount(mnt)->mnt_ns = NULL;
-		unlock_mount_hash();
+		synchronize_rcu();	/* yecchhh... */
 		mntput(mnt);
 	}
 }
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 38cd98f112a0..371d346fa270 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -49,6 +49,8 @@ struct mnt_namespace;
 
 #define MNT_LOCK_READONLY	0x400000
 #define MNT_LOCKED		0x800000
+#define MNT_DOOMED		0x1000000
+#define MNT_SYNC_UMOUNT		0x2000000
 
 struct vfsmount {
 	struct dentry *mnt_root;	/* root of the mounted tree */
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 8e47bc7a1665..492de72560fa 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -16,7 +16,7 @@ struct nameidata {
 	struct path	root;
 	struct inode	*inode; /* path.dentry.d_inode */
 	unsigned int	flags;
-	unsigned	seq;
+	unsigned	seq, m_seq;
 	int		last_type;
 	unsigned	depth;
 	char *saved_names[MAX_NESTED_LINKS + 1];

From 8b61e74ffc6310e1d35a9b51c8463093851f8bcf Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Nov 2013 12:45:01 -0500
Subject: [PATCH 047/101] get rid of {lock,unlock}_rcu_walk()

those have become aliases for rcu_read_{lock,unlock}()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 38 ++++++++++++++------------------------
 1 file changed, 14 insertions(+), 24 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index cb0ebae07e52..e5c0118ba9f8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -482,16 +482,6 @@ EXPORT_SYMBOL(path_put);
  * to restart the path walk from the beginning in ref-walk mode.
  */
 
-static inline void lock_rcu_walk(void)
-{
-	rcu_read_lock();
-}
-
-static inline void unlock_rcu_walk(void)
-{
-	rcu_read_unlock();
-}
-
 /**
  * unlazy_walk - try to switch to ref-walk mode.
  * @nd: nameidata pathwalk data
@@ -523,7 +513,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
 
 	if (!lockref_get_not_dead(&parent->d_lockref)) {
 		nd->path.dentry = NULL;	
-		unlock_rcu_walk();
+		rcu_read_unlock();
 		return -ECHILD;
 	}
 
@@ -561,17 +551,17 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
 		spin_unlock(&fs->lock);
 	}
 
-	unlock_rcu_walk();
+	rcu_read_unlock();
 	return 0;
 
 unlock_and_drop_dentry:
 	spin_unlock(&fs->lock);
 drop_dentry:
-	unlock_rcu_walk();
+	rcu_read_unlock();
 	dput(dentry);
 	goto drop_root_mnt;
 out:
-	unlock_rcu_walk();
+	rcu_read_unlock();
 drop_root_mnt:
 	if (!(nd->flags & LOOKUP_ROOT))
 		nd->root.mnt = NULL;
@@ -604,21 +594,21 @@ static int complete_walk(struct nameidata *nd)
 			nd->root.mnt = NULL;
 
 		if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) {
-			unlock_rcu_walk();
+			rcu_read_unlock();
 			return -ECHILD;
 		}
 		if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
-			unlock_rcu_walk();
+			rcu_read_unlock();
 			mntput(nd->path.mnt);
 			return -ECHILD;
 		}
 		if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
-			unlock_rcu_walk();
+			rcu_read_unlock();
 			dput(dentry);
 			mntput(nd->path.mnt);
 			return -ECHILD;
 		}
-		unlock_rcu_walk();
+		rcu_read_unlock();
 	}
 
 	if (likely(!(nd->flags & LOOKUP_JUMPED)))
@@ -1174,7 +1164,7 @@ failed:
 	nd->flags &= ~LOOKUP_RCU;
 	if (!(nd->flags & LOOKUP_ROOT))
 		nd->root.mnt = NULL;
-	unlock_rcu_walk();
+	rcu_read_unlock();
 	return -ECHILD;
 }
 
@@ -1501,7 +1491,7 @@ static void terminate_walk(struct nameidata *nd)
 		nd->flags &= ~LOOKUP_RCU;
 		if (!(nd->flags & LOOKUP_ROOT))
 			nd->root.mnt = NULL;
-		unlock_rcu_walk();
+		rcu_read_unlock();
 	}
 }
 
@@ -1862,7 +1852,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 		nd->path = nd->root;
 		nd->inode = inode;
 		if (flags & LOOKUP_RCU) {
-			lock_rcu_walk();
+			rcu_read_lock();
 			nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
 			nd->m_seq = read_seqbegin(&mount_lock);
 		} else {
@@ -1876,7 +1866,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 	nd->m_seq = read_seqbegin(&mount_lock);
 	if (*name=='/') {
 		if (flags & LOOKUP_RCU) {
-			lock_rcu_walk();
+			rcu_read_lock();
 			set_root_rcu(nd);
 		} else {
 			set_root(nd);
@@ -1888,7 +1878,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 			struct fs_struct *fs = current->fs;
 			unsigned seq;
 
-			lock_rcu_walk();
+			rcu_read_lock();
 
 			do {
 				seq = read_seqcount_begin(&fs->seq);
@@ -1920,7 +1910,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 			if (f.need_put)
 				*fp = f.file;
 			nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
-			lock_rcu_walk();
+			rcu_read_lock();
 		} else {
 			path_get(&nd->path);
 			fdput(f);

From eee5cc2702929fd41cce28058dc6d6717f723f87 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Oct 2013 11:06:42 -0400
Subject: [PATCH 048/101] get rid of s_files and files_lock

The only thing we need it for is alt-sysrq-r (emergency remount r/o)
and these days we can do just as well without going through the
list of files.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c    | 125 ---------------------------------------------
 fs/internal.h      |   3 --
 fs/open.c          |   2 -
 fs/super.c         |  15 +-----
 include/linux/fs.h |  13 -----
 5 files changed, 2 insertions(+), 156 deletions(-)

diff --git a/fs/file_table.c b/fs/file_table.c
index e61e5529fa9d..23b6dca03ba0 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -36,8 +36,6 @@ struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
 };
 
-DEFINE_STATIC_LGLOCK(files_lglock);
-
 /* SLAB cache for file structures */
 static struct kmem_cache *filp_cachep __read_mostly;
 
@@ -134,7 +132,6 @@ struct file *get_empty_filp(void)
 		return ERR_PTR(error);
 	}
 
-	INIT_LIST_HEAD(&f->f_u.fu_list);
 	atomic_long_set(&f->f_count, 1);
 	rwlock_init(&f->f_owner.lock);
 	spin_lock_init(&f->f_lock);
@@ -304,7 +301,6 @@ void fput(struct file *file)
 	if (atomic_long_dec_and_test(&file->f_count)) {
 		struct task_struct *task = current;
 
-		file_sb_list_del(file);
 		if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
 			init_task_work(&file->f_u.fu_rcuhead, ____fput);
 			if (!task_work_add(task, &file->f_u.fu_rcuhead, true))
@@ -333,7 +329,6 @@ void __fput_sync(struct file *file)
 {
 	if (atomic_long_dec_and_test(&file->f_count)) {
 		struct task_struct *task = current;
-		file_sb_list_del(file);
 		BUG_ON(!(task->flags & PF_KTHREAD));
 		__fput(file);
 	}
@@ -345,129 +340,10 @@ void put_filp(struct file *file)
 {
 	if (atomic_long_dec_and_test(&file->f_count)) {
 		security_file_free(file);
-		file_sb_list_del(file);
 		file_free(file);
 	}
 }
 
-static inline int file_list_cpu(struct file *file)
-{
-#ifdef CONFIG_SMP
-	return file->f_sb_list_cpu;
-#else
-	return smp_processor_id();
-#endif
-}
-
-/* helper for file_sb_list_add to reduce ifdefs */
-static inline void __file_sb_list_add(struct file *file, struct super_block *sb)
-{
-	struct list_head *list;
-#ifdef CONFIG_SMP
-	int cpu;
-	cpu = smp_processor_id();
-	file->f_sb_list_cpu = cpu;
-	list = per_cpu_ptr(sb->s_files, cpu);
-#else
-	list = &sb->s_files;
-#endif
-	list_add(&file->f_u.fu_list, list);
-}
-
-/**
- * file_sb_list_add - add a file to the sb's file list
- * @file: file to add
- * @sb: sb to add it to
- *
- * Use this function to associate a file with the superblock of the inode it
- * refers to.
- */
-void file_sb_list_add(struct file *file, struct super_block *sb)
-{
-	if (likely(!(file->f_mode & FMODE_WRITE)))
-		return;
-	if (!S_ISREG(file_inode(file)->i_mode))
-		return;
-	lg_local_lock(&files_lglock);
-	__file_sb_list_add(file, sb);
-	lg_local_unlock(&files_lglock);
-}
-
-/**
- * file_sb_list_del - remove a file from the sb's file list
- * @file: file to remove
- * @sb: sb to remove it from
- *
- * Use this function to remove a file from its superblock.
- */
-void file_sb_list_del(struct file *file)
-{
-	if (!list_empty(&file->f_u.fu_list)) {
-		lg_local_lock_cpu(&files_lglock, file_list_cpu(file));
-		list_del_init(&file->f_u.fu_list);
-		lg_local_unlock_cpu(&files_lglock, file_list_cpu(file));
-	}
-}
-
-#ifdef CONFIG_SMP
-
-/*
- * These macros iterate all files on all CPUs for a given superblock.
- * files_lglock must be held globally.
- */
-#define do_file_list_for_each_entry(__sb, __file)		\
-{								\
-	int i;							\
-	for_each_possible_cpu(i) {				\
-		struct list_head *list;				\
-		list = per_cpu_ptr((__sb)->s_files, i);		\
-		list_for_each_entry((__file), list, f_u.fu_list)
-
-#define while_file_list_for_each_entry				\
-	}							\
-}
-
-#else
-
-#define do_file_list_for_each_entry(__sb, __file)		\
-{								\
-	struct list_head *list;					\
-	list = &(sb)->s_files;					\
-	list_for_each_entry((__file), list, f_u.fu_list)
-
-#define while_file_list_for_each_entry				\
-}
-
-#endif
-
-/**
- *	mark_files_ro - mark all files read-only
- *	@sb: superblock in question
- *
- *	All files are marked read-only.  We don't care about pending
- *	delete files so this should be used in 'force' mode only.
- */
-void mark_files_ro(struct super_block *sb)
-{
-	struct file *f;
-
-	lg_global_lock(&files_lglock);
-	do_file_list_for_each_entry(sb, f) {
-		if (!file_count(f))
-			continue;
-		if (!(f->f_mode & FMODE_WRITE))
-			continue;
-		spin_lock(&f->f_lock);
-		f->f_mode &= ~FMODE_WRITE;
-		spin_unlock(&f->f_lock);
-		if (file_check_writeable(f) != 0)
-			continue;
-		__mnt_drop_write(f->f_path.mnt);
-		file_release_write(f);
-	} while_file_list_for_each_entry;
-	lg_global_unlock(&files_lglock);
-}
-
 void __init files_init(unsigned long mempages)
 { 
 	unsigned long n;
@@ -483,6 +359,5 @@ void __init files_init(unsigned long mempages)
 	n = (mempages * (PAGE_SIZE / 1024)) / 10;
 	files_stat.max_files = max_t(unsigned long, n, NR_FILE);
 	files_defer_init();
-	lg_lock_init(&files_lglock, "files_lglock");
 	percpu_counter_init(&nr_files, 0);
 } 
diff --git a/fs/internal.h b/fs/internal.h
index 4a11e75ce14d..465742407466 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -73,9 +73,6 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
 /*
  * file_table.c
  */
-extern void file_sb_list_add(struct file *f, struct super_block *sb);
-extern void file_sb_list_del(struct file *f);
-extern void mark_files_ro(struct super_block *);
 extern struct file *get_empty_filp(void);
 
 /*
diff --git a/fs/open.c b/fs/open.c
index a1465b1ec8c7..fffbed40dbe9 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -685,7 +685,6 @@ static int do_dentry_open(struct file *f,
 	}
 
 	f->f_mapping = inode->i_mapping;
-	file_sb_list_add(f, inode->i_sb);
 
 	if (unlikely(f->f_mode & FMODE_PATH)) {
 		f->f_op = &empty_fops;
@@ -724,7 +723,6 @@ static int do_dentry_open(struct file *f,
 
 cleanup_all:
 	fops_put(f->f_op);
-	file_sb_list_del(f);
 	if (f->f_mode & FMODE_WRITE) {
 		put_write_access(inode);
 		if (!special_file(inode->i_mode)) {
diff --git a/fs/super.c b/fs/super.c
index 743bb7118053..e5f6c2cfac38 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -140,9 +140,6 @@ static void destroy_super(struct super_block *s)
 	int i;
 	list_lru_destroy(&s->s_dentry_lru);
 	list_lru_destroy(&s->s_inode_lru);
-#ifdef CONFIG_SMP
-	free_percpu(s->s_files);
-#endif
 	for (i = 0; i < SB_FREEZE_LEVELS; i++)
 		percpu_counter_destroy(&s->s_writers.counter[i]);
 	security_sb_free(s);
@@ -172,15 +169,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
 	if (security_sb_alloc(s))
 		goto fail;
 
-#ifdef CONFIG_SMP
-	s->s_files = alloc_percpu(struct list_head);
-	if (!s->s_files)
-		goto fail;
-	for_each_possible_cpu(i)
-		INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i));
-#else
-	INIT_LIST_HEAD(&s->s_files);
-#endif
 	for (i = 0; i < SB_FREEZE_LEVELS; i++) {
 		if (percpu_counter_init(&s->s_writers.counter[i], 0) < 0)
 			goto fail;
@@ -722,7 +710,8 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 	   make sure there are no rw files opened */
 	if (remount_ro) {
 		if (force) {
-			mark_files_ro(sb);
+			sb->s_readonly_remount = 1;
+			smp_wmb();
 		} else {
 			retval = sb_prepare_remount_readonly(sb);
 			if (retval)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2ab8a67ee054..2b0f4e974480 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -764,12 +764,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
 #define FILE_MNT_WRITE_RELEASED	2
 
 struct file {
-	/*
-	 * fu_list becomes invalid after file_free is called and queued via
-	 * fu_rcuhead for RCU freeing
-	 */
 	union {
-		struct list_head	fu_list;
 		struct llist_node	fu_llist;
 		struct rcu_head 	fu_rcuhead;
 	} f_u;
@@ -783,9 +778,6 @@ struct file {
 	 * Must not be taken from IRQ context.
 	 */
 	spinlock_t		f_lock;
-#ifdef CONFIG_SMP
-	int			f_sb_list_cpu;
-#endif
 	atomic_long_t		f_count;
 	unsigned int 		f_flags;
 	fmode_t			f_mode;
@@ -1264,11 +1256,6 @@ struct super_block {
 
 	struct list_head	s_inodes;	/* all inodes */
 	struct hlist_bl_head	s_anon;		/* anonymous dentries for (nfs) exporting */
-#ifdef CONFIG_SMP
-	struct list_head __percpu *s_files;
-#else
-	struct list_head	s_files;
-#endif
 	struct list_head	s_mounts;	/* list of mounts; _not_ for fs use */
 	struct block_device	*s_bdev;
 	struct backing_dev_info *s_bdi;

From b61625d24596ea44555943867d5a5c1efd81074c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Oct 2013 11:09:01 -0400
Subject: [PATCH 049/101] fold __d_shrink() into its only remaining caller

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c | 32 ++++++++++----------------------
 1 file changed, 10 insertions(+), 22 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index aafa2a146434..fb7bcf3ba5d6 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -483,27 +483,6 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 	return parent;
 }
 
-/*
- * Unhash a dentry without inserting an RCU walk barrier or checking that
- * dentry->d_lock is locked.  The caller must take care of that, if
- * appropriate.
- */
-static void __d_shrink(struct dentry *dentry)
-{
-	if (!d_unhashed(dentry)) {
-		struct hlist_bl_head *b;
-		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
-			b = &dentry->d_sb->s_anon;
-		else
-			b = d_hash(dentry->d_parent, dentry->d_name.hash);
-
-		hlist_bl_lock(b);
-		__hlist_bl_del(&dentry->d_hash);
-		dentry->d_hash.pprev = NULL;
-		hlist_bl_unlock(b);
-	}
-}
-
 /**
  * d_drop - drop a dentry
  * @dentry: dentry to drop
@@ -522,7 +501,16 @@ static void __d_shrink(struct dentry *dentry)
 void __d_drop(struct dentry *dentry)
 {
 	if (!d_unhashed(dentry)) {
-		__d_shrink(dentry);
+		struct hlist_bl_head *b;
+		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
+			b = &dentry->d_sb->s_anon;
+		else
+			b = d_hash(dentry->d_parent, dentry->d_name.hash);
+
+		hlist_bl_lock(b);
+		__hlist_bl_del(&dentry->d_hash);
+		dentry->d_hash.pprev = NULL;
+		hlist_bl_unlock(b);
 		dentry_rcuwalk_barrier(dentry);
 	}
 }

From 11d100d9a291508be5a5bae1ddf92ea2ba9b8f0c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Oct 2013 18:17:02 -0400
Subject: [PATCH 050/101] coda_revalidate_inode(): switch to passing inode...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/coda/coda_linux.h | 2 +-
 fs/coda/dir.c        | 3 +--
 fs/coda/inode.c      | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index cc0ea9fe5ecf..e7550cb9fb74 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -40,7 +40,7 @@ extern const struct file_operations coda_ioctl_operations;
 int coda_open(struct inode *i, struct file *f);
 int coda_release(struct inode *i, struct file *f);
 int coda_permission(struct inode *inode, int mask);
-int coda_revalidate_inode(struct dentry *);
+int coda_revalidate_inode(struct inode *);
 int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 int coda_setattr(struct dentry *, struct iattr *);
 
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 8c8d469c9283..5efbb5ee0adc 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -563,13 +563,12 @@ static int coda_dentry_delete(const struct dentry * dentry)
  * cache manager Venus issues a downcall to the kernel when this 
  * happens 
  */
-int coda_revalidate_inode(struct dentry *dentry)
+int coda_revalidate_inode(struct inode *inode)
 {
 	struct coda_vattr attr;
 	int error;
 	int old_mode;
 	ino_t old_ino;
-	struct inode *inode = dentry->d_inode;
 	struct coda_inode_info *cii = ITOC(inode);
 
 	if (!cii->c_flags)
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 4dcc0d81a7aa..506de34a4ef3 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -257,7 +257,7 @@ static void coda_evict_inode(struct inode *inode)
 
 int coda_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
-	int err = coda_revalidate_inode(dentry);
+	int err = coda_revalidate_inode(dentry->d_inode);
 	if (!err)
 		generic_fillattr(dentry->d_inode, stat);
 	return err;

From 0f6ed63b170778b9c93fb0ae4017f110c9ee6416 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Oct 2013 14:19:39 -0400
Subject: [PATCH 051/101] no need to keep brlock macros anymore...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/lglock.h | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index 0d24e932db0b..96549abe8842 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -25,16 +25,6 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 
-/* can make br locks by using local lock for read side, global lock for write */
-#define br_lock_init(name)	lg_lock_init(name, #name)
-#define br_read_lock(name)	lg_local_lock(name)
-#define br_read_unlock(name)	lg_local_unlock(name)
-#define br_write_lock(name)	lg_global_lock(name)
-#define br_write_unlock(name)	lg_global_unlock(name)
-
-#define DEFINE_BRLOCK(name)		DEFINE_LGLOCK(name)
-#define DEFINE_STATIC_BRLOCK(name)	DEFINE_STATIC_LGLOCK(name)
-
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 #define LOCKDEP_INIT_MAP lockdep_init_map
 #else

From 7d2f551f6dc933f87933e906e48583169bbc7c27 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 6 Oct 2013 11:10:08 -0400
Subject: [PATCH 052/101] restore 32bit aout coredump

just getting rid of bitrot

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/ia32/ia32_aout.c | 70 ++++++++++++++++++++-------------------
 1 file changed, 36 insertions(+), 34 deletions(-)

diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index bae3aba95b15..80361c00fb64 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -25,6 +25,7 @@
 #include <linux/personality.h>
 #include <linux/init.h>
 #include <linux/jiffies.h>
+#include <linux/perf_event.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
@@ -33,14 +34,18 @@
 #include <asm/ia32.h>
 
 #undef WARN_OLD
-#undef CORE_DUMP /* definitely broken */
 
 static int load_aout_binary(struct linux_binprm *);
 static int load_aout_library(struct file *);
 
-#ifdef CORE_DUMP
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
-			  unsigned long limit);
+#ifdef CONFIG_COREDUMP
+static int aout_core_dump(struct coredump_params *);
+
+static unsigned long get_dr(int n)
+{
+	struct perf_event *bp = current->thread.ptrace_bps[n];
+	return bp ? bp->hw.info.address : 0;
+}
 
 /*
  * fill in the user structure for a core dump..
@@ -48,6 +53,7 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
 static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
 {
 	u32 fs, gs;
+	memset(dump, 0, sizeof(*dump));
 
 /* changed the size calculations - should hopefully work better. lbt */
 	dump->magic = CMAGIC;
@@ -57,15 +63,12 @@ static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
 	dump->u_dsize = ((unsigned long)
 			 (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
 	dump->u_dsize -= dump->u_tsize;
-	dump->u_ssize = 0;
-	dump->u_debugreg[0] = current->thread.debugreg0;
-	dump->u_debugreg[1] = current->thread.debugreg1;
-	dump->u_debugreg[2] = current->thread.debugreg2;
-	dump->u_debugreg[3] = current->thread.debugreg3;
-	dump->u_debugreg[4] = 0;
-	dump->u_debugreg[5] = 0;
+	dump->u_debugreg[0] = get_dr(0);
+	dump->u_debugreg[1] = get_dr(1);
+	dump->u_debugreg[2] = get_dr(2);
+	dump->u_debugreg[3] = get_dr(3);
 	dump->u_debugreg[6] = current->thread.debugreg6;
-	dump->u_debugreg[7] = current->thread.debugreg7;
+	dump->u_debugreg[7] = current->thread.ptrace_dr7;
 
 	if (dump->start_stack < 0xc0000000) {
 		unsigned long tmp;
@@ -74,24 +77,24 @@ static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
 		dump->u_ssize = tmp >> PAGE_SHIFT;
 	}
 
-	dump->regs.bx = regs->bx;
-	dump->regs.cx = regs->cx;
-	dump->regs.dx = regs->dx;
-	dump->regs.si = regs->si;
-	dump->regs.di = regs->di;
-	dump->regs.bp = regs->bp;
-	dump->regs.ax = regs->ax;
+	dump->regs.ebx = regs->bx;
+	dump->regs.ecx = regs->cx;
+	dump->regs.edx = regs->dx;
+	dump->regs.esi = regs->si;
+	dump->regs.edi = regs->di;
+	dump->regs.ebp = regs->bp;
+	dump->regs.eax = regs->ax;
 	dump->regs.ds = current->thread.ds;
 	dump->regs.es = current->thread.es;
 	savesegment(fs, fs);
 	dump->regs.fs = fs;
 	savesegment(gs, gs);
 	dump->regs.gs = gs;
-	dump->regs.orig_ax = regs->orig_ax;
-	dump->regs.ip = regs->ip;
+	dump->regs.orig_eax = regs->orig_ax;
+	dump->regs.eip = regs->ip;
 	dump->regs.cs = regs->cs;
-	dump->regs.flags = regs->flags;
-	dump->regs.sp = regs->sp;
+	dump->regs.eflags = regs->flags;
+	dump->regs.esp = regs->sp;
 	dump->regs.ss = regs->ss;
 
 #if 1 /* FIXME */
@@ -107,7 +110,7 @@ static struct linux_binfmt aout_format = {
 	.module		= THIS_MODULE,
 	.load_binary	= load_aout_binary,
 	.load_shlib	= load_aout_library,
-#ifdef CORE_DUMP
+#ifdef CONFIG_COREDUMP
 	.core_dump	= aout_core_dump,
 #endif
 	.min_coredump	= PAGE_SIZE
@@ -122,7 +125,7 @@ static void set_brk(unsigned long start, unsigned long end)
 	vm_brk(start, end - start);
 }
 
-#ifdef CORE_DUMP
+#ifdef CONFIG_COREDUMP
 /*
  * These are the only things you should do on a core-file: use only these
  * macros to write out all the necessary info.
@@ -131,14 +134,14 @@ static void set_brk(unsigned long start, unsigned long end)
 #include <linux/coredump.h>
 
 #define DUMP_WRITE(addr, nr)			     \
-	if (!dump_write(file, (void *)(addr), (nr))) \
+	if (!dump_write(cprm->file, (void *)(addr), (nr))) \
 		goto end_coredump;
 
 #define DUMP_SEEK(offset)		\
-	if (!dump_seek(file, offset))	\
+	if (!dump_seek(cprm->file, offset))	\
 		goto end_coredump;
 
-#define START_DATA()	(u.u_tsize << PAGE_SHIFT)
+#define START_DATA(u)	(u.u_tsize << PAGE_SHIFT)
 #define START_STACK(u)	(u.start_stack)
 
 /*
@@ -151,8 +154,7 @@ static void set_brk(unsigned long start, unsigned long end)
  * dumping of the process results in another error..
  */
 
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
-			  unsigned long limit)
+static int aout_core_dump(struct coredump_params *cprm)
 {
 	mm_segment_t fs;
 	int has_dumped = 0;
@@ -164,19 +166,19 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
 	has_dumped = 1;
 	strncpy(dump.u_comm, current->comm, sizeof(current->comm));
 	dump.u_ar0 = offsetof(struct user32, regs);
-	dump.signal = signr;
-	dump_thread32(regs, &dump);
+	dump.signal = cprm->siginfo->si_signo;
+	dump_thread32(cprm->regs, &dump);
 
 	/*
 	 * If the size of the dump file exceeds the rlimit, then see
 	 * what would happen if we wrote the stack, but not the data
 	 * area.
 	 */
-	if ((dump.u_dsize + dump.u_ssize + 1) * PAGE_SIZE > limit)
+	if ((dump.u_dsize + dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
 		dump.u_dsize = 0;
 
 	/* Make sure we have enough room to write the stack and data areas. */
-	if ((dump.u_ssize + 1) * PAGE_SIZE > limit)
+	if ((dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
 		dump.u_ssize = 0;
 
 	/* make sure we actually have a data and stack area to dump */

From ecc8c7725e6c21528329b34acae2a1d64b3af89b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Oct 2013 15:32:35 -0400
Subject: [PATCH 053/101] new helper: dump_emit()

dump_write() analog, takes core_dump_params instead of file,
keeps track of the amount written in cprm->written and checks for
cprm->limit.  Start using it in binfmt_elf.c...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/binfmt_elf.c          | 60 +++++++++++++++-------------------------
 fs/coredump.c            | 14 ++++++++++
 include/linux/binfmts.h  |  1 +
 include/linux/coredump.h |  2 ++
 4 files changed, 40 insertions(+), 37 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 501c8a4d6eb1..00fd9c969a27 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1225,35 +1225,23 @@ static int notesize(struct memelfnote *en)
 	return sz;
 }
 
-#define DUMP_WRITE(addr, nr, foffset)	\
-	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
-
-static int alignfile(struct file *file, loff_t *foffset)
+static int alignfile(struct coredump_params *cprm)
 {
 	static const char buf[4] = { 0, };
-	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
-	return 1;
+	return dump_emit(cprm, buf, roundup(cprm->written, 4) - cprm->written);
 }
 
-static int writenote(struct memelfnote *men, struct file *file,
-			loff_t *foffset)
+static int writenote(struct memelfnote *men, struct coredump_params *cprm)
 {
 	struct elf_note en;
 	en.n_namesz = strlen(men->name) + 1;
 	en.n_descsz = men->datasz;
 	en.n_type = men->type;
 
-	DUMP_WRITE(&en, sizeof(en), foffset);
-	DUMP_WRITE(men->name, en.n_namesz, foffset);
-	if (!alignfile(file, foffset))
-		return 0;
-	DUMP_WRITE(men->data, men->datasz, foffset);
-	if (!alignfile(file, foffset))
-		return 0;
-
-	return 1;
+	return dump_emit(cprm, &en, sizeof(en)) &&
+	    dump_emit(cprm, men->name, en.n_namesz) && alignfile(cprm) &&
+	    dump_emit(cprm, men->data, men->datasz) && alignfile(cprm);
 }
-#undef DUMP_WRITE
 
 static void fill_elf_header(struct elfhdr *elf, int segs,
 			    u16 machine, u32 flags)
@@ -1702,7 +1690,7 @@ static size_t get_note_info_size(struct elf_note_info *info)
  * process-wide notes are interleaved after the first thread-specific note.
  */
 static int write_note_info(struct elf_note_info *info,
-			   struct file *file, loff_t *foffset)
+			   struct coredump_params *cprm)
 {
 	bool first = 1;
 	struct elf_thread_core_info *t = info->thread;
@@ -1710,22 +1698,22 @@ static int write_note_info(struct elf_note_info *info,
 	do {
 		int i;
 
-		if (!writenote(&t->notes[0], file, foffset))
+		if (!writenote(&t->notes[0], cprm))
 			return 0;
 
-		if (first && !writenote(&info->psinfo, file, foffset))
+		if (first && !writenote(&info->psinfo, cprm))
 			return 0;
-		if (first && !writenote(&info->signote, file, foffset))
+		if (first && !writenote(&info->signote, cprm))
 			return 0;
-		if (first && !writenote(&info->auxv, file, foffset))
+		if (first && !writenote(&info->auxv, cprm))
 			return 0;
 		if (first && info->files.data &&
-				!writenote(&info->files, file, foffset))
+				!writenote(&info->files, cprm))
 			return 0;
 
 		for (i = 1; i < info->thread_notes; ++i)
 			if (t->notes[i].data &&
-			    !writenote(&t->notes[i], file, foffset))
+			    !writenote(&t->notes[i], cprm))
 				return 0;
 
 		first = 0;
@@ -1935,13 +1923,13 @@ static size_t get_note_info_size(struct elf_note_info *info)
 }
 
 static int write_note_info(struct elf_note_info *info,
-			   struct file *file, loff_t *foffset)
+			   struct coredump_params *cprm)
 {
 	int i;
 	struct list_head *t;
 
 	for (i = 0; i < info->numnote; i++)
-		if (!writenote(info->notes + i, file, foffset))
+		if (!writenote(info->notes + i, cprm))
 			return 0;
 
 	/* write out the thread status notes section */
@@ -1950,7 +1938,7 @@ static int write_note_info(struct elf_note_info *info,
 				list_entry(t, struct elf_thread_status, list);
 
 		for (i = 0; i < tmp->num_notes; i++)
-			if (!writenote(&tmp->notes[i], file, foffset))
+			if (!writenote(&tmp->notes[i], cprm))
 				return 0;
 	}
 
@@ -2136,13 +2124,10 @@ static int elf_core_dump(struct coredump_params *cprm)
 
 	offset = dataoff;
 
-	size += sizeof(*elf);
-	if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
+	if (!dump_emit(cprm, elf, sizeof(*elf)))
 		goto end_coredump;
 
-	size += sizeof(*phdr4note);
-	if (size > cprm->limit
-	    || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
+	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
 		goto end_coredump;
 
 	/* Write program headers for segments dump */
@@ -2164,19 +2149,20 @@ static int elf_core_dump(struct coredump_params *cprm)
 			phdr.p_flags |= PF_X;
 		phdr.p_align = ELF_EXEC_PAGESIZE;
 
-		size += sizeof(phdr);
-		if (size > cprm->limit
-		    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
+		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
 			goto end_coredump;
 	}
+	size = cprm->written;
 
 	if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
 		goto end_coredump;
 
+	cprm->written = foffset;	/* will disappear */
  	/* write out the notes section */
-	if (!write_note_info(&info, cprm->file, &foffset))
+	if (!write_note_info(&info, cprm))
 		goto end_coredump;
 
+	foffset = cprm->written;
 	if (elf_coredump_extra_notes_write(cprm->file, &foffset))
 		goto end_coredump;
 
diff --git a/fs/coredump.c b/fs/coredump.c
index 42c3b8423669..319f973bab72 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -693,6 +693,20 @@ int dump_write(struct file *file, const void *addr, int nr)
 }
 EXPORT_SYMBOL(dump_write);
 
+int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
+{
+	struct file *file = cprm->file;
+	if (dump_interrupted() || !access_ok(VERIFY_READ, addr, nr))
+		return 0;
+	if (cprm->written + nr > cprm->limit)
+		return 0;
+	if (file->f_op->write(file, addr, nr, &file->f_pos) != nr)
+		return 0;
+	cprm->written += nr;
+	return 1;
+}
+EXPORT_SYMBOL(dump_emit);
+
 int dump_seek(struct file *file, loff_t off)
 {
 	int ret = 1;
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index e8112ae50531..8aa507e7a41a 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -61,6 +61,7 @@ struct coredump_params {
 	struct file *file;
 	unsigned long limit;
 	unsigned long mm_flags;
+	loff_t written;
 };
 
 /*
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index a98f1ca60407..2959376a9ad5 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -10,8 +10,10 @@
  * These are the only things you should do on a core-file: use only these
  * functions to write out all the necessary info.
  */
+struct coredump_params;
 extern int dump_write(struct file *file, const void *addr, int nr);
 extern int dump_seek(struct file *file, loff_t off);
+extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
 #ifdef CONFIG_COREDUMP
 extern void do_coredump(siginfo_t *siginfo);
 #else

From 506f21c556c747bb07b893f146220ec45cda381b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Oct 2013 17:22:57 -0400
Subject: [PATCH 054/101] switch elf_core_write_extra_phdrs() to dump_emit()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/ia64/kernel/elfcore.c |  6 ++----
 arch/x86/um/elfcore.c      |  7 ++-----
 fs/binfmt_elf.c            |  4 ++--
 fs/binfmt_elf_fdpic.c      |  4 +++-
 include/linux/elfcore.h    |  5 +++--
 kernel/elfcore.c           | 10 +++-------
 6 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c
index bac1639bc320..798ce543da4b 100644
--- a/arch/ia64/kernel/elfcore.c
+++ b/arch/ia64/kernel/elfcore.c
@@ -11,8 +11,7 @@ Elf64_Half elf_core_extra_phdrs(void)
 	return GATE_EHDR->e_phnum;
 }
 
-int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
-			       unsigned long limit)
+int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
 {
 	const struct elf_phdr *const gate_phdrs =
 		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
@@ -35,8 +34,7 @@ int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
 			phdr.p_offset += ofs;
 		}
 		phdr.p_paddr = 0; /* match other core phdrs */
-		*size += sizeof(phdr);
-		if (*size > limit || !dump_write(file, &phdr, sizeof(phdr)))
+		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
 			return 0;
 	}
 	return 1;
diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c
index 6bb49b687c97..fc21f98efafe 100644
--- a/arch/x86/um/elfcore.c
+++ b/arch/x86/um/elfcore.c
@@ -11,8 +11,7 @@ Elf32_Half elf_core_extra_phdrs(void)
 	return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0;
 }
 
-int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
-			       unsigned long limit)
+int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
 {
 	if ( vsyscall_ehdr ) {
 		const struct elfhdr *const ehdrp =
@@ -32,9 +31,7 @@ int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
 				phdr.p_offset += ofs;
 			}
 			phdr.p_paddr = 0; /* match other core phdrs */
-			*size += sizeof(phdr);
-			if (*size > limit
-			    || !dump_write(file, &phdr, sizeof(phdr)))
+			if (!dump_emit(cprm, &phdr, sizeof(phdr)))
 				return 0;
 		}
 	}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 00fd9c969a27..35c4886dae2d 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2152,11 +2152,11 @@ static int elf_core_dump(struct coredump_params *cprm)
 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
 			goto end_coredump;
 	}
-	size = cprm->written;
 
-	if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
+	if (!elf_core_write_extra_phdrs(cprm, offset))
 		goto end_coredump;
 
+	size = cprm->written;
 	cprm->written = foffset;	/* will disappear */
  	/* write out the notes section */
 	if (!write_note_info(&info, cprm))
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index ea4c6273b4a5..44db8b92121a 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1791,9 +1791,11 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 			goto end_coredump;
 	}
 
-	if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
+	cprm->written = size;
+	if (!elf_core_write_extra_phdrs(cprm, offset))
 		goto end_coredump;
 
+	size = cprm->written;
  	/* write out the notes section */
 	for (i = 0; i < numnote; i++)
 		if (!writenote(notes + i, cprm->file, &foffset))
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index cdd3d13efce7..1b92a8c40624 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -6,6 +6,8 @@
 #include <asm/elf.h>
 #include <uapi/linux/elfcore.h>
 
+struct coredump_params;
+
 static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *regs)
 {
 #ifdef ELF_CORE_COPY_REGS
@@ -63,8 +65,7 @@ static inline int elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregse
  */
 extern Elf_Half elf_core_extra_phdrs(void);
 extern int
-elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
-			   unsigned long limit);
+elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset);
 extern int
 elf_core_write_extra_data(struct file *file, size_t *size, unsigned long limit);
 extern size_t elf_core_extra_data_size(void);
diff --git a/kernel/elfcore.c b/kernel/elfcore.c
index ff915efef66d..e556751d15d9 100644
--- a/kernel/elfcore.c
+++ b/kernel/elfcore.c
@@ -1,23 +1,19 @@
 #include <linux/elf.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
-
-#include <asm/elf.h>
-
+#include <linux/binfmts.h>
 
 Elf_Half __weak elf_core_extra_phdrs(void)
 {
 	return 0;
 }
 
-int __weak elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
-				      unsigned long limit)
+int __weak elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
 {
 	return 1;
 }
 
-int __weak elf_core_write_extra_data(struct file *file, size_t *size,
-				     unsigned long limit)
+int __weak elf_core_write_extra_data(struct coredump_params *cprm)
 {
 	return 1;
 }

From aa3e7eaf0a0f06edd2b733e84e7e8ffe108e8786 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Oct 2013 17:50:15 -0400
Subject: [PATCH 055/101] switch elf_core_write_extra_data() to dump_emit()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/ia64/kernel/elfcore.c | 6 ++----
 arch/x86/um/elfcore.c      | 8 ++------
 fs/binfmt_elf.c            | 4 +++-
 fs/binfmt_elf_fdpic.c      | 4 +++-
 include/linux/elfcore.h    | 2 +-
 5 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c
index 798ce543da4b..04bc8fd5f893 100644
--- a/arch/ia64/kernel/elfcore.c
+++ b/arch/ia64/kernel/elfcore.c
@@ -40,8 +40,7 @@ int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
 	return 1;
 }
 
-int elf_core_write_extra_data(struct file *file, size_t *size,
-			      unsigned long limit)
+int elf_core_write_extra_data(struct coredump_params *cprm)
 {
 	const struct elf_phdr *const gate_phdrs =
 		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
@@ -52,8 +51,7 @@ int elf_core_write_extra_data(struct file *file, size_t *size,
 			void *addr = (void *)gate_phdrs[i].p_vaddr;
 			size_t memsz = PAGE_ALIGN(gate_phdrs[i].p_memsz);
 
-			*size += memsz;
-			if (*size > limit || !dump_write(file, addr, memsz))
+			if (!dump_emit(cprm, addr, memsz))
 				return 0;
 			break;
 		}
diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c
index fc21f98efafe..7bb89a27a5e4 100644
--- a/arch/x86/um/elfcore.c
+++ b/arch/x86/um/elfcore.c
@@ -38,8 +38,7 @@ int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
 	return 1;
 }
 
-int elf_core_write_extra_data(struct file *file, size_t *size,
-			      unsigned long limit)
+int elf_core_write_extra_data(struct coredump_params *cprm)
 {
 	if ( vsyscall_ehdr ) {
 		const struct elfhdr *const ehdrp =
@@ -52,10 +51,7 @@ int elf_core_write_extra_data(struct file *file, size_t *size,
 			if (phdrp[i].p_type == PT_LOAD) {
 				void *addr = (void *) phdrp[i].p_vaddr;
 				size_t filesz = phdrp[i].p_filesz;
-
-				*size += filesz;
-				if (*size > limit
-				    || !dump_write(file, addr, filesz))
+				if (!dump_emit(cprm, addr, filesz))
 					return 0;
 			}
 		}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 35c4886dae2d..a998b221b53a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2196,8 +2196,10 @@ static int elf_core_dump(struct coredump_params *cprm)
 		}
 	}
 
-	if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
+	cprm->written = size;
+	if (!elf_core_write_extra_data(cprm))
 		goto end_coredump;
+	size = cprm->written;
 
 	if (e_phnum == PN_XNUM) {
 		size += sizeof(*shdr4extnum);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 44db8b92121a..77bf7e33e706 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1818,8 +1818,10 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 				    cprm->mm_flags) < 0)
 		goto end_coredump;
 
-	if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
+	cprm->written = size;
+	if (!elf_core_write_extra_data(cprm))
 		goto end_coredump;
+	size = cprm->written;
 
 	if (e_phnum == PN_XNUM) {
 		size += sizeof(*shdr4extnum);
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 1b92a8c40624..698d51a0eea3 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -67,7 +67,7 @@ extern Elf_Half elf_core_extra_phdrs(void);
 extern int
 elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset);
 extern int
-elf_core_write_extra_data(struct file *file, size_t *size, unsigned long limit);
+elf_core_write_extra_data(struct coredump_params *cprm);
 extern size_t elf_core_extra_data_size(void);
 
 #endif /* _LINUX_ELFCORE_H */

From 13046ece9625f96fafb0cff0b9b95a586b53b553 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Oct 2013 18:08:47 -0400
Subject: [PATCH 056/101] binfmt_elf: convert writing actual dump pages to
 dump_emit()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/binfmt_elf.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index a998b221b53a..ddab40d3d828 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2093,7 +2093,6 @@ static int elf_core_dump(struct coredump_params *cprm)
 
 	offset += sizeof(*elf);				/* Elf header */
 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
-	foffset = offset;
 
 	/* Write notes phdr entry */
 	{
@@ -2157,7 +2156,6 @@ static int elf_core_dump(struct coredump_params *cprm)
 		goto end_coredump;
 
 	size = cprm->written;
-	cprm->written = foffset;	/* will disappear */
  	/* write out the notes section */
 	if (!write_note_info(&info, cprm))
 		goto end_coredump;
@@ -2170,6 +2168,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	if (!dump_seek(cprm->file, dataoff - foffset))
 		goto end_coredump;
 
+	cprm->written = size;
 	for (vma = first_vma(current, gate_vma); vma != NULL;
 			vma = next_vma(vma, gate_vma)) {
 		unsigned long addr;
@@ -2184,9 +2183,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 			page = get_dump_page(addr);
 			if (page) {
 				void *kaddr = kmap(page);
-				stop = ((size += PAGE_SIZE) > cprm->limit) ||
-					!dump_write(cprm->file, kaddr,
-						    PAGE_SIZE);
+				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
 				kunmap(page);
 				page_cache_release(page);
 			} else
@@ -2196,16 +2193,11 @@ static int elf_core_dump(struct coredump_params *cprm)
 		}
 	}
 
-	cprm->written = size;
 	if (!elf_core_write_extra_data(cprm))
 		goto end_coredump;
-	size = cprm->written;
 
 	if (e_phnum == PN_XNUM) {
-		size += sizeof(*shdr4extnum);
-		if (size > cprm->limit
-		    || !dump_write(cprm->file, shdr4extnum,
-				   sizeof(*shdr4extnum)))
+		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
 			goto end_coredump;
 	}
 

From e6c1baa9b562ca296d57178c44f3894795d13d32 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Oct 2013 18:58:47 -0400
Subject: [PATCH 057/101] convert the rest of binfmt_elf_fdpic to dump_emit()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/binfmt_elf_fdpic.c | 114 ++++++++++++------------------------------
 1 file changed, 33 insertions(+), 81 deletions(-)

diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 77bf7e33e706..70e299917898 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1267,35 +1267,23 @@ static int notesize(struct memelfnote *en)
 
 /* #define DEBUG */
 
-#define DUMP_WRITE(addr, nr, foffset)	\
-	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
-
-static int alignfile(struct file *file, loff_t *foffset)
+static int alignfile(struct coredump_params *cprm)
 {
 	static const char buf[4] = { 0, };
-	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
-	return 1;
+	return dump_emit(cprm, buf, roundup(cprm->written, 4) - cprm->written);
 }
 
-static int writenote(struct memelfnote *men, struct file *file,
-			loff_t *foffset)
+static int writenote(struct memelfnote *men, struct coredump_params *cprm)
 {
 	struct elf_note en;
 	en.n_namesz = strlen(men->name) + 1;
 	en.n_descsz = men->datasz;
 	en.n_type = men->type;
 
-	DUMP_WRITE(&en, sizeof(en), foffset);
-	DUMP_WRITE(men->name, en.n_namesz, foffset);
-	if (!alignfile(file, foffset))
-		return 0;
-	DUMP_WRITE(men->data, men->datasz, foffset);
-	if (!alignfile(file, foffset))
-		return 0;
-
-	return 1;
+	return dump_emit(cprm, &en, sizeof(en)) &&
+		dump_emit(cprm, men->name, en.n_namesz) && alignfile(cprm) &&
+		dump_emit(cprm, men->data, men->datasz) && alignfile(cprm);
 }
-#undef DUMP_WRITE
 
 static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
 {
@@ -1500,66 +1488,40 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
 /*
  * dump the segments for an MMU process
  */
-#ifdef CONFIG_MMU
-static int elf_fdpic_dump_segments(struct file *file, size_t *size,
-			   unsigned long *limit, unsigned long mm_flags)
+static bool elf_fdpic_dump_segments(struct coredump_params *cprm)
 {
 	struct vm_area_struct *vma;
-	int err = 0;
 
 	for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
 		unsigned long addr;
 
-		if (!maydump(vma, mm_flags))
+		if (!maydump(vma, cprm->mm_flags))
 			continue;
 
+#ifdef CONFIG_MMU
 		for (addr = vma->vm_start; addr < vma->vm_end;
 							addr += PAGE_SIZE) {
+			bool res;
 			struct page *page = get_dump_page(addr);
 			if (page) {
 				void *kaddr = kmap(page);
-				*size += PAGE_SIZE;
-				if (*size > *limit)
-					err = -EFBIG;
-				else if (!dump_write(file, kaddr, PAGE_SIZE))
-					err = -EIO;
+				res = dump_emit(cprm, kaddr, PAGE_SIZE);
 				kunmap(page);
 				page_cache_release(page);
-			} else if (!dump_seek(file, PAGE_SIZE))
-				err = -EFBIG;
-			if (err)
-				goto out;
+			} else {
+				res = dump_seek(file, PAGE_SIZE);
+			}
+			if (!res)
+				return false;
 		}
-	}
-out:
-	return err;
-}
-#endif
-
-/*
- * dump the segments for a NOMMU process
- */
-#ifndef CONFIG_MMU
-static int elf_fdpic_dump_segments(struct file *file, size_t *size,
-			   unsigned long *limit, unsigned long mm_flags)
-{
-	struct vm_area_struct *vma;
-
-	for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
-		if (!maydump(vma, mm_flags))
-			continue;
-
-		if ((*size += PAGE_SIZE) > *limit)
-			return -EFBIG;
-
-		if (!dump_write(file, (void *) vma->vm_start,
+#else
+		if (!dump_emit(cprm, (void *) vma->vm_start,
 				vma->vm_end - vma->vm_start))
-			return -EIO;
-	}
-
-	return 0;
-}
+			return false;
 #endif
+	}
+	return true;
+}
 
 static size_t elf_core_vma_data_size(unsigned long mm_flags)
 {
@@ -1755,13 +1717,10 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 
 	offset = dataoff;
 
-	size += sizeof(*elf);
-	if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
+	if (!dump_emit(cprm, elf, sizeof(*elf)))
 		goto end_coredump;
 
-	size += sizeof(*phdr4note);
-	if (size > cprm->limit
-	    || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
+	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
 		goto end_coredump;
 
 	/* write program headers for segments dump */
@@ -1785,20 +1744,18 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 			phdr.p_flags |= PF_X;
 		phdr.p_align = ELF_EXEC_PAGESIZE;
 
-		size += sizeof(phdr);
-		if (size > cprm->limit
-		    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
+		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
 			goto end_coredump;
 	}
 
-	cprm->written = size;
 	if (!elf_core_write_extra_phdrs(cprm, offset))
 		goto end_coredump;
 
 	size = cprm->written;
+	cprm->written = foffset;
  	/* write out the notes section */
 	for (i = 0; i < numnote; i++)
-		if (!writenote(notes + i, cprm->file, &foffset))
+		if (!writenote(notes + i, cprm))
 			goto end_coredump;
 
 	/* write out the thread status notes section */
@@ -1807,27 +1764,22 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 				list_entry(t, struct elf_thread_status, list);
 
 		for (i = 0; i < tmp->num_notes; i++)
-			if (!writenote(&tmp->notes[i], cprm->file, &foffset))
+			if (!writenote(&tmp->notes[i], cprm))
 				goto end_coredump;
 	}
 
-	if (!dump_seek(cprm->file, dataoff - foffset))
-		goto end_coredump;
-
-	if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit,
-				    cprm->mm_flags) < 0)
+	if (!dump_seek(cprm->file, dataoff - cprm->written))
 		goto end_coredump;
 
 	cprm->written = size;
+	if (!elf_fdpic_dump_segments(cprm))
+		goto end_coredump;
+
 	if (!elf_core_write_extra_data(cprm))
 		goto end_coredump;
-	size = cprm->written;
 
 	if (e_phnum == PN_XNUM) {
-		size += sizeof(*shdr4extnum);
-		if (size > cprm->limit
-		    || !dump_write(cprm->file, shdr4extnum,
-				   sizeof(*shdr4extnum)))
+		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
 			goto end_coredump;
 	}
 

From cdc3d5627d5f7c4e6b6372b9fb39cba0fe6a9b2a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Oct 2013 22:24:29 -0400
Subject: [PATCH 058/101] switch elf_coredump_extra_notes_write() to
 dump_emit()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/powerpc/include/asm/spu.h               |  3 +-
 arch/powerpc/platforms/cell/spu_syscalls.c   |  5 ++-
 arch/powerpc/platforms/cell/spufs/coredump.c | 44 ++++++++------------
 arch/powerpc/platforms/cell/spufs/spufs.h    |  3 +-
 fs/binfmt_elf.c                              |  7 ++--
 include/linux/elf.h                          |  6 +--
 6 files changed, 30 insertions(+), 38 deletions(-)

diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h
index 93f280e23279..37b7ca39ec9f 100644
--- a/arch/powerpc/include/asm/spu.h
+++ b/arch/powerpc/include/asm/spu.h
@@ -235,6 +235,7 @@ extern long spu_sys_callback(struct spu_syscall_block *s);
 
 /* syscalls implemented in spufs */
 struct file;
+struct coredump_params;
 struct spufs_calls {
 	long (*create_thread)(const char __user *name,
 					unsigned int flags, umode_t mode,
@@ -242,7 +243,7 @@ struct spufs_calls {
 	long (*spu_run)(struct file *filp, __u32 __user *unpc,
 						__u32 __user *ustatus);
 	int (*coredump_extra_notes_size)(void);
-	int (*coredump_extra_notes_write)(struct file *file, loff_t *foffset);
+	int (*coredump_extra_notes_write)(struct coredump_params *cprm);
 	void (*notify_spus_active)(void);
 	struct module *owner;
 };
diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c
index db4e638cf408..3844f1397fc3 100644
--- a/arch/powerpc/platforms/cell/spu_syscalls.c
+++ b/arch/powerpc/platforms/cell/spu_syscalls.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/syscalls.h>
 #include <linux/rcupdate.h>
+#include <linux/binfmts.h>
 
 #include <asm/spu.h>
 
@@ -126,7 +127,7 @@ int elf_coredump_extra_notes_size(void)
 	return ret;
 }
 
-int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset)
+int elf_coredump_extra_notes_write(struct coredump_params *cprm)
 {
 	struct spufs_calls *calls;
 	int ret;
@@ -135,7 +136,7 @@ int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset)
 	if (!calls)
 		return 0;
 
-	ret = calls->coredump_extra_notes_write(file, foffset);
+	ret = calls->coredump_extra_notes_write(cprm);
 
 	spufs_calls_put(calls);
 
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index c9500ea7be2f..5d9b0a288f36 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -27,6 +27,8 @@
 #include <linux/gfp.h>
 #include <linux/list.h>
 #include <linux/syscalls.h>
+#include <linux/coredump.h>
+#include <linux/binfmts.h>
 
 #include <asm/uaccess.h>
 
@@ -52,35 +54,24 @@ static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer,
  * These are the only things you should do on a core-file: use only these
  * functions to write out all the necessary info.
  */
-static int spufs_dump_write(struct file *file, const void *addr, int nr, loff_t *foffset)
+static int spufs_dump_write(struct coredump_params *cprm, const void *addr, int nr)
 {
-	unsigned long limit = rlimit(RLIMIT_CORE);
-	ssize_t written;
-
-	if (*foffset + nr > limit)
+	if (!dump_emit(cprm, addr, nr))
 		return -EIO;
-
-	written = file->f_op->write(file, addr, nr, &file->f_pos);
-	*foffset += written;
-
-	if (written != nr)
-		return -EIO;
-
 	return 0;
 }
 
-static int spufs_dump_align(struct file *file, char *buf, loff_t new_off,
-			    loff_t *foffset)
+static int spufs_dump_align(struct coredump_params *cprm, char *buf, loff_t new_off)
 {
 	int rc, size;
 
-	size = min((loff_t)PAGE_SIZE, new_off - *foffset);
+	size = min((loff_t)PAGE_SIZE, new_off - cprm->written);
 	memset(buf, 0, size);
 
 	rc = 0;
-	while (rc == 0 && new_off > *foffset) {
-		size = min((loff_t)PAGE_SIZE, new_off - *foffset);
-		rc = spufs_dump_write(file, buf, size, foffset);
+	while (rc == 0 && new_off > cprm->written) {
+		size = min((loff_t)PAGE_SIZE, new_off - cprm->written);
+		rc = spufs_dump_write(cprm, buf, size);
 	}
 
 	return rc;
@@ -165,7 +156,7 @@ int spufs_coredump_extra_notes_size(void)
 }
 
 static int spufs_arch_write_note(struct spu_context *ctx, int i,
-				  struct file *file, int dfd, loff_t *foffset)
+				  struct coredump_params *cprm, int dfd)
 {
 	loff_t pos = 0;
 	int sz, rc, nread, total = 0;
@@ -186,22 +177,22 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
 	en.n_descsz = sz;
 	en.n_type = NT_SPU;
 
-	rc = spufs_dump_write(file, &en, sizeof(en), foffset);
+	rc = spufs_dump_write(cprm, &en, sizeof(en));
 	if (rc)
 		goto out;
 
-	rc = spufs_dump_write(file, fullname, en.n_namesz, foffset);
+	rc = spufs_dump_write(cprm, fullname, en.n_namesz);
 	if (rc)
 		goto out;
 
-	rc = spufs_dump_align(file, buf, roundup(*foffset, 4), foffset);
+	rc = spufs_dump_align(cprm, buf, roundup(cprm->written, 4));
 	if (rc)
 		goto out;
 
 	do {
 		nread = do_coredump_read(i, ctx, buf, bufsz, &pos);
 		if (nread > 0) {
-			rc = spufs_dump_write(file, buf, nread, foffset);
+			rc = spufs_dump_write(cprm, buf, nread);
 			if (rc)
 				goto out;
 			total += nread;
@@ -213,15 +204,14 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
 		goto out;
 	}
 
-	rc = spufs_dump_align(file, buf, roundup(*foffset - total + sz, 4),
-			      foffset);
+	rc = spufs_dump_align(cprm, buf, roundup(cprm->written - total + sz, 4));
 
 out:
 	free_page((unsigned long)buf);
 	return rc;
 }
 
-int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset)
+int spufs_coredump_extra_notes_write(struct coredump_params *cprm)
 {
 	struct spu_context *ctx;
 	int fd, j, rc;
@@ -233,7 +223,7 @@ int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset)
 			return rc;
 
 		for (j = 0; spufs_coredump_read[j].name != NULL; j++) {
-			rc = spufs_arch_write_note(ctx, j, file, fd, foffset);
+			rc = spufs_arch_write_note(ctx, j, cprm, fd);
 			if (rc) {
 				spu_release_saved(ctx);
 				return rc;
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 67852ade4c01..0ba3c9598358 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -247,12 +247,13 @@ extern const struct spufs_tree_descr spufs_dir_debug_contents[];
 
 /* system call implementation */
 extern struct spufs_calls spufs_calls;
+struct coredump_params;
 long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status);
 long spufs_create(struct path *nd, struct dentry *dentry, unsigned int flags,
 			umode_t mode, struct file *filp);
 /* ELF coredump callbacks for writing SPU ELF notes */
 extern int spufs_coredump_extra_notes_size(void);
-extern int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset);
+extern int spufs_coredump_extra_notes_write(struct coredump_params *cprm);
 
 extern const struct file_operations spufs_context_fops;
 
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index ddab40d3d828..3bf75d767a4a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2037,7 +2037,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	size_t size = 0;
 	struct vm_area_struct *vma, *gate_vma;
 	struct elfhdr *elf = NULL;
-	loff_t offset = 0, dataoff, foffset;
+	loff_t offset = 0, dataoff;
 	struct elf_note_info info = { };
 	struct elf_phdr *phdr4note = NULL;
 	struct elf_shdr *shdr4extnum = NULL;
@@ -2160,12 +2160,11 @@ static int elf_core_dump(struct coredump_params *cprm)
 	if (!write_note_info(&info, cprm))
 		goto end_coredump;
 
-	foffset = cprm->written;
-	if (elf_coredump_extra_notes_write(cprm->file, &foffset))
+	if (elf_coredump_extra_notes_write(cprm))
 		goto end_coredump;
 
 	/* Align to page */
-	if (!dump_seek(cprm->file, dataoff - foffset))
+	if (!dump_seek(cprm->file, dataoff - cprm->written))
 		goto end_coredump;
 
 	cprm->written = size;
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 40a3c0e01b2b..67a5fa7830c4 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -39,13 +39,13 @@ extern Elf64_Dyn _DYNAMIC [];
 
 /* Optional callbacks to write extra ELF notes. */
 struct file;
+struct coredump_params;
 
 #ifndef ARCH_HAVE_EXTRA_ELF_NOTES
 static inline int elf_coredump_extra_notes_size(void) { return 0; }
-static inline int elf_coredump_extra_notes_write(struct file *file,
-			loff_t *foffset) { return 0; }
+static inline int elf_coredump_extra_notes_write(struct coredump_params *cprm) { return 0; }
 #else
 extern int elf_coredump_extra_notes_size(void);
-extern int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset);
+extern int elf_coredump_extra_notes_write(struct coredump_params *cprm);
 #endif
 #endif /* _LINUX_ELF_H */

From 43a5d548eb5949580c1d96b9f3fa9f4c0bee0b98 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Oct 2013 07:22:01 -0400
Subject: [PATCH 059/101] aout: switch to dump_emit

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/ia32/ia32_aout.c | 20 ++++++++------------
 fs/binfmt_aout.c          |  7 +++----
 2 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 80361c00fb64..9e26e9e85576 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -133,14 +133,6 @@ static void set_brk(unsigned long start, unsigned long end)
 
 #include <linux/coredump.h>
 
-#define DUMP_WRITE(addr, nr)			     \
-	if (!dump_write(cprm->file, (void *)(addr), (nr))) \
-		goto end_coredump;
-
-#define DUMP_SEEK(offset)		\
-	if (!dump_seek(cprm->file, offset))	\
-		goto end_coredump;
-
 #define START_DATA(u)	(u.u_tsize << PAGE_SHIFT)
 #define START_STACK(u)	(u.start_stack)
 
@@ -192,22 +184,26 @@ static int aout_core_dump(struct coredump_params *cprm)
 
 	set_fs(KERNEL_DS);
 	/* struct user */
-	DUMP_WRITE(&dump, sizeof(dump));
+	if (!dump_emit(cprm, &dump, sizeof(dump)))
+		goto end_coredump;
 	/* Now dump all of the user data.  Include malloced stuff as well */
-	DUMP_SEEK(PAGE_SIZE - sizeof(dump));
+	if (!dump_seek(cprm->file, PAGE_SIZE - sizeof(dump)))
+		goto end_coredump;
 	/* now we start writing out the user space info */
 	set_fs(USER_DS);
 	/* Dump the data area */
 	if (dump.u_dsize != 0) {
 		dump_start = START_DATA(dump);
 		dump_size = dump.u_dsize << PAGE_SHIFT;
-		DUMP_WRITE(dump_start, dump_size);
+		if (!dump_emit(cprm, (void *)dump_start, dump_size))
+			goto end_coredump;
 	}
 	/* Now prepare to dump the stack area */
 	if (dump.u_ssize != 0) {
 		dump_start = START_STACK(dump);
 		dump_size = dump.u_ssize << PAGE_SHIFT;
-		DUMP_WRITE(dump_start, dump_size);
+		if (!dump_emit(cprm, (void *)dump_start, dump_size))
+			goto end_coredump;
 	}
 end_coredump:
 	set_fs(fs);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index faaa819f70fa..a4f847f77234 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -45,7 +45,6 @@ static int load_aout_library(struct file*);
  */
 static int aout_core_dump(struct coredump_params *cprm)
 {
-	struct file *file = cprm->file;
 	mm_segment_t fs;
 	int has_dumped = 0;
 	void __user *dump_start;
@@ -85,7 +84,7 @@ static int aout_core_dump(struct coredump_params *cprm)
 
 	set_fs(KERNEL_DS);
 /* struct user */
-	if (!dump_write(file, &dump, sizeof(dump)))
+	if (!dump_emit(cprm, &dump, sizeof(dump)))
 		goto end_coredump;
 /* Now dump all of the user data.  Include malloced stuff as well */
 	if (!dump_seek(cprm->file, PAGE_SIZE - sizeof(dump)))
@@ -96,14 +95,14 @@ static int aout_core_dump(struct coredump_params *cprm)
 	if (dump.u_dsize != 0) {
 		dump_start = START_DATA(dump);
 		dump_size = dump.u_dsize << PAGE_SHIFT;
-		if (!dump_write(file, dump_start, dump_size))
+		if (!dump_emit(cprm, dump_start, dump_size))
 			goto end_coredump;
 	}
 /* Now prepare to dump the stack area */
 	if (dump.u_ssize != 0) {
 		dump_start = START_STACK(dump);
 		dump_size = dump.u_ssize << PAGE_SHIFT;
-		if (!dump_write(file, dump_start, dump_size))
+		if (!dump_emit(cprm, dump_start, dump_size))
 			goto end_coredump;
 	}
 end_coredump:

From 1ad67015e619ba4e0b15ed0482d464292fedf263 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Oct 2013 07:23:45 -0400
Subject: [PATCH 060/101] binfmt_elf: count notes towards coredump limit

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/binfmt_elf.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 3bf75d767a4a..4f7dda9d86b5 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2034,7 +2034,6 @@ static int elf_core_dump(struct coredump_params *cprm)
 	int has_dumped = 0;
 	mm_segment_t fs;
 	int segs;
-	size_t size = 0;
 	struct vm_area_struct *vma, *gate_vma;
 	struct elfhdr *elf = NULL;
 	loff_t offset = 0, dataoff;
@@ -2155,7 +2154,6 @@ static int elf_core_dump(struct coredump_params *cprm)
 	if (!elf_core_write_extra_phdrs(cprm, offset))
 		goto end_coredump;
 
-	size = cprm->written;
  	/* write out the notes section */
 	if (!write_note_info(&info, cprm))
 		goto end_coredump;
@@ -2167,7 +2165,6 @@ static int elf_core_dump(struct coredump_params *cprm)
 	if (!dump_seek(cprm->file, dataoff - cprm->written))
 		goto end_coredump;
 
-	cprm->written = size;
 	for (vma = first_vma(current, gate_vma); vma != NULL;
 			vma = next_vma(vma, gate_vma)) {
 		unsigned long addr;

From 2507a4fbd48a96bc4236e584252635f8539079df Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 8 Oct 2013 09:11:48 -0400
Subject: [PATCH 061/101] make dump_emit() use vfs_write() instead of banging
 at ->f_op->write directly

... and deal with short writes properly - the output might be to pipe, after
all; as it is, e.g. no-MMU case of elf_fdpic coredump can write a whole lot
more than a page worth of data at one call.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/coredump.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index 319f973bab72..2472ed9e682c 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -696,13 +696,20 @@ EXPORT_SYMBOL(dump_write);
 int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
 {
 	struct file *file = cprm->file;
-	if (dump_interrupted() || !access_ok(VERIFY_READ, addr, nr))
-		return 0;
+	loff_t pos = file->f_pos;
+	ssize_t n;
 	if (cprm->written + nr > cprm->limit)
 		return 0;
-	if (file->f_op->write(file, addr, nr, &file->f_pos) != nr)
-		return 0;
-	cprm->written += nr;
+	while (nr) {
+		if (dump_interrupted())
+			return 0;
+		n = vfs_write(file, addr, nr, &pos);
+		if (n <= 0)
+			return 0;
+		file->f_pos = pos;
+		cprm->written += n;
+		nr -= n;
+	}
 	return 1;
 }
 EXPORT_SYMBOL(dump_emit);

From 9b56d54380adb5fef71f687109bbd6f8413d694f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 8 Oct 2013 09:26:08 -0400
Subject: [PATCH 062/101] dump_skip(): dump_seek() replacement taking
 coredump_params

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/ia32/ia32_aout.c |  2 +-
 fs/binfmt_aout.c          |  2 +-
 fs/binfmt_elf.c           |  4 ++--
 fs/binfmt_elf_fdpic.c     | 11 +++-------
 fs/coredump.c             | 43 +++++++++++++--------------------------
 include/linux/coredump.h  |  3 +--
 6 files changed, 22 insertions(+), 43 deletions(-)

diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 9e26e9e85576..d21ff89207cd 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -187,7 +187,7 @@ static int aout_core_dump(struct coredump_params *cprm)
 	if (!dump_emit(cprm, &dump, sizeof(dump)))
 		goto end_coredump;
 	/* Now dump all of the user data.  Include malloced stuff as well */
-	if (!dump_seek(cprm->file, PAGE_SIZE - sizeof(dump)))
+	if (!dump_skip(cprm, PAGE_SIZE - sizeof(dump)))
 		goto end_coredump;
 	/* now we start writing out the user space info */
 	set_fs(USER_DS);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index a4f847f77234..ca0ba15a7306 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -87,7 +87,7 @@ static int aout_core_dump(struct coredump_params *cprm)
 	if (!dump_emit(cprm, &dump, sizeof(dump)))
 		goto end_coredump;
 /* Now dump all of the user data.  Include malloced stuff as well */
-	if (!dump_seek(cprm->file, PAGE_SIZE - sizeof(dump)))
+	if (!dump_skip(cprm, PAGE_SIZE - sizeof(dump)))
 		goto end_coredump;
 /* now we start writing out the user space info */
 	set_fs(USER_DS);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 4f7dda9d86b5..c56ae3264a65 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2162,7 +2162,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 		goto end_coredump;
 
 	/* Align to page */
-	if (!dump_seek(cprm->file, dataoff - cprm->written))
+	if (!dump_skip(cprm, dataoff - cprm->written))
 		goto end_coredump;
 
 	for (vma = first_vma(current, gate_vma); vma != NULL;
@@ -2183,7 +2183,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 				kunmap(page);
 				page_cache_release(page);
 			} else
-				stop = !dump_seek(cprm->file, PAGE_SIZE);
+				stop = !dump_skip(cprm, PAGE_SIZE);
 			if (stop)
 				goto end_coredump;
 		}
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 70e299917898..a69fc4ae1c85 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1509,7 +1509,7 @@ static bool elf_fdpic_dump_segments(struct coredump_params *cprm)
 				kunmap(page);
 				page_cache_release(page);
 			} else {
-				res = dump_seek(file, PAGE_SIZE);
+				res = dump_skip(cprm, PAGE_SIZE);
 			}
 			if (!res)
 				return false;
@@ -1547,11 +1547,10 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	int has_dumped = 0;
 	mm_segment_t fs;
 	int segs;
-	size_t size = 0;
 	int i;
 	struct vm_area_struct *vma;
 	struct elfhdr *elf = NULL;
-	loff_t offset = 0, dataoff, foffset;
+	loff_t offset = 0, dataoff;
 	int numnote;
 	struct memelfnote *notes = NULL;
 	struct elf_prstatus *prstatus = NULL;	/* NT_PRSTATUS */
@@ -1682,7 +1681,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 
 	offset += sizeof(*elf);				/* Elf header */
 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
-	foffset = offset;
 
 	/* Write notes phdr entry */
 	{
@@ -1751,8 +1749,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	if (!elf_core_write_extra_phdrs(cprm, offset))
 		goto end_coredump;
 
-	size = cprm->written;
-	cprm->written = foffset;
  	/* write out the notes section */
 	for (i = 0; i < numnote; i++)
 		if (!writenote(notes + i, cprm))
@@ -1768,10 +1764,9 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 				goto end_coredump;
 	}
 
-	if (!dump_seek(cprm->file, dataoff - cprm->written))
+	if (!dump_skip(cprm, dataoff - cprm->written))
 		goto end_coredump;
 
-	cprm->written = size;
 	if (!elf_fdpic_dump_segments(cprm))
 		goto end_coredump;
 
diff --git a/fs/coredump.c b/fs/coredump.c
index 2472ed9e682c..18baf2c009d4 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -685,14 +685,6 @@ fail:
  * do on a core-file: use only these functions to write out all the
  * necessary info.
  */
-int dump_write(struct file *file, const void *addr, int nr)
-{
-	return !dump_interrupted() &&
-		access_ok(VERIFY_READ, addr, nr) &&
-		file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-}
-EXPORT_SYMBOL(dump_write);
-
 int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
 {
 	struct file *file = cprm->file;
@@ -714,32 +706,25 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
 }
 EXPORT_SYMBOL(dump_emit);
 
-int dump_seek(struct file *file, loff_t off)
+int dump_skip(struct coredump_params *cprm, size_t nr)
 {
-	int ret = 1;
-
+	static char zeroes[PAGE_SIZE];
+	struct file *file = cprm->file;
 	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
+		if (cprm->written + nr > cprm->limit)
+			return 0;
 		if (dump_interrupted() ||
-		    file->f_op->llseek(file, off, SEEK_CUR) < 0)
+		    file->f_op->llseek(file, nr, SEEK_CUR) < 0)
 			return 0;
+		cprm->written += nr;
+		return 1;
 	} else {
-		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
-
-		if (!buf)
-			return 0;
-		while (off > 0) {
-			unsigned long n = off;
-
-			if (n > PAGE_SIZE)
-				n = PAGE_SIZE;
-			if (!dump_write(file, buf, n)) {
-				ret = 0;
-				break;
-			}
-			off -= n;
+		while (nr > PAGE_SIZE) {
+			if (!dump_emit(cprm, zeroes, PAGE_SIZE))
+				return 0;
+			nr -= PAGE_SIZE;
 		}
-		free_page((unsigned long)buf);
+		return dump_emit(cprm, zeroes, nr);
 	}
-	return ret;
 }
-EXPORT_SYMBOL(dump_seek);
+EXPORT_SYMBOL(dump_skip);
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 2959376a9ad5..07a0af93f230 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -11,8 +11,7 @@
  * functions to write out all the necessary info.
  */
 struct coredump_params;
-extern int dump_write(struct file *file, const void *addr, int nr);
-extern int dump_seek(struct file *file, loff_t off);
+extern int dump_skip(struct coredump_params *cprm, size_t nr);
 extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
 #ifdef CONFIG_COREDUMP
 extern void do_coredump(siginfo_t *siginfo);

From 7b1f4020d0d16a17b9c48570bdb7b4ad99be3553 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 8 Oct 2013 09:44:29 -0400
Subject: [PATCH 063/101] spufs: get rid of dump_emit() wrappers

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/powerpc/platforms/cell/spufs/coredump.c | 69 ++++++--------------
 1 file changed, 20 insertions(+), 49 deletions(-)

diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index 5d9b0a288f36..1aaaa586fa50 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -50,33 +50,6 @@ static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer,
 	return ++ret; /* count trailing NULL */
 }
 
-/*
- * These are the only things you should do on a core-file: use only these
- * functions to write out all the necessary info.
- */
-static int spufs_dump_write(struct coredump_params *cprm, const void *addr, int nr)
-{
-	if (!dump_emit(cprm, addr, nr))
-		return -EIO;
-	return 0;
-}
-
-static int spufs_dump_align(struct coredump_params *cprm, char *buf, loff_t new_off)
-{
-	int rc, size;
-
-	size = min((loff_t)PAGE_SIZE, new_off - cprm->written);
-	memset(buf, 0, size);
-
-	rc = 0;
-	while (rc == 0 && new_off > cprm->written) {
-		size = min((loff_t)PAGE_SIZE, new_off - cprm->written);
-		rc = spufs_dump_write(cprm, buf, size);
-	}
-
-	return rc;
-}
-
 static int spufs_ctx_note_size(struct spu_context *ctx, int dfd)
 {
 	int i, sz, total = 0;
@@ -159,7 +132,7 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
 				  struct coredump_params *cprm, int dfd)
 {
 	loff_t pos = 0;
-	int sz, rc, nread, total = 0;
+	int sz, rc, total = 0;
 	const int bufsz = PAGE_SIZE;
 	char *name;
 	char fullname[80], *buf;
@@ -177,38 +150,36 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
 	en.n_descsz = sz;
 	en.n_type = NT_SPU;
 
-	rc = spufs_dump_write(cprm, &en, sizeof(en));
-	if (rc)
-		goto out;
+	if (!dump_emit(cprm, &en, sizeof(en)))
+		goto Eio;
 
-	rc = spufs_dump_write(cprm, fullname, en.n_namesz);
-	if (rc)
-		goto out;
+	if (!dump_emit(cprm, fullname, en.n_namesz))
+		goto Eio;
 
-	rc = spufs_dump_align(cprm, buf, roundup(cprm->written, 4));
-	if (rc)
-		goto out;
+	if (!dump_skip(cprm, roundup(cprm->written, 4) - cprm->written))
+		goto Eio;
 
 	do {
-		nread = do_coredump_read(i, ctx, buf, bufsz, &pos);
-		if (nread > 0) {
-			rc = spufs_dump_write(cprm, buf, nread);
-			if (rc)
-				goto out;
-			total += nread;
+		rc = do_coredump_read(i, ctx, buf, bufsz, &pos);
+		if (rc > 0) {
+			if (!dump_emit(cprm, buf, rc))
+				goto Eio;
+			total += rc;
 		}
-	} while (nread == bufsz && total < sz);
+	} while (rc == bufsz && total < sz);
 
-	if (nread < 0) {
-		rc = nread;
+	if (rc < 0)
 		goto out;
-	}
-
-	rc = spufs_dump_align(cprm, buf, roundup(cprm->written - total + sz, 4));
 
+	if (!dump_skip(cprm,
+		       roundup(cprm->written - total + sz, 4) - cprm->written))
+		goto Eio;
 out:
 	free_page((unsigned long)buf);
 	return rc;
+Eio:
+	free_page((unsigned long)buf);
+	return -EIO;
 }
 
 int spufs_coredump_extra_notes_write(struct coredump_params *cprm)

From 22a8cb8248ba5d340307ba72432253b1dbdb5cf7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 8 Oct 2013 11:05:01 -0400
Subject: [PATCH 064/101] new helper: dump_align()

dump_skip to given alignment...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/powerpc/platforms/cell/spufs/coredump.c |  2 +-
 fs/binfmt_elf.c                              | 10 ++--------
 fs/binfmt_elf_fdpic.c                        | 10 ++--------
 fs/coredump.c                                |  9 +++++++++
 include/linux/coredump.h                     |  1 +
 5 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index 1aaaa586fa50..be6212ddbf06 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -156,7 +156,7 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
 	if (!dump_emit(cprm, fullname, en.n_namesz))
 		goto Eio;
 
-	if (!dump_skip(cprm, roundup(cprm->written, 4) - cprm->written))
+	if (!dump_align(cprm, 4))
 		goto Eio;
 
 	do {
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index c56ae3264a65..864154972670 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1225,12 +1225,6 @@ static int notesize(struct memelfnote *en)
 	return sz;
 }
 
-static int alignfile(struct coredump_params *cprm)
-{
-	static const char buf[4] = { 0, };
-	return dump_emit(cprm, buf, roundup(cprm->written, 4) - cprm->written);
-}
-
 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
 {
 	struct elf_note en;
@@ -1239,8 +1233,8 @@ static int writenote(struct memelfnote *men, struct coredump_params *cprm)
 	en.n_type = men->type;
 
 	return dump_emit(cprm, &en, sizeof(en)) &&
-	    dump_emit(cprm, men->name, en.n_namesz) && alignfile(cprm) &&
-	    dump_emit(cprm, men->data, men->datasz) && alignfile(cprm);
+	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
+	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
 }
 
 static void fill_elf_header(struct elfhdr *elf, int segs,
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index a69fc4ae1c85..645f6e56f378 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1267,12 +1267,6 @@ static int notesize(struct memelfnote *en)
 
 /* #define DEBUG */
 
-static int alignfile(struct coredump_params *cprm)
-{
-	static const char buf[4] = { 0, };
-	return dump_emit(cprm, buf, roundup(cprm->written, 4) - cprm->written);
-}
-
 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
 {
 	struct elf_note en;
@@ -1281,8 +1275,8 @@ static int writenote(struct memelfnote *men, struct coredump_params *cprm)
 	en.n_type = men->type;
 
 	return dump_emit(cprm, &en, sizeof(en)) &&
-		dump_emit(cprm, men->name, en.n_namesz) && alignfile(cprm) &&
-		dump_emit(cprm, men->data, men->datasz) && alignfile(cprm);
+		dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
+		dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
 }
 
 static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
diff --git a/fs/coredump.c b/fs/coredump.c
index 18baf2c009d4..dc1f937413d5 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -728,3 +728,12 @@ int dump_skip(struct coredump_params *cprm, size_t nr)
 	}
 }
 EXPORT_SYMBOL(dump_skip);
+
+int dump_align(struct coredump_params *cprm, int align)
+{
+	unsigned mod = cprm->written & (align - 1);
+	if (align & (align - 1))
+		return -EINVAL;
+	return mod ? dump_skip(cprm, align - mod) : 0;
+}
+EXPORT_SYMBOL(dump_align);
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 07a0af93f230..d8eb880be82a 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -13,6 +13,7 @@
 struct coredump_params;
 extern int dump_skip(struct coredump_params *cprm, size_t nr);
 extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
+extern int dump_align(struct coredump_params *cprm, int align);
 #ifdef CONFIG_COREDUMP
 extern void do_coredump(siginfo_t *siginfo);
 #else

From 6987843ff7e836ea65b554905aec34d2fad05c94 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 2 Oct 2013 22:35:11 -0400
Subject: [PATCH 065/101] take anon inode allocation to libfs.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/anon_inodes.c   | 50 ++--------------------------------------------
 fs/libfs.c         | 43 +++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h |  1 +
 3 files changed, 46 insertions(+), 48 deletions(-)

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 85c961849953..22f9698a1214 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -24,7 +24,6 @@
 
 static struct vfsmount *anon_inode_mnt __read_mostly;
 static struct inode *anon_inode_inode;
-static const struct file_operations anon_inode_fops;
 
 /*
  * anon_inodefs_dname() is called from d_path().
@@ -39,51 +38,6 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
 	.d_dname	= anon_inodefs_dname,
 };
 
-/*
- * nop .set_page_dirty method so that people can use .page_mkwrite on
- * anon inodes.
- */
-static int anon_set_page_dirty(struct page *page)
-{
-	return 0;
-};
-
-static const struct address_space_operations anon_aops = {
-	.set_page_dirty = anon_set_page_dirty,
-};
-
-/*
- * A single inode exists for all anon_inode files. Contrary to pipes,
- * anon_inode inodes have no associated per-instance data, so we need
- * only allocate one of them.
- */
-static struct inode *anon_inode_mkinode(struct super_block *s)
-{
-	struct inode *inode = new_inode_pseudo(s);
-
-	if (!inode)
-		return ERR_PTR(-ENOMEM);
-
-	inode->i_ino = get_next_ino();
-	inode->i_fop = &anon_inode_fops;
-
-	inode->i_mapping->a_ops = &anon_aops;
-
-	/*
-	 * Mark the inode dirty from the very beginning,
-	 * that way it will never be moved to the dirty
-	 * list because mark_inode_dirty() will think
-	 * that it already _is_ on the dirty list.
-	 */
-	inode->i_state = I_DIRTY;
-	inode->i_mode = S_IRUSR | S_IWUSR;
-	inode->i_uid = current_fsuid();
-	inode->i_gid = current_fsgid();
-	inode->i_flags |= S_PRIVATE;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-	return inode;
-}
-
 static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
 				int flags, const char *dev_name, void *data)
 {
@@ -92,7 +46,7 @@ static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
 			&anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
 	if (!IS_ERR(root)) {
 		struct super_block *s = root->d_sb;
-		anon_inode_inode = anon_inode_mkinode(s);
+		anon_inode_inode = alloc_anon_inode(s);
 		if (IS_ERR(anon_inode_inode)) {
 			dput(root);
 			deactivate_locked_super(s);
@@ -134,7 +88,7 @@ struct file *anon_inode_getfile_private(const char *name,
 	if (fops->owner && !try_module_get(fops->owner))
 		return ERR_PTR(-ENOENT);
 
-	inode = anon_inode_mkinode(anon_inode_mnt->mnt_sb);
+	inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
 	if (IS_ERR(inode)) {
 		file = ERR_PTR(-ENOMEM);
 		goto err_module;
diff --git a/fs/libfs.c b/fs/libfs.c
index 8c5018493154..5de06947ba5e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1002,3 +1002,46 @@ void kfree_put_link(struct dentry *dentry, struct nameidata *nd,
 		kfree(s);
 }
 EXPORT_SYMBOL(kfree_put_link);
+
+/*
+ * nop .set_page_dirty method so that people can use .page_mkwrite on
+ * anon inodes.
+ */
+static int anon_set_page_dirty(struct page *page)
+{
+	return 0;
+};
+
+/*
+ * A single inode exists for all anon_inode files. Contrary to pipes,
+ * anon_inode inodes have no associated per-instance data, so we need
+ * only allocate one of them.
+ */
+struct inode *alloc_anon_inode(struct super_block *s)
+{
+	static const struct address_space_operations anon_aops = {
+		.set_page_dirty = anon_set_page_dirty,
+	};
+	struct inode *inode = new_inode_pseudo(s);
+
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	inode->i_ino = get_next_ino();
+	inode->i_mapping->a_ops = &anon_aops;
+
+	/*
+	 * Mark the inode dirty from the very beginning,
+	 * that way it will never be moved to the dirty
+	 * list because mark_inode_dirty() will think
+	 * that it already _is_ on the dirty list.
+	 */
+	inode->i_state = I_DIRTY;
+	inode->i_mode = S_IRUSR | S_IWUSR;
+	inode->i_uid = current_fsuid();
+	inode->i_gid = current_fsgid();
+	inode->i_flags |= S_PRIVATE;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	return inode;
+}
+EXPORT_SYMBOL(alloc_anon_inode);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2b0f4e974480..e190326ac212 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2562,6 +2562,7 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping,
 extern int simple_write_end(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned copied,
 			struct page *page, void *fsdata);
+extern struct inode *alloc_anon_inode(struct super_block *);
 
 extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
 extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);

From 71ad7490c1f32bd7829df76360f9fa17829868f3 Mon Sep 17 00:00:00 2001
From: Benjamin LaHaise <bcrl@kvack.org>
Date: Tue, 17 Sep 2013 10:18:25 -0400
Subject: [PATCH 066/101] rework aio migrate pages to use aio fs

Don't abuse anon_inodes.c to host private files needed by aio;
we can bloody well declare a mini-fs of our own instead of
patching up what anon_inodes can create for us.

Tested-by: Benjamin LaHaise <bcrl@kvack.org>
Acked-by: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/aio.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 57 insertions(+), 6 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 067e3d340c35..1f602d9be4c5 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -36,10 +36,10 @@
 #include <linux/eventfd.h>
 #include <linux/blkdev.h>
 #include <linux/compat.h>
-#include <linux/anon_inodes.h>
 #include <linux/migrate.h>
 #include <linux/ramfs.h>
 #include <linux/percpu-refcount.h>
+#include <linux/mount.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -152,12 +152,67 @@ unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio request
 static struct kmem_cache	*kiocb_cachep;
 static struct kmem_cache	*kioctx_cachep;
 
+static struct vfsmount *aio_mnt;
+
+static const struct file_operations aio_ring_fops;
+static const struct address_space_operations aio_ctx_aops;
+
+static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
+{
+	struct qstr this = QSTR_INIT("[aio]", 5);
+	struct file *file;
+	struct path path;
+	struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	inode->i_mapping->a_ops = &aio_ctx_aops;
+	inode->i_mapping->private_data = ctx;
+	inode->i_size = PAGE_SIZE * nr_pages;
+
+	path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
+	if (!path.dentry) {
+		iput(inode);
+		return ERR_PTR(-ENOMEM);
+	}
+	path.mnt = mntget(aio_mnt);
+
+	d_instantiate(path.dentry, inode);
+	file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops);
+	if (IS_ERR(file)) {
+		path_put(&path);
+		return file;
+	}
+
+	file->f_flags = O_RDWR;
+	file->private_data = ctx;
+	return file;
+}
+
+static struct dentry *aio_mount(struct file_system_type *fs_type,
+				int flags, const char *dev_name, void *data)
+{
+	static const struct dentry_operations ops = {
+		.d_dname	= simple_dname,
+	};
+	return mount_pseudo(fs_type, "aio:", NULL, &ops, 0xa10a10a1);
+}
+
 /* aio_setup
  *	Creates the slab caches used by the aio routines, panic on
  *	failure as this is done early during the boot sequence.
  */
 static int __init aio_setup(void)
 {
+	static struct file_system_type aio_fs = {
+		.name		= "aio",
+		.mount		= aio_mount,
+		.kill_sb	= kill_anon_super,
+	};
+	aio_mnt = kern_mount(&aio_fs);
+	if (IS_ERR(aio_mnt))
+		panic("Failed to create aio fs mount.");
+
 	kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 
@@ -283,16 +338,12 @@ static int aio_setup_ring(struct kioctx *ctx)
 	if (nr_pages < 0)
 		return -EINVAL;
 
-	file = anon_inode_getfile_private("[aio]", &aio_ring_fops, ctx, O_RDWR);
+	file = aio_private_file(ctx, nr_pages);
 	if (IS_ERR(file)) {
 		ctx->aio_ring_file = NULL;
 		return -EAGAIN;
 	}
 
-	file->f_inode->i_mapping->a_ops = &aio_ctx_aops;
-	file->f_inode->i_mapping->private_data = ctx;
-	file->f_inode->i_size = PAGE_SIZE * (loff_t)nr_pages;
-
 	for (i = 0; i < nr_pages; i++) {
 		struct page *page;
 		page = find_or_create_page(file->f_inode->i_mapping,

From 078d8e624c1837aa8ad65e58054a4a40d7ac46d2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 9 Oct 2013 10:26:28 -0400
Subject: [PATCH 067/101] ... and kill anon_inode_getfile_private()

it's a seriously misguided API, now fortunately without users.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/anon_inodes.c            | 66 -------------------------------------
 include/linux/anon_inodes.h |  3 --
 2 files changed, 69 deletions(-)

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 22f9698a1214..24084732b1d0 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -62,72 +62,6 @@ static struct file_system_type anon_inode_fs_type = {
 	.kill_sb	= kill_anon_super,
 };
 
-/**
- * anon_inode_getfile_private - creates a new file instance by hooking it up to an
- *                      anonymous inode, and a dentry that describe the "class"
- *                      of the file
- *
- * @name:    [in]    name of the "class" of the new file
- * @fops:    [in]    file operations for the new file
- * @priv:    [in]    private data for the new file (will be file's private_data)
- * @flags:   [in]    flags
- *
- *
- * Similar to anon_inode_getfile, but each file holds a single inode.
- *
- */
-struct file *anon_inode_getfile_private(const char *name,
-					const struct file_operations *fops,
-					void *priv, int flags)
-{
-	struct qstr this;
-	struct path path;
-	struct file *file;
-	struct inode *inode;
-
-	if (fops->owner && !try_module_get(fops->owner))
-		return ERR_PTR(-ENOENT);
-
-	inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
-	if (IS_ERR(inode)) {
-		file = ERR_PTR(-ENOMEM);
-		goto err_module;
-	}
-
-	/*
-	 * Link the inode to a directory entry by creating a unique name
-	 * using the inode sequence number.
-	 */
-	file = ERR_PTR(-ENOMEM);
-	this.name = name;
-	this.len = strlen(name);
-	this.hash = 0;
-	path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this);
-	if (!path.dentry)
-		goto err_module;
-
-	path.mnt = mntget(anon_inode_mnt);
-
-	d_instantiate(path.dentry, inode);
-
-	file = alloc_file(&path, OPEN_FMODE(flags), fops);
-	if (IS_ERR(file))
-		goto err_dput;
-
-	file->f_mapping = inode->i_mapping;
-	file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
-	file->private_data = priv;
-
-	return file;
-
-err_dput:
-	path_put(&path);
-err_module:
-	module_put(fops->owner);
-	return file;
-}
-EXPORT_SYMBOL_GPL(anon_inode_getfile_private);
-
 /**
  * anon_inode_getfile - creates a new file instance by hooking it up to an
  *                      anonymous inode, and a dentry that describe the "class"
diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h
index cf573c22b81e..8013a45242fe 100644
--- a/include/linux/anon_inodes.h
+++ b/include/linux/anon_inodes.h
@@ -13,9 +13,6 @@ struct file_operations;
 struct file *anon_inode_getfile(const char *name,
 				const struct file_operations *fops,
 				void *priv, int flags);
-struct file *anon_inode_getfile_private(const char *name,
-				const struct file_operations *fops,
-				void *priv, int flags);
 int anon_inode_getfd(const char *name, const struct file_operations *fops,
 		     void *priv, int flags);
 

From ce3959604878c1c693979ec552069dc8bdb5ccde Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 13 Oct 2013 17:23:53 -0400
Subject: [PATCH 068/101] constify copy_siginfo_to_user{,32}()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/arm64/kernel/signal32.c     | 2 +-
 arch/ia64/kernel/signal.c        | 2 +-
 arch/mips/kernel/signal32.c      | 2 +-
 arch/parisc/kernel/signal32.c    | 2 +-
 arch/parisc/kernel/signal32.h    | 2 +-
 arch/powerpc/kernel/signal_32.c  | 2 +-
 arch/s390/kernel/compat_signal.c | 2 +-
 arch/sparc/kernel/signal32.c     | 2 +-
 arch/tile/kernel/compat_signal.c | 2 +-
 arch/x86/ia32/ia32_signal.c      | 2 +-
 fs/binfmt_elf.c                  | 2 +-
 include/asm-generic/siginfo.h    | 2 +-
 include/linux/compat.h           | 2 +-
 kernel/signal.c                  | 2 +-
 14 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index e393174fe859..07da8c0fde2e 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -150,7 +150,7 @@ static inline int get_sigset_t(sigset_t *set,
 	return 0;
 }
 
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 {
 	int err;
 
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 3637e03d2282..33cab9a8adff 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -105,7 +105,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
 }
 
 int
-copy_siginfo_to_user (siginfo_t __user *to, siginfo_t *from)
+copy_siginfo_to_user (siginfo_t __user *to, const siginfo_t *from)
 {
 	if (!access_ok(VERIFY_WRITE, to, sizeof(siginfo_t)))
 		return -EFAULT;
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index 57de8b751627..1905a419aa46 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -314,7 +314,7 @@ SYSCALL_DEFINE3(32_sigaction, long, sig, const struct compat_sigaction __user *,
 	return ret;
 }
 
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 {
 	int err;
 
diff --git a/arch/parisc/kernel/signal32.c b/arch/parisc/kernel/signal32.c
index 6c6a271a6140..984abbee71ca 100644
--- a/arch/parisc/kernel/signal32.c
+++ b/arch/parisc/kernel/signal32.c
@@ -319,7 +319,7 @@ copy_siginfo_from_user32 (siginfo_t *to, compat_siginfo_t __user *from)
 }
 
 int
-copy_siginfo_to_user32 (compat_siginfo_t __user *to, siginfo_t *from)
+copy_siginfo_to_user32 (compat_siginfo_t __user *to, const siginfo_t *from)
 {
 	compat_uptr_t addr;
 	compat_int_t val;
diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h
index 72ab41a51f32..af51d4ccee42 100644
--- a/arch/parisc/kernel/signal32.h
+++ b/arch/parisc/kernel/signal32.h
@@ -34,7 +34,7 @@ struct compat_ucontext {
 
 /* ELF32 signal handling */
 
-int copy_siginfo_to_user32 (compat_siginfo_t __user *to, siginfo_t *from);
+int copy_siginfo_to_user32 (compat_siginfo_t __user *to, const siginfo_t *from);
 int copy_siginfo_from_user32 (siginfo_t *to, compat_siginfo_t __user *from);
 
 /* In a deft move of uber-hackery, we decide to carry the top half of all
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index bebdf1a1a540..149cd583d061 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -891,7 +891,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 #endif
 
 #ifdef CONFIG_PPC64
-int copy_siginfo_to_user32(struct compat_siginfo __user *d, siginfo_t *s)
+int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s)
 {
 	int err;
 
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 1389b637dae5..44b25269a970 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -49,7 +49,7 @@ typedef struct
 	__u32 gprs_high[NUM_GPRS];
 } rt_sigframe32;
 
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 {
 	int err;
 
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index b524f91dd0e5..ee789d2ef05d 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -68,7 +68,7 @@ struct rt_signal_frame32 {
 	/* __siginfo_rwin_t * */u32 rwin_save;
 } __attribute__((aligned(8)));
 
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 {
 	int err;
 
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c
index 85e00b2f39bf..19c04b5ce408 100644
--- a/arch/tile/kernel/compat_signal.c
+++ b/arch/tile/kernel/compat_signal.c
@@ -49,7 +49,7 @@ struct compat_rt_sigframe {
 	struct compat_ucontext uc;
 };
 
-int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *from)
 {
 	int err;
 
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 665a730307f2..220675795e08 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -34,7 +34,7 @@
 #include <asm/sys_ia32.h>
 #include <asm/smap.h>
 
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 {
 	int err = 0;
 	bool ia32 = test_thread_flag(TIF_IA32);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 864154972670..a6f8cb498b0e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1374,7 +1374,7 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
 }
 
 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
-		siginfo_t *siginfo)
+		const siginfo_t *siginfo)
 {
 	mm_segment_t old_fs = get_fs();
 	set_fs(KERNEL_DS);
diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h
index b685d3bd32e2..3d1a3af5cf59 100644
--- a/include/asm-generic/siginfo.h
+++ b/include/asm-generic/siginfo.h
@@ -32,6 +32,6 @@ static inline void copy_siginfo(struct siginfo *to, struct siginfo *from)
 
 #endif
 
-extern int copy_siginfo_to_user(struct siginfo __user *to, struct siginfo *from);
+extern int copy_siginfo_to_user(struct siginfo __user *to, const struct siginfo *from);
 
 #endif
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 345da00a86e0..78cdf51ff5ba 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -362,7 +362,7 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
 long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
 		       unsigned long bitmap_size);
 int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from);
-int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from);
+int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *from);
 int get_compat_sigevent(struct sigevent *event,
 		const struct compat_sigevent __user *u_event);
 long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
diff --git a/kernel/signal.c b/kernel/signal.c
index ded28b91fa53..940b30ee9a30 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2723,7 +2723,7 @@ COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset,
 
 #ifndef HAVE_ARCH_COPY_SIGINFO_TO_USER
 
-int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from)
 {
 	int err;
 

From ec57941e031685de434916e5398d0ca1d44cd374 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 13 Oct 2013 17:57:29 -0400
Subject: [PATCH 069/101] constify do_coredump() argument

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/binfmt_elf.c          | 4 ++--
 fs/coredump.c            | 2 +-
 include/linux/binfmts.h  | 2 +-
 include/linux/coredump.h | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index a6f8cb498b0e..1af412b96e06 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1581,7 +1581,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 
 static int fill_note_info(struct elfhdr *elf, int phdrs,
 			  struct elf_note_info *info,
-			  siginfo_t *siginfo, struct pt_regs *regs)
+			  const siginfo_t *siginfo, struct pt_regs *regs)
 {
 	struct task_struct *dump_task = current;
 	const struct user_regset_view *view = task_user_regset_view(dump_task);
@@ -1830,7 +1830,7 @@ static int elf_note_info_init(struct elf_note_info *info)
 
 static int fill_note_info(struct elfhdr *elf, int phdrs,
 			  struct elf_note_info *info,
-			  siginfo_t *siginfo, struct pt_regs *regs)
+			  const siginfo_t *siginfo, struct pt_regs *regs)
 {
 	struct list_head *t;
 
diff --git a/fs/coredump.c b/fs/coredump.c
index dc1f937413d5..62406b6959b6 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -485,7 +485,7 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
 	return err;
 }
 
-void do_coredump(siginfo_t *siginfo)
+void do_coredump(const siginfo_t *siginfo)
 {
 	struct core_state core_state;
 	struct core_name cn;
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 8aa507e7a41a..790d3305a5a7 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -56,7 +56,7 @@ struct linux_binprm {
 
 /* Function parameter for binfmt->coredump */
 struct coredump_params {
-	siginfo_t *siginfo;
+	const siginfo_t *siginfo;
 	struct pt_regs *regs;
 	struct file *file;
 	unsigned long limit;
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index d8eb880be82a..d016a121a8c4 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -15,9 +15,9 @@ extern int dump_skip(struct coredump_params *cprm, size_t nr);
 extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
 extern int dump_align(struct coredump_params *cprm, int align);
 #ifdef CONFIG_COREDUMP
-extern void do_coredump(siginfo_t *siginfo);
+extern void do_coredump(const siginfo_t *siginfo);
 #else
-static inline void do_coredump(siginfo_t *siginfo) {}
+static inline void do_coredump(const siginfo_t *siginfo) {}
 #endif
 
 #endif /* _LINUX_COREDUMP_H */

From afabada957a6b28abfd37eb52efeefdfe6871c4b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 14 Oct 2013 07:39:56 -0400
Subject: [PATCH 070/101] elf{,_fdpic} coredump: get rid of pointless if
 (siginfo->si_signo)

we can't get to do_coredump() if that condition isn't satisfied...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/binfmt_elf.c       | 33 +++++++++++++++------------------
 fs/binfmt_elf_fdpic.c | 37 +++++++++++++++++--------------------
 2 files changed, 32 insertions(+), 38 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 1af412b96e06..571a42326908 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1833,31 +1833,28 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 			  const siginfo_t *siginfo, struct pt_regs *regs)
 {
 	struct list_head *t;
+	struct core_thread *ct;
+	struct elf_thread_status *ets;
 
 	if (!elf_note_info_init(info))
 		return 0;
 
-	if (siginfo->si_signo) {
-		struct core_thread *ct;
-		struct elf_thread_status *ets;
+	for (ct = current->mm->core_state->dumper.next;
+					ct; ct = ct->next) {
+		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
+		if (!ets)
+			return 0;
 
-		for (ct = current->mm->core_state->dumper.next;
-						ct; ct = ct->next) {
-			ets = kzalloc(sizeof(*ets), GFP_KERNEL);
-			if (!ets)
-				return 0;
+		ets->thread = ct->task;
+		list_add(&ets->list, &info->thread_list);
+	}
 
-			ets->thread = ct->task;
-			list_add(&ets->list, &info->thread_list);
-		}
+	list_for_each(t, &info->thread_list) {
+		int sz;
 
-		list_for_each(t, &info->thread_list) {
-			int sz;
-
-			ets = list_entry(t, struct elf_thread_status, list);
-			sz = elf_dump_thread_status(siginfo->si_signo, ets);
-			info->thread_status_size += sz;
-		}
+		ets = list_entry(t, struct elf_thread_status, list);
+		sz = elf_dump_thread_status(siginfo->si_signo, ets);
+		info->thread_status_size += sz;
 	}
 	/* now collect the dump for the current */
 	memset(info->prstatus, 0, sizeof(*info->prstatus));
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 645f6e56f378..fe2a643ee005 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1561,6 +1561,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	struct elf_shdr *shdr4extnum = NULL;
 	Elf_Half e_phnum;
 	elf_addr_t e_shoff;
+	struct core_thread *ct;
+	struct elf_thread_status *tmp;
 
 	/*
 	 * We no longer stop all VM operations.
@@ -1596,28 +1598,23 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 		goto cleanup;
 #endif
 
-	if (cprm->siginfo->si_signo) {
-		struct core_thread *ct;
+	for (ct = current->mm->core_state->dumper.next;
+					ct; ct = ct->next) {
+		tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+		if (!tmp)
+			goto cleanup;
+
+		tmp->thread = ct->task;
+		list_add(&tmp->list, &thread_list);
+	}
+
+	list_for_each(t, &thread_list) {
 		struct elf_thread_status *tmp;
+		int sz;
 
-		for (ct = current->mm->core_state->dumper.next;
-						ct; ct = ct->next) {
-			tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
-			if (!tmp)
-				goto cleanup;
-
-			tmp->thread = ct->task;
-			list_add(&tmp->list, &thread_list);
-		}
-
-		list_for_each(t, &thread_list) {
-			struct elf_thread_status *tmp;
-			int sz;
-
-			tmp = list_entry(t, struct elf_thread_status, list);
-			sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp);
-			thread_status_size += sz;
-		}
+		tmp = list_entry(t, struct elf_thread_status, list);
+		sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp);
+		thread_status_size += sz;
 	}
 
 	/* now collect the dump for the current */

From b18825a7c8e37a7cf6abb97a12a6ad71af160de7 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 12 Sep 2013 19:22:53 +0100
Subject: [PATCH 071/101] VFS: Put a small type field into struct
 dentry::d_flags

Put a type field into struct dentry::d_flags to indicate if the dentry is one
of the following types that relate particularly to pathwalk:

	Miss (negative dentry)
	Directory
	"Automount" directory (defective - no i_op->lookup())
	Symlink
	Other (regular, socket, fifo, device)

The type field is set to one of the first five types on a dentry by calls to
__d_instantiate() and d_obtain_alias() from information in the inode (if one is
given).

The type is cleared by dentry_unlink_inode() when it reconstitutes an existing
dentry as a negative dentry.

Accessors provided are:

	d_set_type(dentry, type)
	d_is_directory(dentry)
	d_is_autodir(dentry)
	d_is_symlink(dentry)
	d_is_file(dentry)
	d_is_negative(dentry)
	d_is_positive(dentry)

A bunch of checks in pathname resolution switched to those.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            |  42 +++++++++++++++--
 fs/namei.c             |  95 +++++++++++++++----------------------
 include/linux/dcache.h | 103 +++++++++++++++++++++++++++++++++--------
 3 files changed, 158 insertions(+), 82 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index fb7bcf3ba5d6..525770e576db 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -343,6 +343,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
 	__releases(dentry->d_inode->i_lock)
 {
 	struct inode *inode = dentry->d_inode;
+	__d_clear_type(dentry);
 	dentry->d_inode = NULL;
 	hlist_del_init(&dentry->d_alias);
 	dentry_rcuwalk_barrier(dentry);
@@ -1648,14 +1649,42 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
 }
 EXPORT_SYMBOL(d_set_d_op);
 
+static unsigned d_flags_for_inode(struct inode *inode)
+{
+	unsigned add_flags = DCACHE_FILE_TYPE;
+
+	if (!inode)
+		return DCACHE_MISS_TYPE;
+
+	if (S_ISDIR(inode->i_mode)) {
+		add_flags = DCACHE_DIRECTORY_TYPE;
+		if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) {
+			if (unlikely(!inode->i_op->lookup))
+				add_flags = DCACHE_AUTODIR_TYPE;
+			else
+				inode->i_opflags |= IOP_LOOKUP;
+		}
+	} else if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
+		if (unlikely(inode->i_op->follow_link))
+			add_flags = DCACHE_SYMLINK_TYPE;
+		else
+			inode->i_opflags |= IOP_NOFOLLOW;
+	}
+
+	if (unlikely(IS_AUTOMOUNT(inode)))
+		add_flags |= DCACHE_NEED_AUTOMOUNT;
+	return add_flags;
+}
+
 static void __d_instantiate(struct dentry *dentry, struct inode *inode)
 {
+	unsigned add_flags = d_flags_for_inode(inode);
+
 	spin_lock(&dentry->d_lock);
-	if (inode) {
-		if (unlikely(IS_AUTOMOUNT(inode)))
-			dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
+	dentry->d_flags &= ~DCACHE_ENTRY_TYPE;
+	dentry->d_flags |= add_flags;
+	if (inode)
 		hlist_add_head(&dentry->d_alias, &inode->i_dentry);
-	}
 	dentry->d_inode = inode;
 	dentry_rcuwalk_barrier(dentry);
 	spin_unlock(&dentry->d_lock);
@@ -1860,6 +1889,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
 	static const struct qstr anonstring = QSTR_INIT("/", 1);
 	struct dentry *tmp;
 	struct dentry *res;
+	unsigned add_flags;
 
 	if (!inode)
 		return ERR_PTR(-ESTALE);
@@ -1885,9 +1915,11 @@ struct dentry *d_obtain_alias(struct inode *inode)
 	}
 
 	/* attach a disconnected dentry */
+	add_flags = d_flags_for_inode(inode) | DCACHE_DISCONNECTED;
+
 	spin_lock(&tmp->d_lock);
 	tmp->d_inode = inode;
-	tmp->d_flags |= DCACHE_DISCONNECTED;
+	tmp->d_flags |= add_flags;
 	hlist_add_head(&tmp->d_alias, &inode->i_dentry);
 	hlist_bl_lock(&tmp->d_sb->s_anon);
 	hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
diff --git a/fs/namei.c b/fs/namei.c
index e5c0118ba9f8..e1fa43346c61 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1501,18 +1501,9 @@ static void terminate_walk(struct nameidata *nd)
  * so we keep a cache of "no, this doesn't need follow_link"
  * for the common case.
  */
-static inline int should_follow_link(struct inode *inode, int follow)
+static inline int should_follow_link(struct dentry *dentry, int follow)
 {
-	if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
-		if (likely(inode->i_op->follow_link))
-			return follow;
-
-		/* This gets set once for the inode lifetime */
-		spin_lock(&inode->i_lock);
-		inode->i_opflags |= IOP_NOFOLLOW;
-		spin_unlock(&inode->i_lock);
-	}
-	return 0;
+	return unlikely(d_is_symlink(dentry)) ? follow : 0;
 }
 
 static inline int walk_component(struct nameidata *nd, struct path *path,
@@ -1542,7 +1533,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
 	if (!inode)
 		goto out_path_put;
 
-	if (should_follow_link(inode, follow)) {
+	if (should_follow_link(path->dentry, follow)) {
 		if (nd->flags & LOOKUP_RCU) {
 			if (unlikely(unlazy_walk(nd, path->dentry))) {
 				err = -ECHILD;
@@ -1600,26 +1591,6 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
 	return res;
 }
 
-/*
- * We really don't want to look at inode->i_op->lookup
- * when we don't have to. So we keep a cache bit in
- * the inode ->i_opflags field that says "yes, we can
- * do lookup on this inode".
- */
-static inline int can_lookup(struct inode *inode)
-{
-	if (likely(inode->i_opflags & IOP_LOOKUP))
-		return 1;
-	if (likely(!inode->i_op->lookup))
-		return 0;
-
-	/* We do this once for the lifetime of the inode */
-	spin_lock(&inode->i_lock);
-	inode->i_opflags |= IOP_LOOKUP;
-	spin_unlock(&inode->i_lock);
-	return 1;
-}
-
 /*
  * We can do the critical dentry name comparison and hashing
  * operations one word at a time, but we are limited to:
@@ -1823,7 +1794,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 			if (err)
 				return err;
 		}
-		if (!can_lookup(nd->inode)) {
+		if (!d_is_directory(nd->path.dentry)) {
 			err = -ENOTDIR; 
 			break;
 		}
@@ -1841,9 +1812,10 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 	nd->flags = flags | LOOKUP_JUMPED;
 	nd->depth = 0;
 	if (flags & LOOKUP_ROOT) {
-		struct inode *inode = nd->root.dentry->d_inode;
+		struct dentry *root = nd->root.dentry;
+		struct inode *inode = root->d_inode;
 		if (*name) {
-			if (!can_lookup(inode))
+			if (!d_is_directory(root))
 				return -ENOTDIR;
 			retval = inode_permission(inode, MAY_EXEC);
 			if (retval)
@@ -1899,7 +1871,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 		dentry = f.file->f_path.dentry;
 
 		if (*name) {
-			if (!can_lookup(dentry->d_inode)) {
+			if (!d_is_directory(dentry)) {
 				fdput(f);
 				return -ENOTDIR;
 			}
@@ -1981,7 +1953,7 @@ static int path_lookupat(int dfd, const char *name,
 		err = complete_walk(nd);
 
 	if (!err && nd->flags & LOOKUP_DIRECTORY) {
-		if (!can_lookup(nd->inode)) {
+		if (!d_is_directory(nd->path.dentry)) {
 			path_put(&nd->path);
 			err = -ENOTDIR;
 		}
@@ -2273,7 +2245,7 @@ done:
 	}
 	path->dentry = dentry;
 	path->mnt = mntget(nd->path.mnt);
-	if (should_follow_link(dentry->d_inode, nd->flags & LOOKUP_FOLLOW))
+	if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW))
 		return 1;
 	follow_mount(path);
 	error = 0;
@@ -2417,12 +2389,14 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
  * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
  *     nfs_async_unlink().
  */
-static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
+static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
 {
+	struct inode *inode = victim->d_inode;
 	int error;
 
-	if (!victim->d_inode)
+	if (d_is_negative(victim))
 		return -ENOENT;
+	BUG_ON(!inode);
 
 	BUG_ON(victim->d_parent->d_inode != dir);
 	audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
@@ -2432,15 +2406,16 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
 		return error;
 	if (IS_APPEND(dir))
 		return -EPERM;
-	if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
-	    IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
+
+	if (check_sticky(dir, inode) || IS_APPEND(inode) ||
+	    IS_IMMUTABLE(inode) || IS_SWAPFILE(inode))
 		return -EPERM;
 	if (isdir) {
-		if (!S_ISDIR(victim->d_inode->i_mode))
+		if (!d_is_directory(victim) && !d_is_autodir(victim))
 			return -ENOTDIR;
 		if (IS_ROOT(victim))
 			return -EBUSY;
-	} else if (S_ISDIR(victim->d_inode->i_mode))
+	} else if (d_is_directory(victim) || d_is_autodir(victim))
 		return -EISDIR;
 	if (IS_DEADDIR(dir))
 		return -ENOENT;
@@ -2974,7 +2949,7 @@ retry_lookup:
 	/*
 	 * create/update audit record if it already exists.
 	 */
-	if (path->dentry->d_inode)
+	if (d_is_positive(path->dentry))
 		audit_inode(name, path->dentry, 0);
 
 	/*
@@ -3003,12 +2978,12 @@ retry_lookup:
 finish_lookup:
 	/* we _can_ be in RCU mode here */
 	error = -ENOENT;
-	if (!inode) {
+	if (d_is_negative(path->dentry)) {
 		path_to_nameidata(path, nd);
 		goto out;
 	}
 
-	if (should_follow_link(inode, !symlink_ok)) {
+	if (should_follow_link(path->dentry, !symlink_ok)) {
 		if (nd->flags & LOOKUP_RCU) {
 			if (unlikely(unlazy_walk(nd, path->dentry))) {
 				error = -ECHILD;
@@ -3037,10 +3012,11 @@ finish_open:
 	}
 	audit_inode(name, nd->path.dentry, 0);
 	error = -EISDIR;
-	if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode))
+	if ((open_flag & O_CREAT) &&
+	    (d_is_directory(nd->path.dentry) || d_is_autodir(nd->path.dentry)))
 		goto out;
 	error = -ENOTDIR;
-	if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode))
+	if ((nd->flags & LOOKUP_DIRECTORY) && !d_is_directory(nd->path.dentry))
 		goto out;
 	if (!S_ISREG(nd->inode->i_mode))
 		will_truncate = false;
@@ -3266,7 +3242,7 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
 	nd.root.mnt = mnt;
 	nd.root.dentry = dentry;
 
-	if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
+	if (d_is_symlink(dentry) && op->intent & LOOKUP_OPEN)
 		return ERR_PTR(-ELOOP);
 
 	file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_RCU);
@@ -3316,8 +3292,9 @@ struct dentry *kern_path_create(int dfd, const char *pathname,
 		goto unlock;
 
 	error = -EEXIST;
-	if (dentry->d_inode)
+	if (d_is_positive(dentry))
 		goto fail;
+
 	/*
 	 * Special case - lookup gave negative, but... we had foo/bar/
 	 * From the vfs_mknod() POV we just have a negative dentry -
@@ -3706,7 +3683,7 @@ retry:
 		if (nd.last.name[nd.last.len])
 			goto slashes;
 		inode = dentry->d_inode;
-		if (!inode)
+		if (d_is_negative(dentry))
 			goto slashes;
 		ihold(inode);
 		error = security_path_unlink(&nd.path, dentry);
@@ -3731,8 +3708,12 @@ exit1:
 	return error;
 
 slashes:
-	error = !dentry->d_inode ? -ENOENT :
-		S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
+	if (d_is_negative(dentry))
+		error = -ENOENT;
+	else if (d_is_directory(dentry) || d_is_autodir(dentry))
+		error = -EISDIR;
+	else
+		error = -ENOTDIR;
 	goto exit2;
 }
 
@@ -4046,7 +4027,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	       struct inode *new_dir, struct dentry *new_dentry)
 {
 	int error;
-	int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
+	int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry);
 	const unsigned char *old_name;
 
 	if (old_dentry->d_inode == new_dentry->d_inode)
@@ -4134,10 +4115,10 @@ retry:
 		goto exit3;
 	/* source must exist */
 	error = -ENOENT;
-	if (!old_dentry->d_inode)
+	if (d_is_negative(old_dentry))
 		goto exit4;
 	/* unless the source is a directory trailing slashes give -ENOTDIR */
-	if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
+	if (!d_is_directory(old_dentry) && !d_is_autodir(old_dentry)) {
 		error = -ENOTDIR;
 		if (oldnd.last.name[oldnd.last.len])
 			goto exit4;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 716c3760ee39..57e87e749a48 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -169,13 +169,13 @@ struct dentry_operations {
  */
 
 /* d_flags entries */
-#define DCACHE_OP_HASH		0x0001
-#define DCACHE_OP_COMPARE	0x0002
-#define DCACHE_OP_REVALIDATE	0x0004
-#define DCACHE_OP_DELETE	0x0008
-#define DCACHE_OP_PRUNE         0x0010
+#define DCACHE_OP_HASH			0x00000001
+#define DCACHE_OP_COMPARE		0x00000002
+#define DCACHE_OP_REVALIDATE		0x00000004
+#define DCACHE_OP_DELETE		0x00000008
+#define DCACHE_OP_PRUNE			0x00000010
 
-#define	DCACHE_DISCONNECTED	0x0020
+#define	DCACHE_DISCONNECTED		0x00000020
      /* This dentry is possibly not currently connected to the dcache tree, in
       * which case its parent will either be itself, or will have this flag as
       * well.  nfsd will not use a dentry with this bit set, but will first
@@ -186,30 +186,38 @@ struct dentry_operations {
       * dentry into place and return that dentry rather than the passed one,
       * typically using d_splice_alias. */
 
-#define DCACHE_REFERENCED	0x0040  /* Recently used, don't discard. */
-#define DCACHE_RCUACCESS	0x0080	/* Entry has ever been RCU-visible */
+#define DCACHE_REFERENCED		0x00000040 /* Recently used, don't discard. */
+#define DCACHE_RCUACCESS		0x00000080 /* Entry has ever been RCU-visible */
 
-#define DCACHE_CANT_MOUNT	0x0100
-#define DCACHE_GENOCIDE		0x0200
-#define DCACHE_SHRINK_LIST	0x0400
+#define DCACHE_CANT_MOUNT		0x00000100
+#define DCACHE_GENOCIDE			0x00000200
+#define DCACHE_SHRINK_LIST		0x00000400
 
-#define DCACHE_OP_WEAK_REVALIDATE	0x0800
+#define DCACHE_OP_WEAK_REVALIDATE	0x00000800
 
-#define DCACHE_NFSFS_RENAMED	0x1000
+#define DCACHE_NFSFS_RENAMED		0x00001000
      /* this dentry has been "silly renamed" and has to be deleted on the last
       * dput() */
-#define DCACHE_COOKIE		0x2000	/* For use by dcookie subsystem */
-#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x4000
+#define DCACHE_COOKIE			0x00002000 /* For use by dcookie subsystem */
+#define DCACHE_FSNOTIFY_PARENT_WATCHED	0x00004000
      /* Parent inode is watched by some fsnotify listener */
 
-#define DCACHE_MOUNTED		0x10000	/* is a mountpoint */
-#define DCACHE_NEED_AUTOMOUNT	0x20000	/* handle automount on this dir */
-#define DCACHE_MANAGE_TRANSIT	0x40000	/* manage transit from this dirent */
+#define DCACHE_DENTRY_KILLED		0x00008000
+
+#define DCACHE_MOUNTED			0x00010000 /* is a mountpoint */
+#define DCACHE_NEED_AUTOMOUNT		0x00020000 /* handle automount on this dir */
+#define DCACHE_MANAGE_TRANSIT		0x00040000 /* manage transit from this dirent */
 #define DCACHE_MANAGED_DENTRY \
 	(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
 
-#define DCACHE_LRU_LIST		0x80000
-#define DCACHE_DENTRY_KILLED	0x100000
+#define DCACHE_LRU_LIST			0x00080000
+
+#define DCACHE_ENTRY_TYPE		0x00700000
+#define DCACHE_MISS_TYPE		0x00000000 /* Negative dentry */
+#define DCACHE_DIRECTORY_TYPE		0x00100000 /* Normal directory */
+#define DCACHE_AUTODIR_TYPE		0x00200000 /* Lookupless directory (presumed automount) */
+#define DCACHE_SYMLINK_TYPE		0x00300000 /* Symlink */
+#define DCACHE_FILE_TYPE		0x00400000 /* Other file type */
 
 extern seqlock_t rename_lock;
 
@@ -394,6 +402,61 @@ static inline bool d_mountpoint(const struct dentry *dentry)
 	return dentry->d_flags & DCACHE_MOUNTED;
 }
 
+/*
+ * Directory cache entry type accessor functions.
+ */
+static inline void __d_set_type(struct dentry *dentry, unsigned type)
+{
+	dentry->d_flags = (dentry->d_flags & ~DCACHE_ENTRY_TYPE) | type;
+}
+
+static inline void __d_clear_type(struct dentry *dentry)
+{
+	__d_set_type(dentry, DCACHE_MISS_TYPE);
+}
+
+static inline void d_set_type(struct dentry *dentry, unsigned type)
+{
+	spin_lock(&dentry->d_lock);
+	__d_set_type(dentry, type);
+	spin_unlock(&dentry->d_lock);
+}
+
+static inline unsigned __d_entry_type(const struct dentry *dentry)
+{
+	return dentry->d_flags & DCACHE_ENTRY_TYPE;
+}
+
+static inline bool d_is_directory(const struct dentry *dentry)
+{
+	return __d_entry_type(dentry) == DCACHE_DIRECTORY_TYPE;
+}
+
+static inline bool d_is_autodir(const struct dentry *dentry)
+{
+	return __d_entry_type(dentry) == DCACHE_AUTODIR_TYPE;
+}
+
+static inline bool d_is_symlink(const struct dentry *dentry)
+{
+	return __d_entry_type(dentry) == DCACHE_SYMLINK_TYPE;
+}
+
+static inline bool d_is_file(const struct dentry *dentry)
+{
+	return __d_entry_type(dentry) == DCACHE_FILE_TYPE;
+}
+
+static inline bool d_is_negative(const struct dentry *dentry)
+{
+	return __d_entry_type(dentry) == DCACHE_MISS_TYPE;
+}
+
+static inline bool d_is_positive(const struct dentry *dentry)
+{
+	return !d_is_negative(dentry);
+}
+
 extern int sysctl_vfs_cache_pressure;
 
 static inline unsigned long vfs_pressure_ratio(unsigned long val)

From 5a3cd99285dc793a4022fa75ceeb323eb6d29ac9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 6 Nov 2013 09:54:52 -0500
Subject: [PATCH 072/101] iget/iget5: don't bother with ->i_lock until we find
 a match

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/porting           |  8 +++++++
 drivers/staging/lustre/lustre/llite/namei.c |  2 +-
 fs/inode.c                                  | 24 +++++++--------------
 fs/ntfs/inode.c                             |  2 +-
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index f0890581f7f6..fe2b7ae6f962 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -455,3 +455,11 @@ in your dentry operations instead.
 	vfs_follow_link has been removed.  Filesystems must use nd_set_link
 	from ->follow_link for normal symlinks, or nd_jump_link for magic
 	/proc/<pid> style links.
+--
+[mandatory]
+	iget5_locked()/ilookup5()/ilookup5_nowait() test() callback used to be
+	called with both ->i_lock and inode_hash_lock held; the former is *not*
+	taken anymore, so verify that your callbacks do not rely on it (none
+	of the in-tree instances did).  inode_hash_lock is still held,
+	of course, so they are still serialized wrt removal from inode hash,
+	as well as wrt set() callback of iget5_locked().
diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
index 34815b550e71..90bbdae824ac 100644
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ b/drivers/staging/lustre/lustre/llite/namei.c
@@ -83,7 +83,7 @@ int ll_unlock(__u32 mode, struct lustre_handle *lockh)
 }
 
 
-/* called from iget5_locked->find_inode() under inode_lock spinlock */
+/* called from iget5_locked->find_inode() under inode_hash_lock spinlock */
 static int ll_test_inode(struct inode *inode, void *opaque)
 {
 	struct ll_inode_info *lli = ll_i2info(inode);
diff --git a/fs/inode.c b/fs/inode.c
index b33ba8e021cc..bf7a8b3ab937 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -773,15 +773,11 @@ static struct inode *find_inode(struct super_block *sb,
 
 repeat:
 	hlist_for_each_entry(inode, head, i_hash) {
+		if (inode->i_sb != sb)
+			continue;
+		if (!test(inode, data))
+			continue;
 		spin_lock(&inode->i_lock);
-		if (inode->i_sb != sb) {
-			spin_unlock(&inode->i_lock);
-			continue;
-		}
-		if (!test(inode, data)) {
-			spin_unlock(&inode->i_lock);
-			continue;
-		}
 		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode);
 			goto repeat;
@@ -804,15 +800,11 @@ static struct inode *find_inode_fast(struct super_block *sb,
 
 repeat:
 	hlist_for_each_entry(inode, head, i_hash) {
+		if (inode->i_ino != ino)
+			continue;
+		if (inode->i_sb != sb)
+			continue;
 		spin_lock(&inode->i_lock);
-		if (inode->i_ino != ino) {
-			spin_unlock(&inode->i_lock);
-			continue;
-		}
-		if (inode->i_sb != sb) {
-			spin_unlock(&inode->i_lock);
-			continue;
-		}
 		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode);
 			goto repeat;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 2778b0255dc6..ffb9b3675736 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -55,7 +55,7 @@
  *
  * Return 1 if the attributes match and 0 if not.
  *
- * NOTE: This function runs with the inode->i_lock spin lock held so it is not
+ * NOTE: This function runs with the inode_hash_lock spin lock held so it is not
  * allowed to sleep.
  */
 int ntfs_test_inode(struct inode *vi, ntfs_attr *na)

From b7a6ec52dd4eced4a9bcda9ca85b3c8af84d3c90 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 2 Oct 2013 17:01:18 -0400
Subject: [PATCH 073/101] vfs: split out vfs_getattr_nosec

The filehandle lookup code wants this version of getattr.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/stat.c          | 31 +++++++++++++++++++++++++------
 include/linux/fs.h |  1 +
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/fs/stat.c b/fs/stat.c
index d0ea7ef75e26..ae0c3cef9927 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -37,14 +37,21 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
 
 EXPORT_SYMBOL(generic_fillattr);
 
-int vfs_getattr(struct path *path, struct kstat *stat)
+/**
+ * vfs_getattr_nosec - getattr without security checks
+ * @path: file to get attributes from
+ * @stat: structure to return attributes in
+ *
+ * Get attributes without calling security_inode_getattr.
+ *
+ * Currently the only caller other than vfs_getattr is internal to the
+ * filehandle lookup code, which uses only the inode number and returns
+ * no attributes to any user.  Any other code probably wants
+ * vfs_getattr.
+ */
+int vfs_getattr_nosec(struct path *path, struct kstat *stat)
 {
 	struct inode *inode = path->dentry->d_inode;
-	int retval;
-
-	retval = security_inode_getattr(path->mnt, path->dentry);
-	if (retval)
-		return retval;
 
 	if (inode->i_op->getattr)
 		return inode->i_op->getattr(path->mnt, path->dentry, stat);
@@ -53,6 +60,18 @@ int vfs_getattr(struct path *path, struct kstat *stat)
 	return 0;
 }
 
+EXPORT_SYMBOL(vfs_getattr_nosec);
+
+int vfs_getattr(struct path *path, struct kstat *stat)
+{
+	int retval;
+
+	retval = security_inode_getattr(path->mnt, path->dentry);
+	if (retval)
+		return retval;
+	return vfs_getattr_nosec(path, stat);
+}
+
 EXPORT_SYMBOL(vfs_getattr);
 
 int vfs_fstat(unsigned int fd, struct kstat *stat)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e190326ac212..5e44b0893db8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2504,6 +2504,7 @@ extern const struct inode_operations page_symlink_inode_operations;
 extern void kfree_put_link(struct dentry *, struct nameidata *, void *);
 extern int generic_readlink(struct dentry *, char __user *, int);
 extern void generic_fillattr(struct inode *, struct kstat *);
+int vfs_getattr_nosec(struct path *path, struct kstat *stat);
 extern int vfs_getattr(struct path *, struct kstat *);
 void __inode_add_bytes(struct inode *inode, loff_t bytes);
 void inode_add_bytes(struct inode *inode, loff_t bytes);

From 950ee9566a5b6cc45d15f5fe044bab4f1e8b62cb Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 10 Sep 2013 11:41:12 -0400
Subject: [PATCH 074/101] exportfs: fix 32-bit nfsd handling of 64-bit inode
 numbers

Symptoms were spurious -ENOENTs on stat of an NFS filesystem from a
32-bit NFS server exporting a very large XFS filesystem, when the
server's cache is cold (so the inodes in question are not in cache).

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reported-by: Trevor Cordes <trevor@tecnopolis.ca>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exportfs/expfs.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index a235f0016889..c43fe9b39ff2 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -215,7 +215,7 @@ struct getdents_callback {
 	struct dir_context ctx;
 	char *name;		/* name that was found. It already points to a
 				   buffer NAME_MAX+1 is size */
-	unsigned long ino;	/* the inum we are looking for */
+	u64 ino;		/* the inum we are looking for */
 	int found;		/* inode matched? */
 	int sequence;		/* sequence counter */
 };
@@ -255,10 +255,14 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
 	struct inode *dir = path->dentry->d_inode;
 	int error;
 	struct file *file;
+	struct kstat stat;
+	struct path child_path = {
+		.mnt = path->mnt,
+		.dentry = child,
+	};
 	struct getdents_callback buffer = {
 		.ctx.actor = filldir_one,
 		.name = name,
-		.ino = child->d_inode->i_ino
 	};
 
 	error = -ENOTDIR;
@@ -267,6 +271,16 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
 	error = -EINVAL;
 	if (!dir->i_fop)
 		goto out;
+	/*
+	 * inode->i_ino is unsigned long, kstat->ino is u64, so the
+	 * former would be insufficient on 32-bit hosts when the
+	 * filesystem supports 64-bit inode numbers.  So we need to
+	 * actually call ->getattr, not just read i_ino:
+	 */
+	error = vfs_getattr_nosec(&child_path, &stat);
+	if (error)
+		return error;
+	buffer.ino = stat.ino;
 	/*
 	 * Open the directory ...
 	 */

From fbad2bd13235067eef0b5b5e18b40e84ca425815 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 3 Nov 2013 19:46:35 -0500
Subject: [PATCH 075/101] qnx4: i_sb is never NULL

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/qnx4/namei.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index d024505ba007..e62c8183777a 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -60,10 +60,6 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir,
 	struct buffer_head *bh;
 
 	*res_dir = NULL;
-	if (!dir->i_sb) {
-		printk(KERN_WARNING "qnx4: no superblock on dir.\n");
-		return NULL;
-	}
 	bh = NULL;
 	block = offset = blkofs = 0;
 	while (blkofs * QNX4_BLOCK_SIZE + offset < dir->i_size) {

From b19f13367454a57cd584031f0d52b05ac5ea97cd Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 3 Nov 2013 19:49:19 -0500
Subject: [PATCH 076/101] ocfs2: get rid of impossible checks

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ocfs2/inode.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index f87f9bd1edff..f29a90fde619 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -386,19 +386,9 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 	u32 generation = 0;
 
 	status = -EINVAL;
-	if (inode == NULL || inode->i_sb == NULL) {
-		mlog(ML_ERROR, "bad inode\n");
-		return status;
-	}
 	sb = inode->i_sb;
 	osb = OCFS2_SB(sb);
 
-	if (!args) {
-		mlog(ML_ERROR, "bad inode args\n");
-		make_bad_inode(inode);
-		return status;
-	}
-
 	/*
 	 * To improve performance of cold-cache inode stats, we take
 	 * the cluster lock here if possible.

From 7632e465feb182cadc3c9aa1282a057201818a8c Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Thu, 28 Jun 2012 12:10:55 -0400
Subject: [PATCH 077/101] dcache: use IS_ROOT to decide where dentry is hashed

Every hashed dentry is either hashed in the dentry_hashtable, or a
superblock's s_anon list.

__d_drop() assumes it can determine which is the case by checking
DCACHE_DISCONNECTED; this is not true.

It is true that when DCACHE_DISCONNECTED is cleared, the dentry is not
only hashed on dentry_hashtable, but is fully connected to its parents
back to the root.

But the converse is *not* true: fs/exportfs/expfs.c:reconnect_path()
attempts to connect a directory (found by filehandle lookup) back to
root by ascending to parents and performing lookups one at a time.  It
does not clear DCACHE_DISCONNECTED until it's done, and that is not at
all an atomic process.

In particular, it is possible for DCACHE_DISCONNECTED to be set on a
dentry which is hashed on the dentry_hashtable.

Instead, use IS_ROOT() to check which hash chain a dentry is on.  This
*does* work:

Dentries are hashed only by:

	- d_obtain_alias, which adds an IS_ROOT() dentry to sb_anon.

	- __d_rehash, called by _d_rehash: hashes to the dentry's
	  parent, and all callers of _d_rehash appear to have d_parent
	  set to a "real" parent.
	- __d_rehash, called by __d_move: rehashes the moved dentry to
	  hash chain determined by target, and assigns target's d_parent
	  to its d_parent, before dropping the dentry's d_lock.

Therefore I believe it's safe for a holder of a dentry's d_lock to
assume that it is hashed on sb_anon if and only if IS_ROOT(dentry) is
true.

I believe the incorrect assumption about DCACHE_DISCONNECTED was
originally introduced by ceb5bdc2d246 "fs: dcache per-bucket dcache hash
locking".

Also add a comment while we're here.

Cc: Nick Piggin <npiggin@kernel.dk>
Acked-by: Christoph Hellwig <hch@infradead.org>
Reviewed-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 525770e576db..c2add37082b6 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -503,7 +503,12 @@ void __d_drop(struct dentry *dentry)
 {
 	if (!d_unhashed(dentry)) {
 		struct hlist_bl_head *b;
-		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
+		/*
+		 * Hashed dentries are normally on the dentry hashtable,
+		 * with the exception of those newly allocated by
+		 * d_obtain_alias, which are always IS_ROOT:
+		 */
+		if (unlikely(IS_ROOT(dentry)))
 			b = &dentry->d_sb->s_anon;
 		else
 			b = d_hash(dentry->d_parent, dentry->d_name.hash);

From e1a24bb0aa6abaa86b5a95638722ea2036dbaadd Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 29 Jun 2012 16:20:47 -0400
Subject: [PATCH 078/101] dcache: Don't set DISCONNECTED on "pseudo filesystem"
 dentries

I can't for the life of me see any reason why anyone should care whether
a dentry that is never hooked into the dentry cache would need
DCACHE_DISCONNECTED set.

This originates from 4b936885ab04dc6e0bb0ef35e0e23c1a7364d9e5 "fs:
improve scalability of pseudo filesystems", which probably just made the
false assumption the DCACHE_DISCONNECTED was meant to be set on anything
not connected to a parent somehow.

So this is just confusing.  Ideally the only uses of DCACHE_DISCONNECTED
would be in the filehandle-lookup code, which needs it to ensure
dentries are connected into the dentry tree before use.

I left d_alloc_pseudo there even though it's now equivalent to
__d_alloc(), just on the theory the name is better documentation of its
intended use outside dcache.c.

Cc: Nick Piggin <npiggin@kernel.dk>
Acked-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index c2add37082b6..67662beebc72 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1607,12 +1607,17 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 }
 EXPORT_SYMBOL(d_alloc);
 
+/**
+ * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems)
+ * @sb: the superblock
+ * @name: qstr of the name
+ *
+ * For a filesystem that just pins its dentries in memory and never
+ * performs lookups at all, return an unhashed IS_ROOT dentry.
+ */
 struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
 {
-	struct dentry *dentry = __d_alloc(sb, name);
-	if (dentry)
-		dentry->d_flags |= DCACHE_DISCONNECTED;
-	return dentry;
+	return __d_alloc(sb, name);
 }
 EXPORT_SYMBOL(d_alloc_pseudo);
 

From f80de2cde10350b8d146e375ff8b634e72e6a827 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 18 Jul 2012 16:27:37 -0600
Subject: [PATCH 079/101] dcache: don't clear DCACHE_DISCONNECTED too early

DCACHE_DISCONNECTED should not be cleared until we're sure the dentry is
connected all the way up to the root of the filesystem.  It *shouldn't*
be cleared as soon as the dentry is connected to a parent.  That will
cause bugs at least on exportable filesystems.

Acked-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 67662beebc72..72c99b88d264 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2757,7 +2757,6 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
 	spin_unlock(&dentry->d_lock);
 
 	/* anon->d_lock still locked, returns locked */
-	anon->d_flags &= ~DCACHE_DISCONNECTED;
 }
 
 /**

From 13a2c3be03973d61b6cb89ff870e758c86327bb7 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 23 Oct 2013 16:09:16 -0400
Subject: [PATCH 080/101] dcache: fix outdated DCACHE_NEED_LOOKUP comment

The DCACHE_NEED_LOOKUP case referred to here was removed with
39e3c9553f34381a1b664c27b0c696a266a5735e "vfs: remove
DCACHE_NEED_LOOKUP".

There are only four real_lookup() callers and all of them pass in an
unhashed dentry just returned from d_alloc.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index e1fa43346c61..2a5a7aa9f43f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1298,8 +1298,8 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
 }
 
 /*
- * Call i_op->lookup on the dentry.  The dentry must be negative but may be
- * hashed if it was pouplated with DCACHE_NEED_LOOKUP.
+ * Call i_op->lookup on the dentry.  The dentry must be negative and
+ * unhashed.
  *
  * dir->d_inode->i_mutex must be held
  */

From 854ff5caabb5974b7464b438aba0bc47f1b6cf34 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Oct 2013 15:48:53 -0400
Subject: [PATCH 081/101] exportfs: BUG_ON in crazy corner case

This would indicate a nasty bug in the dcache and has never triggered in
the past 10 years as far as I know.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exportfs/expfs.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index c43fe9b39ff2..6d0a7fa9abb3 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -112,18 +112,14 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
 	while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) {
 		struct dentry *pd = find_disconnected_root(target_dir);
 
+		BUG_ON(pd == mnt->mnt_sb->s_root);
+
 		if (!IS_ROOT(pd)) {
 			/* must have found a connected parent - great */
 			spin_lock(&pd->d_lock);
 			pd->d_flags &= ~DCACHE_DISCONNECTED;
 			spin_unlock(&pd->d_lock);
 			noprogress = 0;
-		} else if (pd == mnt->mnt_sb->s_root) {
-			printk(KERN_ERR "export: Eeek filesystem root is not connected, impossible\n");
-			spin_lock(&pd->d_lock);
-			pd->d_flags &= ~DCACHE_DISCONNECTED;
-			spin_unlock(&pd->d_lock);
-			noprogress = 0;
 		} else {
 			/*
 			 * We have hit the top of a disconnected path, try to

From 78cee9a8e4b42b3f585ea3bd1c076f5a76fee722 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 22 Oct 2013 20:59:19 -0400
Subject: [PATCH 082/101] exportfs: more detailed comment for path_reconnect

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exportfs/expfs.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 6d0a7fa9abb3..87e6dca69e43 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -93,7 +93,19 @@ find_disconnected_root(struct dentry *dentry)
 /*
  * Make sure target_dir is fully connected to the dentry tree.
  *
- * It may already be, as the flag isn't always updated when connection happens.
+ * On successful return, DCACHE_DISCONNECTED will be cleared on
+ * target_dir, and target_dir->d_parent->...->d_parent will reach the
+ * root of the filesystem.
+ *
+ * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected.
+ * But the converse is not true: target_dir may have DCACHE_DISCONNECTED
+ * set but already be connected.  In that case we'll verify the
+ * connection to root and then clear the flag.
+ *
+ * Note that target_dir could be removed by a concurrent operation.  In
+ * that case reconnect_path may still succeed with target_dir fully
+ * connected, but further operations using the filehandle will fail when
+ * necessary (due to S_DEAD being set on the directory).
  */
 static int
 reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)

From 0dbc018a490ed482a1236aad77ac12e20742b322 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 9 Sep 2013 16:15:13 -0400
Subject: [PATCH 083/101] exportfs: clear DISCONNECTED on all parents sooner

Once we've found any connected parent, we know all our parents are
connected--that's true even if there's a concurrent rename.  May as well
clear them all at once and be done with it.

Reviewed-by: Cristoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exportfs/expfs.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 87e6dca69e43..c65b748688ff 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -90,6 +90,24 @@ find_disconnected_root(struct dentry *dentry)
 	return dentry;
 }
 
+static void clear_disconnected(struct dentry *dentry)
+{
+	dget(dentry);
+	while (dentry->d_flags & DCACHE_DISCONNECTED) {
+		struct dentry *parent = dget_parent(dentry);
+
+		WARN_ON_ONCE(IS_ROOT(dentry));
+
+		spin_lock(&dentry->d_lock);
+		dentry->d_flags &= ~DCACHE_DISCONNECTED;
+		spin_unlock(&dentry->d_lock);
+
+		dput(dentry);
+		dentry = parent;
+	}
+	dput(dentry);
+}
+
 /*
  * Make sure target_dir is fully connected to the dentry tree.
  *
@@ -128,10 +146,9 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
 
 		if (!IS_ROOT(pd)) {
 			/* must have found a connected parent - great */
-			spin_lock(&pd->d_lock);
-			pd->d_flags &= ~DCACHE_DISCONNECTED;
-			spin_unlock(&pd->d_lock);
-			noprogress = 0;
+			clear_disconnected(target_dir);
+			dput(pd);
+			break;
 		} else {
 			/*
 			 * We have hit the top of a disconnected path, try to

From a056cc8934c7bd046dc44af559bba163115fde40 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 16 Oct 2013 21:09:30 -0400
Subject: [PATCH 084/101] exportfs: stop retrying once we race with
 rename/remove

There are two places here where we could race with a rename or remove:

	- We could find the parent, but then be removed or renamed away
	  from that parent directory before finding our name in that
	  directory.
	- We could find the parent, and find our name in that parent,
	  but then be renamed or removed before we look ourselves up by
	  that name in that parent.

In both cases the concurrent rename or remove will take care of
reconnecting the directory that we're currently examining.  Our target
directory should then also be connected.  Check this and clear
DISCONNECTED in these cases instead of looping around again.

Note: we *do* need to check that this actually happened if we want to be
robust in the face of corrupted filesystems: a corrupted filesystem
could just return a completely wrong parent, and we want to fail with an
error in that case before starting to clear DISCONNECTED on
non-DISCONNECTED filesystems.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exportfs/expfs.c | 45 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 5 deletions(-)

diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index c65b748688ff..6b5ddd5492bc 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -90,6 +90,23 @@ find_disconnected_root(struct dentry *dentry)
 	return dentry;
 }
 
+static bool dentry_connected(struct dentry *dentry)
+{
+	dget(dentry);
+	while (dentry->d_flags & DCACHE_DISCONNECTED) {
+		struct dentry *parent = dget_parent(dentry);
+
+		dput(dentry);
+		if (IS_ROOT(dentry)) {
+			dput(parent);
+			return false;
+		}
+		dentry = parent;
+	}
+	dput(dentry);
+	return true;
+}
+
 static void clear_disconnected(struct dentry *dentry)
 {
 	dget(dentry);
@@ -189,9 +206,9 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
 				dput(pd);
 				if (err == -ENOENT)
 					/* some race between get_parent and
-					 * get_name?  just try again
+					 * get_name?
 					 */
-					continue;
+					goto out_reconnected;
 				break;
 			}
 			dprintk("%s: found name: %s\n", __func__, nbuf);
@@ -211,12 +228,12 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
 			 * hopefully, npd == pd, though it isn't really
 			 * a problem if it isn't
 			 */
+			dput(npd);
+			dput(ppd);
 			if (npd == pd)
 				noprogress = 0;
 			else
-				printk("%s: npd != pd\n", __func__);
-			dput(npd);
-			dput(ppd);
+				goto out_reconnected;
 			if (IS_ROOT(pd)) {
 				/* something went wrong, we have to give up */
 				dput(pd);
@@ -233,6 +250,24 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
 		return err;
 	}
 
+	return 0;
+out_reconnected:
+	/*
+	 * Someone must have renamed our entry into another parent, in
+	 * which case it has been reconnected by the rename.
+	 *
+	 * Or someone removed it entirely, in which case filehandle
+	 * lookup will succeed but the directory is now IS_DEAD and
+	 * subsequent operations on it will fail.
+	 *
+	 * Alternatively, maybe there was no race at all, and the
+	 * filesystem is just corrupt and gave us a parent that doesn't
+	 * actually contain any entry pointing to this inode.  So,
+	 * double check that this worked and return -ESTALE if not:
+	 */
+	if (!dentry_connected(target_dir))
+		return -ESTALE;
+	clear_disconnected(target_dir);
 	return 0;
 }
 

From e4b70ebeeba954cb9cbcf0f19016bb9c2b8711c1 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 16 Oct 2013 21:20:19 -0400
Subject: [PATCH 085/101] exportfs: eliminate unused "noprogress" counter

Note this counter is now being set to 0 on every pass through the loop,
so it no longer serves any useful purpose.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exportfs/expfs.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 6b5ddd5492bc..d8ba88ac10e5 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -145,18 +145,9 @@ static void clear_disconnected(struct dentry *dentry)
 static int
 reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
 {
-	int noprogress = 0;
 	int err = -ESTALE;
 
-	/*
-	 * It is possible that a confused file system might not let us complete
-	 * the path to the root.  For example, if get_parent returns a directory
-	 * in which we cannot find a name for the child.  While this implies a
-	 * very sick filesystem we don't want it to cause knfsd to spin.  Hence
-	 * the noprogress counter.  If we go through the loop 10 times (2 is
-	 * probably enough) without getting anywhere, we just give up
-	 */
-	while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) {
+	while (target_dir->d_flags & DCACHE_DISCONNECTED) {
 		struct dentry *pd = find_disconnected_root(target_dir);
 
 		BUG_ON(pd == mnt->mnt_sb->s_root);
@@ -230,9 +221,7 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
 			 */
 			dput(npd);
 			dput(ppd);
-			if (npd == pd)
-				noprogress = 0;
-			else
+			if (npd != pd)
 				goto out_reconnected;
 			if (IS_ROOT(pd)) {
 				/* something went wrong, we have to give up */

From bbf7a8a3562f2de49ce24db3be0f514459dd7f8b Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Thu, 17 Oct 2013 11:13:00 -0400
Subject: [PATCH 086/101] exportfs: move most of reconnect_path to helper
 function

Also replace 3 easily-confused three-letter acronyms by more helpful
variable names.

Just cleanup, no change in functionality, with one exception: the
dentry_connected() check in the "out_reconnected" case will now only
check the ancestors of the current dentry instead of checking all the
way from target_dir.  Since we've already verified connectivity up to
this dentry, that should be sufficient.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exportfs/expfs.c | 164 +++++++++++++++++++++++---------------------
 1 file changed, 86 insertions(+), 78 deletions(-)

diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index d8ba88ac10e5..d32ead9026f0 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -125,6 +125,86 @@ static void clear_disconnected(struct dentry *dentry)
 	dput(dentry);
 }
 
+/*
+ * Reconnect a directory dentry with its parent.
+ *
+ * This can return a dentry, or NULL, or an error.
+ *
+ * In the first case the returned dentry is the parent of the given
+ * dentry, and may itself need to be reconnected to its parent.
+ *
+ * In the NULL case, a concurrent VFS operation has either renamed or
+ * removed this directory.  The concurrent operation has reconnected our
+ * dentry, so we no longer need to.
+ */
+static struct dentry *reconnect_one(struct vfsmount *mnt,
+		struct dentry *dentry, char *nbuf)
+{
+	struct dentry *parent;
+	struct dentry *tmp;
+	int err;
+
+	parent = ERR_PTR(-EACCES);
+	mutex_lock(&dentry->d_inode->i_mutex);
+	if (mnt->mnt_sb->s_export_op->get_parent)
+		parent = mnt->mnt_sb->s_export_op->get_parent(dentry);
+	mutex_unlock(&dentry->d_inode->i_mutex);
+
+	if (IS_ERR(parent)) {
+		dprintk("%s: get_parent of %ld failed, err %d\n",
+			__func__, dentry->d_inode->i_ino, PTR_ERR(parent));
+		return parent;
+	}
+
+	dprintk("%s: find name of %lu in %lu\n", __func__,
+		dentry->d_inode->i_ino, parent->d_inode->i_ino);
+	err = exportfs_get_name(mnt, parent, nbuf, dentry);
+	if (err == -ENOENT)
+		goto out_reconnected;
+	if (err)
+		goto out_err;
+	dprintk("%s: found name: %s\n", __func__, nbuf);
+	mutex_lock(&parent->d_inode->i_mutex);
+	tmp = lookup_one_len(nbuf, parent, strlen(nbuf));
+	mutex_unlock(&parent->d_inode->i_mutex);
+	if (IS_ERR(tmp)) {
+		dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
+		goto out_err;
+	}
+	if (tmp != dentry) {
+		dput(tmp);
+		goto out_reconnected;
+	}
+	dput(tmp);
+	if (IS_ROOT(dentry)) {
+		err = -ESTALE;
+		goto out_err;
+	}
+	return parent;
+
+out_err:
+	dput(parent);
+	return ERR_PTR(err);
+out_reconnected:
+	dput(parent);
+	/*
+	 * Someone must have renamed our entry into another parent, in
+	 * which case it has been reconnected by the rename.
+	 *
+	 * Or someone removed it entirely, in which case filehandle
+	 * lookup will succeed but the directory is now IS_DEAD and
+	 * subsequent operations on it will fail.
+	 *
+	 * Alternatively, maybe there was no race at all, and the
+	 * filesystem is just corrupt and gave us a parent that doesn't
+	 * actually contain any entry pointing to this inode.  So,
+	 * double check that this worked and return -ESTALE if not:
+	 */
+	if (!dentry_connected(dentry))
+		return ERR_PTR(-ESTALE);
+	return NULL;
+}
+
 /*
  * Make sure target_dir is fully connected to the dentry tree.
  *
@@ -158,76 +238,19 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
 			dput(pd);
 			break;
 		} else {
+			struct dentry *parent;
 			/*
 			 * We have hit the top of a disconnected path, try to
 			 * find parent and connect.
-			 *
-			 * Racing with some other process renaming a directory
-			 * isn't much of a problem here.  If someone renames
-			 * the directory, it will end up properly connected,
-			 * which is what we want
-			 *
-			 * Getting the parent can't be supported generically,
-			 * the locking is too icky.
-			 *
-			 * Instead we just return EACCES.  If server reboots
-			 * or inodes get flushed, you lose
 			 */
-			struct dentry *ppd = ERR_PTR(-EACCES);
-			struct dentry *npd;
-
-			mutex_lock(&pd->d_inode->i_mutex);
-			if (mnt->mnt_sb->s_export_op->get_parent)
-				ppd = mnt->mnt_sb->s_export_op->get_parent(pd);
-			mutex_unlock(&pd->d_inode->i_mutex);
-
-			if (IS_ERR(ppd)) {
-				err = PTR_ERR(ppd);
-				dprintk("%s: get_parent of %ld failed, err %d\n",
-					__func__, pd->d_inode->i_ino, err);
-				dput(pd);
-				break;
-			}
-
-			dprintk("%s: find name of %lu in %lu\n", __func__,
-				pd->d_inode->i_ino, ppd->d_inode->i_ino);
-			err = exportfs_get_name(mnt, ppd, nbuf, pd);
-			if (err) {
-				dput(ppd);
-				dput(pd);
-				if (err == -ENOENT)
-					/* some race between get_parent and
-					 * get_name?
-					 */
-					goto out_reconnected;
-				break;
-			}
-			dprintk("%s: found name: %s\n", __func__, nbuf);
-			mutex_lock(&ppd->d_inode->i_mutex);
-			npd = lookup_one_len(nbuf, ppd, strlen(nbuf));
-			mutex_unlock(&ppd->d_inode->i_mutex);
-			if (IS_ERR(npd)) {
-				err = PTR_ERR(npd);
-				dprintk("%s: lookup failed: %d\n",
-					__func__, err);
-				dput(ppd);
-				dput(pd);
-				break;
-			}
-			/* we didn't really want npd, we really wanted
-			 * a side-effect of the lookup.
-			 * hopefully, npd == pd, though it isn't really
-			 * a problem if it isn't
-			 */
-			dput(npd);
-			dput(ppd);
-			if (npd != pd)
+			 parent = reconnect_one(mnt, pd, nbuf);
+			 if (!parent)
 				goto out_reconnected;
-			if (IS_ROOT(pd)) {
-				/* something went wrong, we have to give up */
-				dput(pd);
+			if (IS_ERR(parent)) {
+				err = PTR_ERR(parent);
 				break;
 			}
+			dput(parent);
 		}
 		dput(pd);
 	}
@@ -241,21 +264,6 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
 
 	return 0;
 out_reconnected:
-	/*
-	 * Someone must have renamed our entry into another parent, in
-	 * which case it has been reconnected by the rename.
-	 *
-	 * Or someone removed it entirely, in which case filehandle
-	 * lookup will succeed but the directory is now IS_DEAD and
-	 * subsequent operations on it will fail.
-	 *
-	 * Alternatively, maybe there was no race at all, and the
-	 * filesystem is just corrupt and gave us a parent that doesn't
-	 * actually contain any entry pointing to this inode.  So,
-	 * double check that this worked and return -ESTALE if not:
-	 */
-	if (!dentry_connected(target_dir))
-		return -ESTALE;
 	clear_disconnected(target_dir);
 	return 0;
 }

From efbf201f7a0be7ffc6532e672fbccb0eed4f5de0 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Thu, 17 Oct 2013 21:42:35 -0400
Subject: [PATCH 087/101] exportfs: better variable name

Replace another unhelpful acronym.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exportfs/expfs.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index d32ead9026f0..b33b9c4deea0 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -228,14 +228,14 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
 	int err = -ESTALE;
 
 	while (target_dir->d_flags & DCACHE_DISCONNECTED) {
-		struct dentry *pd = find_disconnected_root(target_dir);
+		struct dentry *dentry = find_disconnected_root(target_dir);
 
-		BUG_ON(pd == mnt->mnt_sb->s_root);
+		BUG_ON(dentry == mnt->mnt_sb->s_root);
 
-		if (!IS_ROOT(pd)) {
+		if (!IS_ROOT(dentry)) {
 			/* must have found a connected parent - great */
 			clear_disconnected(target_dir);
-			dput(pd);
+			dput(dentry);
 			break;
 		} else {
 			struct dentry *parent;
@@ -243,7 +243,7 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
 			 * We have hit the top of a disconnected path, try to
 			 * find parent and connect.
 			 */
-			 parent = reconnect_one(mnt, pd, nbuf);
+			 parent = reconnect_one(mnt, dentry, nbuf);
 			 if (!parent)
 				goto out_reconnected;
 			if (IS_ERR(parent)) {
@@ -252,7 +252,7 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
 			}
 			dput(parent);
 		}
-		dput(pd);
+		dput(dentry);
 	}
 
 	if (target_dir->d_flags & DCACHE_DISCONNECTED) {

From f27c9298fd717e1f7e63e314a7a85a3a7e77139d Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Thu, 17 Oct 2013 21:34:21 -0400
Subject: [PATCH 088/101] exportfs: fix quadratic behavior in filehandle lookup

Suppose we're given the filehandle for a directory whose closest
ancestor in the dcache is its Nth ancestor.

The main loop in reconnect_path searches for an IS_ROOT ancestor of
target_dir, reconnects that ancestor to its parent, then recommences the
search for an IS_ROOT ancestor from target_dir.

This behavior is quadratic in N.  And there's really no need to restart
the search from target_dir each time: once a directory has been looked
up, it won't become IS_ROOT again.  So instead of starting from
target_dir each time, we can continue where we left off.

This simplifies the code and improves performance on very deep directory
heirachies.  (I can't think of any reason anyone should need heirarchies
a hundred or more deep, but the performance improvement may be valuable
if only to limit damage in case of abuse.)

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exportfs/expfs.c | 66 +++++++++------------------------------------
 1 file changed, 13 insertions(+), 53 deletions(-)

diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index b33b9c4deea0..48a359dd286e 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -69,27 +69,6 @@ find_acceptable_alias(struct dentry *result,
 	return NULL;
 }
 
-/*
- * Find root of a disconnected subtree and return a reference to it.
- */
-static struct dentry *
-find_disconnected_root(struct dentry *dentry)
-{
-	dget(dentry);
-	while (!IS_ROOT(dentry)) {
-		struct dentry *parent = dget_parent(dentry);
-
-		if (!(parent->d_flags & DCACHE_DISCONNECTED)) {
-			dput(parent);
-			break;
-		}
-
-		dput(dentry);
-		dentry = parent;
-	}
-	return dentry;
-}
-
 static bool dentry_connected(struct dentry *dentry)
 {
 	dget(dentry);
@@ -225,45 +204,26 @@ out_reconnected:
 static int
 reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
 {
-	int err = -ESTALE;
+	struct dentry *dentry, *parent;
 
-	while (target_dir->d_flags & DCACHE_DISCONNECTED) {
-		struct dentry *dentry = find_disconnected_root(target_dir);
+	dentry = dget(target_dir);
 
+	while (dentry->d_flags & DCACHE_DISCONNECTED) {
 		BUG_ON(dentry == mnt->mnt_sb->s_root);
 
-		if (!IS_ROOT(dentry)) {
-			/* must have found a connected parent - great */
-			clear_disconnected(target_dir);
-			dput(dentry);
+		if (IS_ROOT(dentry))
+			parent = reconnect_one(mnt, dentry, nbuf);
+		else
+			parent = dget_parent(dentry);
+
+		if (!parent)
 			break;
-		} else {
-			struct dentry *parent;
-			/*
-			 * We have hit the top of a disconnected path, try to
-			 * find parent and connect.
-			 */
-			 parent = reconnect_one(mnt, dentry, nbuf);
-			 if (!parent)
-				goto out_reconnected;
-			if (IS_ERR(parent)) {
-				err = PTR_ERR(parent);
-				break;
-			}
-			dput(parent);
-		}
 		dput(dentry);
+		if (IS_ERR(parent))
+			return PTR_ERR(parent);
+		dentry = parent;
 	}
-
-	if (target_dir->d_flags & DCACHE_DISCONNECTED) {
-		/* something went wrong - oh-well */
-		if (!err)
-			err = -ESTALE;
-		return err;
-	}
-
-	return 0;
-out_reconnected:
+	dput(dentry);
 	clear_disconnected(target_dir);
 	return 0;
 }

From 375e289ea85166c5241c570940e7e7e966c63a9f Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 18 Apr 2012 15:16:33 -0400
Subject: [PATCH 089/101] vfs: pull ext4's double-i_mutex-locking into common
 code

We want to do this elsewhere as well.

Also catch any attempts to use it for directories (where this ordering
would conflict with ancestor-first directory ordering in lock_rename).

Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: Dave Chinner <david@fromorbit.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Acked-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext4/ext4.h        |  2 --
 fs/ext4/ioctl.c       |  4 ++--
 fs/ext4/move_extent.c | 40 ++--------------------------------------
 fs/inode.c            | 36 ++++++++++++++++++++++++++++++++++++
 include/linux/fs.h    |  3 +++
 5 files changed, 43 insertions(+), 42 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index af815ea9d7cc..d01d62315f7e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2734,8 +2734,6 @@ extern void ext4_double_down_write_data_sem(struct inode *first,
 					    struct inode *second);
 extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
 					  struct inode *donor_inode);
-void ext4_inode_double_lock(struct inode *inode1, struct inode *inode2);
-void ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2);
 extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 			     __u64 start_orig, __u64 start_donor,
 			     __u64 len, __u64 *moved_len);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a569d335f804..60589b60e9b0 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -130,7 +130,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
 
 	/* Protect orig inodes against a truncate and make sure,
 	 * that only 1 swap_inode_boot_loader is running. */
-	ext4_inode_double_lock(inode, inode_bl);
+	lock_two_nondirectories(inode, inode_bl);
 
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode_bl->i_data, 0);
@@ -205,7 +205,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
 	ext4_inode_resume_unlocked_dio(inode);
 	ext4_inode_resume_unlocked_dio(inode_bl);
 
-	ext4_inode_double_unlock(inode, inode_bl);
+	unlock_two_nondirectories(inode, inode_bl);
 
 	iput(inode_bl);
 
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 7fa4d855dbd5..773b503bd18c 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -1202,42 +1202,6 @@ mext_check_arguments(struct inode *orig_inode,
 	return 0;
 }
 
-/**
- * ext4_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
- *
- * @inode1:	the inode structure
- * @inode2:	the inode structure
- *
- * Lock two inodes' i_mutex
- */
-void
-ext4_inode_double_lock(struct inode *inode1, struct inode *inode2)
-{
-	BUG_ON(inode1 == inode2);
-	if (inode1 < inode2) {
-		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
-		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
-	} else {
-		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
-		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
-	}
-}
-
-/**
- * ext4_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
- *
- * @inode1:     the inode that is released first
- * @inode2:     the inode that is released second
- *
- */
-
-void
-ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2)
-{
-	mutex_unlock(&inode1->i_mutex);
-	mutex_unlock(&inode2->i_mutex);
-}
-
 /**
  * ext4_move_extents - Exchange the specified range of a file
  *
@@ -1327,7 +1291,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
 		return -EINVAL;
 	}
 	/* Protect orig and donor inodes against a truncate */
-	ext4_inode_double_lock(orig_inode, donor_inode);
+	lock_two_nondirectories(orig_inode, donor_inode);
 
 	/* Wait for all existing dio workers */
 	ext4_inode_block_unlocked_dio(orig_inode);
@@ -1535,7 +1499,7 @@ out:
 	ext4_double_up_write_data_sem(orig_inode, donor_inode);
 	ext4_inode_resume_unlocked_dio(orig_inode);
 	ext4_inode_resume_unlocked_dio(donor_inode);
-	ext4_inode_double_unlock(orig_inode, donor_inode);
+	unlock_two_nondirectories(orig_inode, donor_inode);
 
 	return ret;
 }
diff --git a/fs/inode.c b/fs/inode.c
index bf7a8b3ab937..ef25afdf3906 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -942,6 +942,42 @@ void unlock_new_inode(struct inode *inode)
 }
 EXPORT_SYMBOL(unlock_new_inode);
 
+/**
+ * lock_two_nondirectories - take two i_mutexes on non-directory objects
+ * @inode1: first inode to lock
+ * @inode2: second inode to lock
+ */
+void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
+{
+	WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
+	if (inode1 == inode2 || !inode2) {
+		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
+		return;
+	}
+	WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
+	if (inode1 < inode2) {
+		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
+		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+	} else {
+		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
+		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
+	}
+}
+EXPORT_SYMBOL(lock_two_nondirectories);
+
+/**
+ * unlock_two_nondirectories - release locks from lock_two_nondirectories()
+ * @inode1: first inode to unlock
+ * @inode2: second inode to unlock
+ */
+void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
+{
+	mutex_unlock(&inode1->i_mutex);
+	if (inode2 && inode2 != inode1)
+		mutex_unlock(&inode2->i_mutex);
+}
+EXPORT_SYMBOL(unlock_two_nondirectories);
+
 /**
  * iget5_locked - obtain an inode from a mounted file system
  * @sb:		super block of file system
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5e44b0893db8..4e1a0b41f966 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -637,6 +637,9 @@ enum inode_i_mutex_lock_class
 	I_MUTEX_QUOTA
 };
 
+void lock_two_nondirectories(struct inode *, struct inode*);
+void unlock_two_nondirectories(struct inode *, struct inode*);
+
 /*
  * NOTE: in a 32bit arch with a preemptable kernel and
  * an UP compile the i_size_read/write must be atomic

From 275555163e3ac09c016c964a10e0f389cf152c4c Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 25 Apr 2012 07:19:52 -0400
Subject: [PATCH 090/101] vfs: don't use PARENT/CHILD lock classes for
 non-directories

Reserve I_MUTEX_PARENT and I_MUTEX_CHILD for locking of actual
directories.

(Also I_MUTEX_QUOTA isn't really a meaningful name for this locking
class any more; fixed in a later patch.)

Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index ef25afdf3906..51f329b98aa4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -951,16 +951,16 @@ void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
 {
 	WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
 	if (inode1 == inode2 || !inode2) {
-		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
+		mutex_lock(&inode1->i_mutex);
 		return;
 	}
 	WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
 	if (inode1 < inode2) {
-		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
-		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+		mutex_lock(&inode1->i_mutex);
+		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_QUOTA);
 	} else {
-		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
-		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
+		mutex_lock(&inode2->i_mutex);
+		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_QUOTA);
 	}
 }
 EXPORT_SYMBOL(lock_two_nondirectories);

From 40bd22c9f8617ddd5da06044c81f72a2cf700791 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 18 Apr 2012 15:21:34 -0400
Subject: [PATCH 091/101] vfs: rename I_MUTEX_QUOTA now that it's not used for
 quotas

I_MUTEX_QUOTA is now just being used whenever we want to lock two
non-directories.  So the name isn't right.  I_MUTEX_NONDIR2 isn't
especially elegant but it's the best I could think of.

Also fix some outdated documentation.

Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c         | 4 ++--
 include/linux/fs.h | 9 ++++++---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index 51f329b98aa4..ce48c359ce9e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -957,10 +957,10 @@ void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
 	WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
 	if (inode1 < inode2) {
 		mutex_lock(&inode1->i_mutex);
-		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_QUOTA);
+		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_NONDIR2);
 	} else {
 		mutex_lock(&inode2->i_mutex);
-		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_QUOTA);
+		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_NONDIR2);
 	}
 }
 EXPORT_SYMBOL(lock_two_nondirectories);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4e1a0b41f966..ed7f94af1ab2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -623,10 +623,13 @@ static inline int inode_unhashed(struct inode *inode)
  * 0: the object of the current VFS operation
  * 1: parent
  * 2: child/target
- * 3: quota file
+ * 3: xattr
+ * 4: second non-directory
+ * The last is for certain operations (such as rename) which lock two
+ * non-directories at once.
  *
  * The locking order between these classes is
- * parent -> child -> normal -> xattr -> quota
+ * parent -> child -> normal -> xattr -> second non-directory
  */
 enum inode_i_mutex_lock_class
 {
@@ -634,7 +637,7 @@ enum inode_i_mutex_lock_class
 	I_MUTEX_PARENT,
 	I_MUTEX_CHILD,
 	I_MUTEX_XATTR,
-	I_MUTEX_QUOTA
+	I_MUTEX_NONDIR2
 };
 
 void lock_two_nondirectories(struct inode *, struct inode*);

From 6cedba8962f440c72447f811d0d530a8a9dc637a Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 5 Mar 2012 11:40:41 -0500
Subject: [PATCH 092/101] vfs: take i_mutex on renamed file

A read delegation is used by NFSv4 as a guarantee that a client can
perform local read opens without informing the server.

The open operation takes the last component of the pathname as an
argument, thus is also a lookup operation, and giving the client the
above guarantee means informing the client before we allow anything that
would change the set of names pointing to the inode.

Therefore, we need to break delegations on rename, link, and unlink.

We also need to prevent new delegations from being acquired while one of
these operations is in progress.

We could add some completely new locking for that purpose, but it's
simpler to use the i_mutex, since that's already taken by all the
operations we care about.

The single exception is rename.  So, modify rename to take the i_mutex
on the file that is being renamed.

Also fix up lockdep and Documentation/filesystems/directory-locking to
reflect the change.

Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/directory-locking | 31 +++++++++++++++------
 fs/namei.c                                  | 10 +++----
 2 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/Documentation/filesystems/directory-locking b/Documentation/filesystems/directory-locking
index ff7b611abf33..09bbf9a54f80 100644
--- a/Documentation/filesystems/directory-locking
+++ b/Documentation/filesystems/directory-locking
@@ -2,6 +2,10 @@
 kinds of locks - per-inode (->i_mutex) and per-filesystem
 (->s_vfs_rename_mutex).
 
+	When taking the i_mutex on multiple non-directory objects, we
+always acquire the locks in order by increasing address.  We'll call
+that "inode pointer" order in the following.
+
 	For our purposes all operations fall in 5 classes:
 
 1) read access.  Locking rules: caller locks directory we are accessing.
@@ -12,8 +16,9 @@ kinds of locks - per-inode (->i_mutex) and per-filesystem
 locks victim and calls the method.
 
 4) rename() that is _not_ cross-directory.  Locking rules: caller locks
-the parent, finds source and target, if target already exists - locks it
-and then calls the method.
+the parent and finds source and target.  If target already exists, lock
+it.  If source is a non-directory, lock it.  If that means we need to
+lock both, lock them in inode pointer order.
 
 5) link creation.  Locking rules:
 	* lock parent
@@ -30,7 +35,9 @@ rules:
 		fail with -ENOTEMPTY
 	* if new parent is equal to or is a descendent of source
 		fail with -ELOOP
-	* if target exists - lock it.
+	* If target exists, lock it.  If source is a non-directory, lock
+	  it.  In case that means we need to lock both source and target,
+	  do so in inode pointer order.
 	* call the method.
 
 
@@ -56,9 +63,11 @@ objects - A < B iff A is an ancestor of B.
     renames will be blocked on filesystem lock and we don't start changing
     the order until we had acquired all locks).
 
-(3) any operation holds at most one lock on non-directory object and
-    that lock is acquired after all other locks.  (Proof: see descriptions
-    of operations).
+(3) locks on non-directory objects are acquired only after locks on
+    directory objects, and are acquired in inode pointer order.
+    (Proof: all operations but renames take lock on at most one
+    non-directory object, except renames, which take locks on source and
+    target in inode pointer order in the case they are not directories.)
 
 	Now consider the minimal deadlock.  Each process is blocked on
 attempt to acquire some lock and already holds at least one lock.  Let's
@@ -66,9 +75,13 @@ consider the set of contended locks.  First of all, filesystem lock is
 not contended, since any process blocked on it is not holding any locks.
 Thus all processes are blocked on ->i_mutex.
 
-	Non-directory objects are not contended due to (3).  Thus link
-creation can't be a part of deadlock - it can't be blocked on source
-and it means that it doesn't hold any locks.
+	By (3), any process holding a non-directory lock can only be
+waiting on another non-directory lock with a larger address.  Therefore
+the process holding the "largest" such lock can always make progress, and
+non-directory objects are not included in the set of contended locks.
+
+	Thus link creation can't be a part of deadlock - it can't be
+blocked on source and it means that it doesn't hold any locks.
 
 	Any contended object is either held by cross-directory rename or
 has a child that is also contended.  Indeed, suppose that it is held by
diff --git a/fs/namei.c b/fs/namei.c
index 2a5a7aa9f43f..88cec0330bf7 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3918,7 +3918,8 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
  *	   That's where 4.4 screws up. Current fix: serialization on
  *	   sb->s_vfs_rename_mutex. We might be more accurate, but that's another
  *	   story.
- *	c) we have to lock _three_ objects - parents and victim (if it exists).
+ *	c) we have to lock _four_ objects - parents and victim (if it exists),
+ *	   and source (if it is not a directory).
  *	   And that - after we got ->i_mutex on parents (until then we don't know
  *	   whether the target exists).  Solution: try to be smart with locking
  *	   order for inodes.  We rely on the fact that tree topology may change
@@ -3994,6 +3995,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 			    struct inode *new_dir, struct dentry *new_dentry)
 {
 	struct inode *target = new_dentry->d_inode;
+	struct inode *source = old_dentry->d_inode;
 	int error;
 
 	error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
@@ -4001,8 +4003,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 		return error;
 
 	dget(new_dentry);
-	if (target)
-		mutex_lock(&target->i_mutex);
+	lock_two_nondirectories(source, target);
 
 	error = -EBUSY;
 	if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
@@ -4017,8 +4018,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 	if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
 		d_move(old_dentry, new_dentry);
 out:
-	if (target)
-		mutex_unlock(&target->i_mutex);
+	unlock_two_nondirectories(source, target);
 	dput(new_dentry);
 	return error;
 }

From 617588d5186c887eb94321b021bb5a46f896f4b3 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 1 Jul 2011 15:18:34 -0400
Subject: [PATCH 093/101] locks: introduce new FL_DELEG lock flag

For now FL_DELEG is just a synonym for FL_LEASE.  So this patch doesn't
change behavior.

Next we'll modify break_lease to treat FL_DELEG leases differently, to
account for the fact that NFSv4 delegations should be broken in more
situations than Windows oplocks.

Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/locks.c          | 2 +-
 fs/nfsd/nfs4state.c | 2 +-
 include/linux/fs.h  | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index f155ffd648bb..079abcd8a836 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -134,7 +134,7 @@
 
 #define IS_POSIX(fl)	(fl->fl_flags & FL_POSIX)
 #define IS_FLOCK(fl)	(fl->fl_flags & FL_FLOCK)
-#define IS_LEASE(fl)	(fl->fl_flags & FL_LEASE)
+#define IS_LEASE(fl)	(fl->fl_flags & (FL_LEASE|FL_DELEG))
 
 static bool lease_breaking(struct file_lock *fl)
 {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a601fd49f997..f36a30a9f2d1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3008,7 +3008,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f
 		return NULL;
 	locks_init_lock(fl);
 	fl->fl_lmops = &nfsd_lease_mng_ops;
-	fl->fl_flags = FL_LEASE;
+	fl->fl_flags = FL_DELEG;
 	fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
 	fl->fl_end = OFFSET_MAX;
 	fl->fl_owner = (fl_owner_t)(dp->dl_file);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed7f94af1ab2..129e150f9e94 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -880,6 +880,7 @@ static inline int file_check_writeable(struct file *filp)
 
 #define FL_POSIX	1
 #define FL_FLOCK	2
+#define FL_DELEG	4	/* NFSv4 delegation */
 #define FL_ACCESS	8	/* not trying to lock, just looking */
 #define FL_EXISTS	16	/* when unlocking, test for existence */
 #define FL_LEASE	32	/* lease held on this file */

From df4e8d2c1d2bbbbace706bfe5417320c9e3fbee3 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 5 Mar 2012 13:18:59 -0500
Subject: [PATCH 094/101] locks: implement delegations

Implement NFSv4 delegations at the vfs level using the new FL_DELEG lock
type.

Note nfsd is the only delegation user and is only using read
delegations.  Warn on any attempt to set a write delegation for now.
We'll come back to that case later.

Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/locks.c         | 55 +++++++++++++++++++++++++++++++++++++---------
 include/linux/fs.h | 18 ++++++++++++---
 2 files changed, 60 insertions(+), 13 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 079abcd8a836..f99d52bdd05a 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1292,28 +1292,40 @@ static void time_out_leases(struct inode *inode)
 	}
 }
 
+static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
+{
+	if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE))
+		return false;
+	return locks_conflict(breaker, lease);
+}
+
 /**
  *	__break_lease	-	revoke all outstanding leases on file
  *	@inode: the inode of the file to return
- *	@mode: the open mode (read or write)
+ *	@mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR:
+ *	    break all leases
+ *	@type: FL_LEASE: break leases and delegations; FL_DELEG: break
+ *	    only delegations
  *
  *	break_lease (inlined for speed) has checked there already is at least
  *	some kind of lock (maybe a lease) on this file.  Leases are broken on
  *	a call to open() or truncate().  This function can sleep unless you
  *	specified %O_NONBLOCK to your open().
  */
-int __break_lease(struct inode *inode, unsigned int mode)
+int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
 {
 	int error = 0;
 	struct file_lock *new_fl, *flock;
 	struct file_lock *fl;
 	unsigned long break_time;
 	int i_have_this_lease = 0;
+	bool lease_conflict = false;
 	int want_write = (mode & O_ACCMODE) != O_RDONLY;
 
 	new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
 	if (IS_ERR(new_fl))
 		return PTR_ERR(new_fl);
+	new_fl->fl_flags = type;
 
 	spin_lock(&inode->i_lock);
 
@@ -1323,13 +1335,16 @@ int __break_lease(struct inode *inode, unsigned int mode)
 	if ((flock == NULL) || !IS_LEASE(flock))
 		goto out;
 
-	if (!locks_conflict(flock, new_fl))
+	for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
+		if (leases_conflict(fl, new_fl)) {
+			lease_conflict = true;
+			if (fl->fl_owner == current->files)
+				i_have_this_lease = 1;
+		}
+	}
+	if (!lease_conflict)
 		goto out;
 
-	for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next)
-		if (fl->fl_owner == current->files)
-			i_have_this_lease = 1;
-
 	break_time = 0;
 	if (lease_break_time > 0) {
 		break_time = jiffies + lease_break_time * HZ;
@@ -1338,6 +1353,8 @@ int __break_lease(struct inode *inode, unsigned int mode)
 	}
 
 	for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
+		if (!leases_conflict(fl, new_fl))
+			continue;
 		if (want_write) {
 			if (fl->fl_flags & FL_UNLOCK_PENDING)
 				continue;
@@ -1379,7 +1396,7 @@ restart:
 		 */
 		for (flock = inode->i_flock; flock && IS_LEASE(flock);
 				flock = flock->fl_next) {
-			if (locks_conflict(new_fl, flock))
+			if (leases_conflict(new_fl, flock))
 				goto restart;
 		}
 		error = 0;
@@ -1460,9 +1477,26 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp
 	struct file_lock *fl, **before, **my_before = NULL, *lease;
 	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
+	bool is_deleg = (*flp)->fl_flags & FL_DELEG;
 	int error;
 
 	lease = *flp;
+	/*
+	 * In the delegation case we need mutual exclusion with
+	 * a number of operations that take the i_mutex.  We trylock
+	 * because delegations are an optional optimization, and if
+	 * there's some chance of a conflict--we'd rather not
+	 * bother, maybe that's a sign this just isn't a good file to
+	 * hand out a delegation on.
+	 */
+	if (is_deleg && !mutex_trylock(&inode->i_mutex))
+		return -EAGAIN;
+
+	if (is_deleg && arg == F_WRLCK) {
+		/* Write delegations are not currently supported: */
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	}
 
 	error = -EAGAIN;
 	if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
@@ -1514,9 +1548,10 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp
 		goto out;
 
 	locks_insert_lock(before, lease);
-	return 0;
-
+	error = 0;
 out:
+	if (is_deleg)
+		mutex_unlock(&inode->i_mutex);
 	return error;
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 129e150f9e94..8e4be1be1a62 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1022,7 +1022,7 @@ extern int vfs_test_lock(struct file *, struct file_lock *);
 extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
 extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
 extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
-extern int __break_lease(struct inode *inode, unsigned int flags);
+extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
 extern void lease_get_mtime(struct inode *, struct timespec *time);
 extern int generic_setlease(struct file *, long, struct file_lock **);
 extern int vfs_setlease(struct file *, long, struct file_lock **);
@@ -1131,7 +1131,7 @@ static inline int flock_lock_file_wait(struct file *filp,
 	return -ENOLCK;
 }
 
-static inline int __break_lease(struct inode *inode, unsigned int mode)
+static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
 {
 	return 0;
 }
@@ -1961,9 +1961,17 @@ static inline int locks_verify_truncate(struct inode *inode,
 static inline int break_lease(struct inode *inode, unsigned int mode)
 {
 	if (inode->i_flock)
-		return __break_lease(inode, mode);
+		return __break_lease(inode, mode, FL_LEASE);
 	return 0;
 }
+
+static inline int break_deleg(struct inode *inode, unsigned int mode)
+{
+	if (inode->i_flock)
+		return __break_lease(inode, mode, FL_DELEG);
+	return 0;
+}
+
 #else /* !CONFIG_FILE_LOCKING */
 static inline int locks_mandatory_locked(struct inode *inode)
 {
@@ -2003,6 +2011,10 @@ static inline int break_lease(struct inode *inode, unsigned int mode)
 	return 0;
 }
 
+static inline int break_deleg(struct inode *inode, unsigned int mode)
+{
+	return 0;
+}
 #endif /* CONFIG_FILE_LOCKING */
 
 /* fs/open.c */

From 9accbb977ab78234b8f298df5f306ed08d06bedb Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 28 Aug 2012 07:03:24 -0400
Subject: [PATCH 095/101] namei: minor vfs_unlink cleanup

We'll be using dentry->d_inode in one more place.

Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 88cec0330bf7..e633a58d4222 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3617,6 +3617,7 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
 
 int vfs_unlink(struct inode *dir, struct dentry *dentry)
 {
+	struct inode *target = dentry->d_inode;
 	int error = may_delete(dir, dentry, 0);
 
 	if (error)
@@ -3625,7 +3626,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
 	if (!dir->i_op->unlink)
 		return -EPERM;
 
-	mutex_lock(&dentry->d_inode->i_mutex);
+	mutex_lock(&target->i_mutex);
 	if (d_mountpoint(dentry))
 		error = -EBUSY;
 	else {
@@ -3636,11 +3637,11 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
 				dont_mount(dentry);
 		}
 	}
-	mutex_unlock(&dentry->d_inode->i_mutex);
+	mutex_unlock(&target->i_mutex);
 
 	/* We don't d_delete() NFS sillyrenamed files--they still exist. */
 	if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
-		fsnotify_link_count(dentry->d_inode);
+		fsnotify_link_count(target);
 		d_delete(dentry);
 	}
 

From b21996e36c8e3b92a84e972378bde80b43acd890 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 20 Sep 2011 09:14:34 -0400
Subject: [PATCH 096/101] locks: break delegations on unlink

We need to break delegations on any operation that changes the set of
links pointing to an inode.  Start with unlink.

Such operations also hold the i_mutex on a parent directory.  Breaking a
delegation may require waiting for a timeout (by default 90 seconds) in
the case of a unresponsive NFS client.  To avoid blocking all directory
operations, we therefore drop locks before waiting for the delegation.
The logic then looks like:

	acquire locks
	...
	test for delegation; if found:
		take reference on inode
		release locks
		wait for delegation break
		drop reference on inode
		retry

It is possible this could never terminate.  (Even if we take precautions
to prevent another delegation being acquired on the same inode, we could
get a different inode on each retry.)  But this seems very unlikely.

The initial test for a delegation happens after the lock on the target
inode is acquired, but the directory inode may have been acquired
further up the call stack.  We therefore add a "struct inode **"
argument to any intervening functions, which we use to pass the inode
back up to the caller in the case it needs a delegation synchronously
broken.

Cc: David Howells <dhowells@redhat.com>
Cc: Tyler Hicks <tyhicks@canonical.com>
Cc: Dustin Kirkland <dustin.kirkland@gazzang.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/base/devtmpfs.c |  2 +-
 fs/cachefiles/namei.c   |  2 +-
 fs/ecryptfs/inode.c     |  4 ++--
 fs/namei.c              | 42 ++++++++++++++++++++++++++++++++++++++---
 fs/nfsd/vfs.c           |  2 +-
 include/linux/fs.h      |  2 +-
 ipc/mqueue.c            |  2 +-
 7 files changed, 46 insertions(+), 10 deletions(-)

diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 7413d065906b..1b8490e2fbde 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -324,7 +324,7 @@ static int handle_remove(const char *nodename, struct device *dev)
 			mutex_lock(&dentry->d_inode->i_mutex);
 			notify_change(dentry, &newattrs);
 			mutex_unlock(&dentry->d_inode->i_mutex);
-			err = vfs_unlink(parent.dentry->d_inode, dentry);
+			err = vfs_unlink(parent.dentry->d_inode, dentry, NULL);
 			if (!err || err == -ENOENT)
 				deleted = 1;
 		}
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index f4a08d7fa2f7..31d480c0e046 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -294,7 +294,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
 		if (ret < 0) {
 			cachefiles_io_error(cache, "Unlink security error");
 		} else {
-			ret = vfs_unlink(dir->d_inode, rep);
+			ret = vfs_unlink(dir->d_inode, rep, NULL);
 
 			if (preemptive)
 				cachefiles_mark_object_buried(cache, rep);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 0f9b66eaa767..dc60b8bd09ec 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -153,7 +153,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
 
 	dget(lower_dentry);
 	lower_dir_dentry = lock_parent(lower_dentry);
-	rc = vfs_unlink(lower_dir_inode, lower_dentry);
+	rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL);
 	if (rc) {
 		printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
 		goto out_unlock;
@@ -208,7 +208,7 @@ ecryptfs_do_create(struct inode *directory_inode,
 	inode = __ecryptfs_get_inode(lower_dentry->d_inode,
 				     directory_inode->i_sb);
 	if (IS_ERR(inode)) {
-		vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
+		vfs_unlink(lower_dir_dentry->d_inode, lower_dentry, NULL);
 		goto out_lock;
 	}
 	fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode);
diff --git a/fs/namei.c b/fs/namei.c
index e633a58d4222..67ce331a3ed8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3615,7 +3615,25 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
 	return do_rmdir(AT_FDCWD, pathname);
 }
 
-int vfs_unlink(struct inode *dir, struct dentry *dentry)
+/**
+ * vfs_unlink - unlink a filesystem object
+ * @dir:	parent directory
+ * @dentry:	victim
+ * @delegated_inode: returns victim inode, if the inode is delegated.
+ *
+ * The caller must hold dir->i_mutex.
+ *
+ * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
+ * return a reference to the inode in delegated_inode.  The caller
+ * should then break the delegation on that inode and retry.  Because
+ * breaking a delegation may take a long time, the caller should drop
+ * dir->i_mutex before doing so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
+int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
 {
 	struct inode *target = dentry->d_inode;
 	int error = may_delete(dir, dentry, 0);
@@ -3632,11 +3650,20 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
 	else {
 		error = security_inode_unlink(dir, dentry);
 		if (!error) {
+			error = break_deleg(target, O_WRONLY|O_NONBLOCK);
+			if (error) {
+				if (error == -EWOULDBLOCK && delegated_inode) {
+					*delegated_inode = target;
+					ihold(target);
+				}
+				goto out;
+			}
 			error = dir->i_op->unlink(dir, dentry);
 			if (!error)
 				dont_mount(dentry);
 		}
 	}
+out:
 	mutex_unlock(&target->i_mutex);
 
 	/* We don't d_delete() NFS sillyrenamed files--they still exist. */
@@ -3661,6 +3688,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
 	struct dentry *dentry;
 	struct nameidata nd;
 	struct inode *inode = NULL;
+	struct inode *delegated_inode = NULL;
 	unsigned int lookup_flags = 0;
 retry:
 	name = user_path_parent(dfd, pathname, &nd, lookup_flags);
@@ -3675,7 +3703,7 @@ retry:
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto exit1;
-
+retry_deleg:
 	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	dentry = lookup_hash(&nd);
 	error = PTR_ERR(dentry);
@@ -3690,13 +3718,21 @@ retry:
 		error = security_path_unlink(&nd.path, dentry);
 		if (error)
 			goto exit2;
-		error = vfs_unlink(nd.path.dentry->d_inode, dentry);
+		error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode);
 exit2:
 		dput(dentry);
 	}
 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 	if (inode)
 		iput(inode);	/* truncate the inode here */
+	inode = NULL;
+	if (delegated_inode) {
+		error = break_deleg(delegated_inode, O_WRONLY);
+		iput(delegated_inode);
+		delegated_inode = NULL;
+		if (!error)
+			goto retry_deleg;
+	}
 	mnt_drop_write(nd.path.mnt);
 exit1:
 	path_put(&nd.path);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 13886f7f40d5..7a810235d599 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1910,7 +1910,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 	if (host_err)
 		goto out_put;
 	if (type != S_IFDIR)
-		host_err = vfs_unlink(dirp, rdentry);
+		host_err = vfs_unlink(dirp, rdentry, NULL);
 	else
 		host_err = vfs_rmdir(dirp, rdentry);
 	if (!host_err)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8e4be1be1a62..a5799233142a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1455,7 +1455,7 @@ extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
 extern int vfs_symlink(struct inode *, struct dentry *, const char *);
 extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
 extern int vfs_rmdir(struct inode *, struct dentry *);
-extern int vfs_unlink(struct inode *, struct dentry *);
+extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
 
 /*
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index ae1996d3c539..95827ce2f3c7 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -886,7 +886,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
 		err = -ENOENT;
 	} else {
 		ihold(inode);
-		err = vfs_unlink(dentry->d_parent->d_inode, dentry);
+		err = vfs_unlink(dentry->d_parent->d_inode, dentry, NULL);
 	}
 	dput(dentry);
 

From 5a14696c1795d3843673b5cf1982d0e5357a5bbf Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 28 Aug 2012 07:50:40 -0700
Subject: [PATCH 097/101] locks: helper functions for delegation breaking

We'll need the same logic for rename and link.

Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c         | 13 +++----------
 include/linux/fs.h | 39 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 67ce331a3ed8..cfaeaae0f2db 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3650,14 +3650,9 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
 	else {
 		error = security_inode_unlink(dir, dentry);
 		if (!error) {
-			error = break_deleg(target, O_WRONLY|O_NONBLOCK);
-			if (error) {
-				if (error == -EWOULDBLOCK && delegated_inode) {
-					*delegated_inode = target;
-					ihold(target);
-				}
+			error = try_break_deleg(target, delegated_inode);
+			if (error)
 				goto out;
-			}
 			error = dir->i_op->unlink(dir, dentry);
 			if (!error)
 				dont_mount(dentry);
@@ -3727,9 +3722,7 @@ exit2:
 		iput(inode);	/* truncate the inode here */
 	inode = NULL;
 	if (delegated_inode) {
-		error = break_deleg(delegated_inode, O_WRONLY);
-		iput(delegated_inode);
-		delegated_inode = NULL;
+		error = break_deleg_wait(&delegated_inode);
 		if (!error)
 			goto retry_deleg;
 	}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a5799233142a..931f919f44e1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1905,6 +1905,9 @@ extern bool fs_fully_visible(struct file_system_type *);
 
 extern int current_umask(void);
 
+extern void ihold(struct inode * inode);
+extern void iput(struct inode *);
+
 /* /sys/fs */
 extern struct kobject *fs_kobj;
 
@@ -1972,6 +1975,28 @@ static inline int break_deleg(struct inode *inode, unsigned int mode)
 	return 0;
 }
 
+static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
+{
+	int ret;
+
+	ret = break_deleg(inode, O_WRONLY|O_NONBLOCK);
+	if (ret == -EWOULDBLOCK && delegated_inode) {
+		*delegated_inode = inode;
+		ihold(inode);
+	}
+	return ret;
+}
+
+static inline int break_deleg_wait(struct inode **delegated_inode)
+{
+	int ret;
+
+	ret = break_deleg(*delegated_inode, O_WRONLY);
+	iput(*delegated_inode);
+	*delegated_inode = NULL;
+	return ret;
+}
+
 #else /* !CONFIG_FILE_LOCKING */
 static inline int locks_mandatory_locked(struct inode *inode)
 {
@@ -2015,6 +2040,18 @@ static inline int break_deleg(struct inode *inode, unsigned int mode)
 {
 	return 0;
 }
+
+static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
+{
+	return 0;
+}
+
+static inline int break_deleg_wait(struct inode **delegated_inode)
+{
+	BUG();
+	return 0;
+}
+
 #endif /* CONFIG_FILE_LOCKING */
 
 /* fs/open.c */
@@ -2350,8 +2387,6 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
 extern int inode_init_always(struct super_block *, struct inode *);
 extern void inode_init_once(struct inode *);
 extern void address_space_init_once(struct address_space *mapping);
-extern void ihold(struct inode * inode);
-extern void iput(struct inode *);
 extern struct inode * igrab(struct inode *);
 extern ino_t iunique(struct super_block *, ino_t);
 extern int inode_needs_sync(struct inode *inode);

From 8e6d782cab50884ba94324632700e6233a252f6a Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 20 Sep 2011 16:59:58 -0400
Subject: [PATCH 098/101] locks: break delegations on rename

Cc: David Howells <dhowells@redhat.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 .../lustre/include/linux/lustre_compat25.h    |  4 +-
 .../staging/lustre/lustre/lvfs/lvfs_linux.c   |  2 +-
 fs/cachefiles/namei.c                         |  2 +-
 fs/ecryptfs/inode.c                           |  3 +-
 fs/namei.c                                    | 47 +++++++++++++++++--
 fs/nfsd/vfs.c                                 |  2 +-
 include/linux/fs.h                            |  2 +-
 7 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
index 9243dfab43d3..80b019bf969c 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
@@ -105,8 +105,8 @@ static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
 #define ll_vfs_unlink(inode,entry,mnt)	  vfs_unlink(inode,entry)
 #define ll_vfs_mknod(dir,entry,mnt,mode,dev)    vfs_mknod(dir,entry,mode,dev)
 #define ll_security_inode_unlink(dir,entry,mnt) security_inode_unlink(dir,entry)
-#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \
-		vfs_rename(old,old_dir,new,new_dir)
+#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1,delegated_inode) \
+		vfs_rename(old,old_dir,new,new_dir,delegated_inode)
 
 #define cfs_bio_io_error(a,b)   bio_io_error((a))
 #define cfs_bio_endio(a,b,c)    bio_endio((a),(c))
diff --git a/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c b/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c
index 18e1b47a1d65..4ed7c9f0a8be 100644
--- a/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c
+++ b/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c
@@ -220,7 +220,7 @@ int lustre_rename(struct dentry *dir, struct vfsmount *mnt,
 		GOTO(put_old, err = PTR_ERR(dchild_new));
 
 	err = ll_vfs_rename(dir->d_inode, dchild_old, mnt,
-			    dir->d_inode, dchild_new, mnt);
+			    dir->d_inode, dchild_new, mnt, NULL);
 
 	dput(dchild_new);
 put_old:
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 31d480c0e046..ca65f39dc8dc 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -396,7 +396,7 @@ try_again:
 		cachefiles_io_error(cache, "Rename security error %d", ret);
 	} else {
 		ret = vfs_rename(dir->d_inode, rep,
-				 cache->graveyard->d_inode, grave);
+				 cache->graveyard->d_inode, grave, NULL);
 		if (ret != 0 && ret != -ENOMEM)
 			cachefiles_io_error(cache,
 					    "Rename failed with error %d", ret);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index dc60b8bd09ec..c23b01bb7e04 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -640,7 +640,8 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto out_lock;
 	}
 	rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
-			lower_new_dir_dentry->d_inode, lower_new_dentry);
+			lower_new_dir_dentry->d_inode, lower_new_dentry,
+			NULL);
 	if (rc)
 		goto out_lock;
 	if (target_inode)
diff --git a/fs/namei.c b/fs/namei.c
index cfaeaae0f2db..ce7e580e4e14 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4022,7 +4022,8 @@ out:
 }
 
 static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
-			    struct inode *new_dir, struct dentry *new_dentry)
+			    struct inode *new_dir, struct dentry *new_dentry,
+			    struct inode **delegated_inode)
 {
 	struct inode *target = new_dentry->d_inode;
 	struct inode *source = old_dentry->d_inode;
@@ -4039,6 +4040,14 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 	if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
 		goto out;
 
+	error = try_break_deleg(source, delegated_inode);
+	if (error)
+		goto out;
+	if (target) {
+		error = try_break_deleg(target, delegated_inode);
+		if (error)
+			goto out;
+	}
 	error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
 	if (error)
 		goto out;
@@ -4053,8 +4062,30 @@ out:
 	return error;
 }
 
+/**
+ * vfs_rename - rename a filesystem object
+ * @old_dir:	parent of source
+ * @old_dentry:	source
+ * @new_dir:	parent of destination
+ * @new_dentry:	destination
+ * @delegated_inode: returns an inode needing a delegation break
+ *
+ * The caller must hold multiple mutexes--see lock_rename()).
+ *
+ * If vfs_rename discovers a delegation in need of breaking at either
+ * the source or destination, it will return -EWOULDBLOCK and return a
+ * reference to the inode in delegated_inode.  The caller should then
+ * break the delegation and retry.  Because breaking a delegation may
+ * take a long time, the caller should drop all locks before doing
+ * so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-	       struct inode *new_dir, struct dentry *new_dentry)
+	       struct inode *new_dir, struct dentry *new_dentry,
+	       struct inode **delegated_inode)
 {
 	int error;
 	int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry);
@@ -4082,7 +4113,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (is_dir)
 		error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
 	else
-		error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
+		error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode);
 	if (!error)
 		fsnotify_move(old_dir, new_dir, old_name, is_dir,
 			      new_dentry->d_inode, old_dentry);
@@ -4098,6 +4129,7 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
 	struct dentry *old_dentry, *new_dentry;
 	struct dentry *trap;
 	struct nameidata oldnd, newnd;
+	struct inode *delegated_inode = NULL;
 	struct filename *from;
 	struct filename *to;
 	unsigned int lookup_flags = 0;
@@ -4137,6 +4169,7 @@ retry:
 	newnd.flags &= ~LOOKUP_PARENT;
 	newnd.flags |= LOOKUP_RENAME_TARGET;
 
+retry_deleg:
 	trap = lock_rename(new_dir, old_dir);
 
 	old_dentry = lookup_hash(&oldnd);
@@ -4173,13 +4206,19 @@ retry:
 	if (error)
 		goto exit5;
 	error = vfs_rename(old_dir->d_inode, old_dentry,
-				   new_dir->d_inode, new_dentry);
+				   new_dir->d_inode, new_dentry,
+				   &delegated_inode);
 exit5:
 	dput(new_dentry);
 exit4:
 	dput(old_dentry);
 exit3:
 	unlock_rename(new_dir, old_dir);
+	if (delegated_inode) {
+		error = break_deleg_wait(&delegated_inode);
+		if (!error)
+			goto retry_deleg;
+	}
 	mnt_drop_write(oldnd.path.mnt);
 exit2:
 	if (retry_estale(error, lookup_flags))
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7a810235d599..45bf0295894d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1837,7 +1837,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
 		if (host_err)
 			goto out_dput_new;
 	}
-	host_err = vfs_rename(fdir, odentry, tdir, ndentry);
+	host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL);
 	if (!host_err) {
 		host_err = commit_metadata(tfhp);
 		if (!host_err)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 931f919f44e1..5bcff883fa90 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1456,7 +1456,7 @@ extern int vfs_symlink(struct inode *, struct dentry *, const char *);
 extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
 extern int vfs_rmdir(struct inode *, struct dentry *);
 extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
-extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
+extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **);
 
 /*
  * VFS dentry helper functions.

From 146a8595c6399ee6ab4b5cc34c0d28aa4835fdc5 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 20 Sep 2011 17:14:31 -0400
Subject: [PATCH 099/101] locks: break delegations on link

Cc: Tyler Hicks <tyhicks@canonical.com>
Cc: Dustin Kirkland <dustin.kirkland@gazzang.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ecryptfs/inode.c |  2 +-
 fs/namei.c          | 36 ++++++++++++++++++++++++++++++++----
 fs/nfsd/vfs.c       |  2 +-
 include/linux/fs.h  |  2 +-
 4 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index c23b01bb7e04..1c628f023041 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -475,7 +475,7 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
 	dget(lower_new_dentry);
 	lower_dir_dentry = lock_parent(lower_new_dentry);
 	rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
-		      lower_new_dentry);
+		      lower_new_dentry, NULL);
 	if (rc || !lower_new_dentry->d_inode)
 		goto out_lock;
 	rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb);
diff --git a/fs/namei.c b/fs/namei.c
index ce7e580e4e14..251178a1e383 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3819,7 +3819,26 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
 	return sys_symlinkat(oldname, AT_FDCWD, newname);
 }
 
-int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
+/**
+ * vfs_link - create a new link
+ * @old_dentry:	object to be linked
+ * @dir:	new parent
+ * @new_dentry:	where to create the new link
+ * @delegated_inode: returns inode needing a delegation break
+ *
+ * The caller must hold dir->i_mutex
+ *
+ * If vfs_link discovers a delegation on the to-be-linked file in need
+ * of breaking, it will return -EWOULDBLOCK and return a reference to the
+ * inode in delegated_inode.  The caller should then break the delegation
+ * and retry.  Because breaking a delegation may take a long time, the
+ * caller should drop the i_mutex before doing so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
+int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
 {
 	struct inode *inode = old_dentry->d_inode;
 	unsigned max_links = dir->i_sb->s_max_links;
@@ -3855,8 +3874,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
 		error =  -ENOENT;
 	else if (max_links && inode->i_nlink >= max_links)
 		error = -EMLINK;
-	else
-		error = dir->i_op->link(old_dentry, dir, new_dentry);
+	else {
+		error = try_break_deleg(inode, delegated_inode);
+		if (!error)
+			error = dir->i_op->link(old_dentry, dir, new_dentry);
+	}
 
 	if (!error && (inode->i_state & I_LINKABLE)) {
 		spin_lock(&inode->i_lock);
@@ -3883,6 +3905,7 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
 {
 	struct dentry *new_dentry;
 	struct path old_path, new_path;
+	struct inode *delegated_inode = NULL;
 	int how = 0;
 	int error;
 
@@ -3921,9 +3944,14 @@ retry:
 	error = security_path_link(old_path.dentry, &new_path, new_dentry);
 	if (error)
 		goto out_dput;
-	error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
+	error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
 out_dput:
 	done_path_create(&new_path, new_dentry);
+	if (delegated_inode) {
+		error = break_deleg_wait(&delegated_inode);
+		if (!error)
+			goto retry;
+	}
 	if (retry_estale(error, how)) {
 		how |= LOOKUP_REVAL;
 		goto retry;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 45bf0295894d..27ba21b5f383 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1736,7 +1736,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
 		err = nfserrno(host_err);
 		goto out_dput;
 	}
-	host_err = vfs_link(dold, dirp, dnew);
+	host_err = vfs_link(dold, dirp, dnew, NULL);
 	if (!host_err) {
 		err = nfserrno(commit_metadata(ffhp));
 		if (!err)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5bcff883fa90..6e36e7118ec1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1453,7 +1453,7 @@ extern int vfs_create(struct inode *, struct dentry *, umode_t, bool);
 extern int vfs_mkdir(struct inode *, struct dentry *, umode_t);
 extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
 extern int vfs_symlink(struct inode *, struct dentry *, const char *);
-extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
+extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **);
 extern int vfs_rmdir(struct inode *, struct dentry *);
 extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **);

From 27ac0ffeac80ba6b9580529568d06144df044366 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 20 Sep 2011 17:19:26 -0400
Subject: [PATCH 100/101] locks: break delegations on any attribute
 modification

NFSv4 uses leases to guarantee that clients can cache metadata as well
as data.

Cc: Mikulas Patocka <mikulas@artax.karlin.mff.cuni.cz>
Cc: David Howells <dhowells@redhat.com>
Cc: Tyler Hicks <tyhicks@canonical.com>
Cc: Dustin Kirkland <dustin.kirkland@gazzang.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/base/devtmpfs.c   |  4 ++--
 fs/attr.c                 | 25 ++++++++++++++++++++++++-
 fs/cachefiles/interface.c |  4 ++--
 fs/ecryptfs/inode.c       |  4 ++--
 fs/hpfs/namei.c           |  2 +-
 fs/inode.c                |  6 +++++-
 fs/nfsd/vfs.c             |  8 ++++++--
 fs/open.c                 | 22 ++++++++++++++++++----
 fs/utimes.c               |  9 ++++++++-
 include/linux/fs.h        |  2 +-
 10 files changed, 69 insertions(+), 17 deletions(-)

diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 1b8490e2fbde..0f3820121e02 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -216,7 +216,7 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
 		newattrs.ia_gid = gid;
 		newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID;
 		mutex_lock(&dentry->d_inode->i_mutex);
-		notify_change(dentry, &newattrs);
+		notify_change(dentry, &newattrs, NULL);
 		mutex_unlock(&dentry->d_inode->i_mutex);
 
 		/* mark as kernel-created inode */
@@ -322,7 +322,7 @@ static int handle_remove(const char *nodename, struct device *dev)
 			newattrs.ia_valid =
 				ATTR_UID|ATTR_GID|ATTR_MODE;
 			mutex_lock(&dentry->d_inode->i_mutex);
-			notify_change(dentry, &newattrs);
+			notify_change(dentry, &newattrs, NULL);
 			mutex_unlock(&dentry->d_inode->i_mutex);
 			err = vfs_unlink(parent.dentry->d_inode, dentry, NULL);
 			if (!err || err == -ENOENT)
diff --git a/fs/attr.c b/fs/attr.c
index 1449adb14ef6..267968d94673 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -167,7 +167,27 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
 }
 EXPORT_SYMBOL(setattr_copy);
 
-int notify_change(struct dentry * dentry, struct iattr * attr)
+/**
+ * notify_change - modify attributes of a filesytem object
+ * @dentry:	object affected
+ * @iattr:	new attributes
+ * @delegated_inode: returns inode, if the inode is delegated
+ *
+ * The caller must hold the i_mutex on the affected object.
+ *
+ * If notify_change discovers a delegation in need of breaking,
+ * it will return -EWOULDBLOCK and return a reference to the inode in
+ * delegated_inode.  The caller should then break the delegation and
+ * retry.  Because breaking a delegation may take a long time, the
+ * caller should drop the i_mutex before doing so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.  Also, passing NULL is fine for callers holding
+ * the file open for write, as there can be no conflicting delegation in
+ * that case.
+ */
+int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode)
 {
 	struct inode *inode = dentry->d_inode;
 	umode_t mode = inode->i_mode;
@@ -241,6 +261,9 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
 		return 0;
 
 	error = security_inode_setattr(dentry, attr);
+	if (error)
+		return error;
+	error = try_break_deleg(inode, delegated_inode);
 	if (error)
 		return error;
 
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 43eb5592cdea..5088a418ac4d 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -449,14 +449,14 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
 		_debug("discard tail %llx", oi_size);
 		newattrs.ia_valid = ATTR_SIZE;
 		newattrs.ia_size = oi_size & PAGE_MASK;
-		ret = notify_change(object->backer, &newattrs);
+		ret = notify_change(object->backer, &newattrs, NULL);
 		if (ret < 0)
 			goto truncate_failed;
 	}
 
 	newattrs.ia_valid = ATTR_SIZE;
 	newattrs.ia_size = ni_size;
-	ret = notify_change(object->backer, &newattrs);
+	ret = notify_change(object->backer, &newattrs, NULL);
 
 truncate_failed:
 	mutex_unlock(&object->backer->d_inode->i_mutex);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 1c628f023041..c36c44824471 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -882,7 +882,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 		struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
 
 		mutex_lock(&lower_dentry->d_inode->i_mutex);
-		rc = notify_change(lower_dentry, &lower_ia);
+		rc = notify_change(lower_dentry, &lower_ia, NULL);
 		mutex_unlock(&lower_dentry->d_inode->i_mutex);
 	}
 	return rc;
@@ -983,7 +983,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
 		lower_ia.ia_valid &= ~ATTR_MODE;
 
 	mutex_lock(&lower_dentry->d_inode->i_mutex);
-	rc = notify_change(lower_dentry, &lower_ia);
+	rc = notify_change(lower_dentry, &lower_ia, NULL);
 	mutex_unlock(&lower_dentry->d_inode->i_mutex);
 out:
 	fsstack_copy_attr_all(inode, lower_inode);
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 345713d2f8f3..1b39afdd86fd 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -407,7 +407,7 @@ again:
 			/*printk("HPFS: truncating file before delete.\n");*/
 			newattrs.ia_size = 0;
 			newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
-			err = notify_change(dentry, &newattrs);
+			err = notify_change(dentry, &newattrs, NULL);
 			put_write_access(inode);
 			if (!err)
 				goto again;
diff --git a/fs/inode.c b/fs/inode.c
index ce48c359ce9e..4bcdad3c9361 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1603,7 +1603,11 @@ static int __remove_suid(struct dentry *dentry, int kill)
 	struct iattr newattrs;
 
 	newattrs.ia_valid = ATTR_FORCE | kill;
-	return notify_change(dentry, &newattrs);
+	/*
+	 * Note we call this on write, so notify_change will not
+	 * encounter any conflicting delegations:
+	 */
+	return notify_change(dentry, &newattrs, NULL);
 }
 
 int file_remove_suid(struct file *file)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 27ba21b5f383..94b5f5d2bfed 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -427,7 +427,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 			goto out_nfserr;
 		fh_lock(fhp);
 
-		host_err = notify_change(dentry, iap);
+		host_err = notify_change(dentry, iap, NULL);
 		err = nfserrno(host_err);
 		fh_unlock(fhp);
 	}
@@ -988,7 +988,11 @@ static void kill_suid(struct dentry *dentry)
 	ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
 
 	mutex_lock(&dentry->d_inode->i_mutex);
-	notify_change(dentry, &ia);
+	/*
+	 * Note we call this on write, so notify_change will not
+	 * encounter any conflicting delegations:
+	 */
+	notify_change(dentry, &ia, NULL);
 	mutex_unlock(&dentry->d_inode->i_mutex);
 }
 
diff --git a/fs/open.c b/fs/open.c
index fffbed40dbe9..4b3e1edf2fe4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -57,7 +57,8 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
 		newattrs.ia_valid |= ret | ATTR_FORCE;
 
 	mutex_lock(&dentry->d_inode->i_mutex);
-	ret = notify_change(dentry, &newattrs);
+	/* Note any delegations or leases have already been broken: */
+	ret = notify_change(dentry, &newattrs, NULL);
 	mutex_unlock(&dentry->d_inode->i_mutex);
 	return ret;
 }
@@ -464,21 +465,28 @@ out:
 static int chmod_common(struct path *path, umode_t mode)
 {
 	struct inode *inode = path->dentry->d_inode;
+	struct inode *delegated_inode = NULL;
 	struct iattr newattrs;
 	int error;
 
 	error = mnt_want_write(path->mnt);
 	if (error)
 		return error;
+retry_deleg:
 	mutex_lock(&inode->i_mutex);
 	error = security_path_chmod(path, mode);
 	if (error)
 		goto out_unlock;
 	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-	error = notify_change(path->dentry, &newattrs);
+	error = notify_change(path->dentry, &newattrs, &delegated_inode);
 out_unlock:
 	mutex_unlock(&inode->i_mutex);
+	if (delegated_inode) {
+		error = break_deleg_wait(&delegated_inode);
+		if (!error)
+			goto retry_deleg;
+	}
 	mnt_drop_write(path->mnt);
 	return error;
 }
@@ -522,6 +530,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
 static int chown_common(struct path *path, uid_t user, gid_t group)
 {
 	struct inode *inode = path->dentry->d_inode;
+	struct inode *delegated_inode = NULL;
 	int error;
 	struct iattr newattrs;
 	kuid_t uid;
@@ -546,12 +555,17 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
 	if (!S_ISDIR(inode->i_mode))
 		newattrs.ia_valid |=
 			ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
+retry_deleg:
 	mutex_lock(&inode->i_mutex);
 	error = security_path_chown(path, uid, gid);
 	if (!error)
-		error = notify_change(path->dentry, &newattrs);
+		error = notify_change(path->dentry, &newattrs, &delegated_inode);
 	mutex_unlock(&inode->i_mutex);
-
+	if (delegated_inode) {
+		error = break_deleg_wait(&delegated_inode);
+		if (!error)
+			goto retry_deleg;
+	}
 	return error;
 }
 
diff --git a/fs/utimes.c b/fs/utimes.c
index f4fb7eca10e8..aa138d64560a 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -53,6 +53,7 @@ static int utimes_common(struct path *path, struct timespec *times)
 	int error;
 	struct iattr newattrs;
 	struct inode *inode = path->dentry->d_inode;
+	struct inode *delegated_inode = NULL;
 
 	error = mnt_want_write(path->mnt);
 	if (error)
@@ -101,9 +102,15 @@ static int utimes_common(struct path *path, struct timespec *times)
 				goto mnt_drop_write_and_out;
 		}
 	}
+retry_deleg:
 	mutex_lock(&inode->i_mutex);
-	error = notify_change(path->dentry, &newattrs);
+	error = notify_change(path->dentry, &newattrs, &delegated_inode);
 	mutex_unlock(&inode->i_mutex);
+	if (delegated_inode) {
+		error = break_deleg_wait(&delegated_inode);
+		if (!error)
+			goto retry_deleg;
+	}
 
 mnt_drop_write_and_out:
 	mnt_drop_write(path->mnt);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6e36e7118ec1..ab2a0ca82dc5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2278,7 +2278,7 @@ extern void emergency_remount(void);
 #ifdef CONFIG_BLOCK
 extern sector_t bmap(struct inode *, sector_t);
 #endif
-extern int notify_change(struct dentry *, struct iattr *);
+extern int notify_change(struct dentry *, struct iattr *, struct inode **);
 extern int inode_permission(struct inode *, int);
 extern int generic_permission(struct inode *, int);
 

From bdd3536618443809d18868563eeafa63b9d29603 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Nov 2013 22:19:48 -0500
Subject: [PATCH 101/101] ecryptfs: ->f_op is never NULL

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ecryptfs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 6b1cd2b0c751..2229a74aeeed 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -318,7 +318,7 @@ ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 	if (ecryptfs_file_to_private(file))
 		lower_file = ecryptfs_file_to_lower(file);
-	if (lower_file->f_op && lower_file->f_op->unlocked_ioctl)
+	if (lower_file->f_op->unlocked_ioctl)
 		rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
 	return rc;
 }