From 18156e7e66e5147d2483382a1e3817d2401ca1a8 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 3 Feb 2014 12:13:08 -0500
Subject: [PATCH 01/17] MAINTAINERS: update entry for fs/locks.c

Both Bruce and I have done a fair bit of work in these files recently,
and would like to be notified if anyone is proposing changes to it.

Also, Matthew is no longer interested in maintaining this code, so
remove him.

Cc: Matthew Wilcox <matthew@wil.cx>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: "J. Bruce Fields" <bfields@fieldses.org>
---
 MAINTAINERS | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index b2cf5cfb4d29..dc395b18b86a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3436,7 +3436,8 @@ F:	include/scsi/libfcoe.h
 F:	include/uapi/scsi/fc/
 
 FILE LOCKING (flock() and fcntl()/lockf())
-M:	Matthew Wilcox <matthew@wil.cx>
+M:	Jeff Layton <jlayton@redhat.com>
+M:	J. Bruce Fields <bfields@fieldses.org>
 L:	linux-fsdevel@vger.kernel.org
 S:	Maintained
 F:	include/linux/fcntl.h

From 24cbe7845ea50b636ab2218b9d648270ff55f148 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 3 Feb 2014 12:13:06 -0500
Subject: [PATCH 02/17] locks: close potential race between setlease and open

As Al Viro points out, there is an unlikely, but possible race between
opening a file and setting a lease on it. generic_add_lease is done with
the i_lock held, but the inode->i_flock check in break_lease is
lockless. It's possible for another task doing an open to do the entire
pathwalk and call break_lease between the point where generic_add_lease
checks for a conflicting open and adds the lease to the list. If this
occurs, we can end up with a lease set on the file with a conflicting
open.

To guard against that, check again for a conflicting open after adding
the lease to the i_flock list. If the above race occurs, then we can
simply unwind the lease setting and return -EAGAIN.

Because we take dentry references and acquire write access on the file
before calling break_lease, we know that if the i_flock list is empty
when the open caller goes to check it then the necessary refcounts have
already been incremented. Thus the additional check for a conflicting
open will see that there is one and the setlease call will fail.

Cc: Bruce Fields <bfields@fieldses.org>
Cc: David Howells <dhowells@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@fieldses.org>
---
 fs/locks.c         | 75 ++++++++++++++++++++++++++++++++++++++--------
 include/linux/fs.h |  6 ++++
 2 files changed, 68 insertions(+), 13 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 92a0f0a52b06..2cfeea622f28 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -652,15 +652,18 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
 	locks_insert_global_locks(fl);
 }
 
-/*
- * Delete a lock and then free it.
- * Wake up processes that are blocked waiting for this lock,
- * notify the FS that the lock has been cleared and
- * finally free the lock.
+/**
+ * locks_delete_lock - Delete a lock and then free it.
+ * @thisfl_p: pointer that points to the fl_next field of the previous
+ * 	      inode->i_flock list entry
+ *
+ * Unlink a lock from all lists and free the namespace reference, but don't
+ * free it yet. Wake up processes that are blocked waiting for this lock and
+ * notify the FS that the lock has been cleared.
  *
  * Must be called with the i_lock held!
  */
-static void locks_delete_lock(struct file_lock **thisfl_p)
+static void locks_unlink_lock(struct file_lock **thisfl_p)
 {
 	struct file_lock *fl = *thisfl_p;
 
@@ -675,6 +678,18 @@ static void locks_delete_lock(struct file_lock **thisfl_p)
 	}
 
 	locks_wake_up_blocks(fl);
+}
+
+/*
+ * Unlink a lock from all lists and free it.
+ *
+ * Must be called with i_lock held!
+ */
+static void locks_delete_lock(struct file_lock **thisfl_p)
+{
+	struct file_lock *fl = *thisfl_p;
+
+	locks_unlink_lock(thisfl_p);
 	locks_free_lock(fl);
 }
 
@@ -1472,6 +1487,32 @@ int fcntl_getlease(struct file *filp)
 	return type;
 }
 
+/**
+ * check_conflicting_open - see if the given dentry points to a file that has
+ * 			    an existing open that would conflict with the
+ * 			    desired lease.
+ * @dentry:	dentry to check
+ * @arg:	type of lease that we're trying to acquire
+ *
+ * Check to see if there's an existing open fd on this file that would
+ * conflict with the lease we're trying to set.
+ */
+static int
+check_conflicting_open(const struct dentry *dentry, const long arg)
+{
+	int ret = 0;
+	struct inode *inode = dentry->d_inode;
+
+	if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
+		return -EAGAIN;
+
+	if ((arg == F_WRLCK) && ((d_count(dentry) > 1) ||
+	    (atomic_read(&inode->i_count) > 1)))
+		ret = -EAGAIN;
+
+	return ret;
+}
+
 static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp)
 {
 	struct file_lock *fl, **before, **my_before = NULL, *lease;
@@ -1499,12 +1540,8 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp
 		return -EINVAL;
 	}
 
-	error = -EAGAIN;
-	if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
-		goto out;
-	if ((arg == F_WRLCK)
-	    && ((d_count(dentry) > 1)
-		|| (atomic_read(&inode->i_count) > 1)))
+	error = check_conflicting_open(dentry, arg);
+	if (error)
 		goto out;
 
 	/*
@@ -1549,7 +1586,19 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp
 		goto out;
 
 	locks_insert_lock(before, lease);
-	error = 0;
+	/*
+	 * The check in break_lease() is lockless. It's possible for another
+	 * open to race in after we did the earlier check for a conflicting
+	 * open but before the lease was inserted. Check again for a
+	 * conflicting open and cancel the lease if there is one.
+	 *
+	 * We also add a barrier here to ensure that the insertion of the lock
+	 * precedes these checks.
+	 */
+	smp_mb();
+	error = check_conflicting_open(dentry, arg);
+	if (error)
+		locks_unlink_lock(flp);
 out:
 	if (is_deleg)
 		mutex_unlock(&inode->i_mutex);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 09f553c59813..df8474408331 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1964,6 +1964,12 @@ static inline int locks_verify_truncate(struct inode *inode,
 
 static inline int break_lease(struct inode *inode, unsigned int mode)
 {
+	/*
+	 * Since this check is lockless, we must ensure that any refcounts
+	 * taken are done before checking inode->i_flock. Otherwise, we could
+	 * end up racing with tasks trying to set a new lease on this file.
+	 */
+	smp_mb();
 	if (inode->i_flock)
 		return __break_lease(inode, mode, FL_LEASE);
 	return 0;

From 46dad7603f21d64207820580c0bafd47934686d4 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 3 Feb 2014 12:13:06 -0500
Subject: [PATCH 03/17] locks: clean up comment typo

Acked-by: J. Bruce Fields <bfields@fieldses.org>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/locks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/locks.c b/fs/locks.c
index 2cfeea622f28..5e28612120c2 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -581,7 +581,7 @@ static void locks_delete_block(struct file_lock *waiter)
  * it seems like the reasonable thing to do.
  *
  * Must be called with both the i_lock and blocked_lock_lock held. The fl_block
- * list itself is protected by the file_lock_list, but by ensuring that the
+ * list itself is protected by the blocked_lock_lock, but by ensuring that the
  * i_lock is also held on insertions we can avoid taking the blocked_lock_lock
  * in some cases when we see that the fl_block list is empty.
  */

From 6ca10ed8edfd84b1930affdae8ffaa787057296b Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 3 Feb 2014 12:13:07 -0500
Subject: [PATCH 04/17] locks: remove "inline" qualifier from fl_link
 manipulation functions

It's best to let the compiler decide that.

Acked-by: J. Bruce Fields <bfields@fieldses.org>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/locks.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 5e28612120c2..049a14402ee4 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -511,8 +511,7 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
 }
 
 /* Must be called with the i_lock held! */
-static inline void
-locks_insert_global_locks(struct file_lock *fl)
+static void locks_insert_global_locks(struct file_lock *fl)
 {
 	lg_local_lock(&file_lock_lglock);
 	fl->fl_link_cpu = smp_processor_id();
@@ -521,8 +520,7 @@ locks_insert_global_locks(struct file_lock *fl)
 }
 
 /* Must be called with the i_lock held! */
-static inline void
-locks_delete_global_locks(struct file_lock *fl)
+static void locks_delete_global_locks(struct file_lock *fl)
 {
 	/*
 	 * Avoid taking lock if already unhashed. This is safe since this check
@@ -544,14 +542,12 @@ posix_owner_key(struct file_lock *fl)
 	return (unsigned long)fl->fl_owner;
 }
 
-static inline void
-locks_insert_global_blocked(struct file_lock *waiter)
+static void locks_insert_global_blocked(struct file_lock *waiter)
 {
 	hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter));
 }
 
-static inline void
-locks_delete_global_blocked(struct file_lock *waiter)
+static void locks_delete_global_blocked(struct file_lock *waiter)
 {
 	hash_del(&waiter->fl_link);
 }

From b03dfdec0381857db2c01c877b7064f3f5d97d7e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 3 Feb 2014 12:13:07 -0500
Subject: [PATCH 05/17] locks: add __acquires and __releases annotations to
 locks_start and locks_stop

...to make sparse happy.

Acked-by: J. Bruce Fields <bfields@fieldses.org>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/locks.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/locks.c b/fs/locks.c
index 049a14402ee4..6084f5a32e9c 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2430,6 +2430,7 @@ static int locks_show(struct seq_file *f, void *v)
 }
 
 static void *locks_start(struct seq_file *f, loff_t *pos)
+	__acquires(&blocked_lock_lock)
 {
 	struct locks_iterator *iter = f->private;
 
@@ -2448,6 +2449,7 @@ static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
 }
 
 static void locks_stop(struct seq_file *f, void *v)
+	__releases(&blocked_lock_lock)
 {
 	spin_unlock(&blocked_lock_lock);
 	lg_global_unlock(&file_lock_lglock);

From 8c3cac5e6a85f03602ffe09c44f14418699e31ec Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 3 Feb 2014 12:13:07 -0500
Subject: [PATCH 06/17] locks: eliminate BUG() call when there's an unexpected
 lock on file close

A leftover lock on the list is surely a sign of a problem of some sort,
but it's not necessarily a reason to panic the box. Instead, just log a
warning with some info about the lock, and then delete it like we would
any other lock.

In the event that the filesystem declares a ->lock f_op, we may end up
leaking something, but that's generally preferable to an immediate
panic.

Acked-by: J. Bruce Fields <bfields@fieldses.org>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/locks.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 6084f5a32e9c..dd309333afc9 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2281,16 +2281,28 @@ void locks_remove_flock(struct file *filp)
 
 	while ((fl = *before) != NULL) {
 		if (fl->fl_file == filp) {
-			if (IS_FLOCK(fl)) {
-				locks_delete_lock(before);
-				continue;
-			}
 			if (IS_LEASE(fl)) {
 				lease_modify(before, F_UNLCK);
 				continue;
 			}
-			/* What? */
-			BUG();
+
+			/*
+			 * There's a leftover lock on the list of a type that
+			 * we didn't expect to see. Most likely a classic
+			 * POSIX lock that ended up not getting released
+			 * properly, or that raced onto the list somehow. Log
+			 * some info about it and then just remove it from
+			 * the list.
+			 */
+			WARN(!IS_FLOCK(fl),
+				"leftover lock: dev=%u:%u ino=%lu type=%hhd flags=0x%x start=%lld end=%lld\n",
+				MAJOR(inode->i_sb->s_dev),
+				MINOR(inode->i_sb->s_dev), inode->i_ino,
+				fl->fl_type, fl->fl_flags,
+				fl->fl_start, fl->fl_end);
+
+			locks_delete_lock(before);
+			continue;
  		}
 		before = &fl->fl_next;
 	}

From ef12e72a01f3022c55e5a8e0fa1caebdc7efcfce Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 3 Feb 2014 12:13:08 -0500
Subject: [PATCH 07/17] locks: fix posix lock range overflow handling

In the 32-bit case fcntl assigns the 64-bit f_pos and i_size to a 32-bit
off_t.

The existing range checks also seem to depend on signed arithmetic
wrapping when it overflows.  In practice maybe that works, but we can be
more careful.  That also allows us to make a more reliable distinction
between -EINVAL and -EOVERFLOW.

Note that in the 32-bit case SEEK_CUR or SEEK_END might allow the caller
to set a lock with starting point no longer representable as a 32-bit
value.  We could return -EOVERFLOW in such cases, but the locks code is
capable of handling such ranges, so we choose to be lenient here.  The
only problem is that subsequent GETLK calls on such a lock will fail
with EOVERFLOW.

While we're here, do some cleanup including consolidating code for the
flock and flock64 cases.

Signed-off-by: J. Bruce Fields <bfields@fieldses.org>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/locks.c                       | 144 ++++++++++++-------------------
 include/uapi/asm-generic/fcntl.h |   3 -
 2 files changed, 54 insertions(+), 93 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index dd309333afc9..b49e853a9c7b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -344,105 +344,69 @@ static int assign_type(struct file_lock *fl, long type)
 	return 0;
 }
 
+static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
+				 struct flock64 *l)
+{
+	switch (l->l_whence) {
+	case SEEK_SET:
+		fl->fl_start = 0;
+		break;
+	case SEEK_CUR:
+		fl->fl_start = filp->f_pos;
+		break;
+	case SEEK_END:
+		fl->fl_start = i_size_read(file_inode(filp));
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (l->l_start > OFFSET_MAX - fl->fl_start)
+		return -EOVERFLOW;
+	fl->fl_start += l->l_start;
+	if (fl->fl_start < 0)
+		return -EINVAL;
+
+	/* POSIX-1996 leaves the case l->l_len < 0 undefined;
+	   POSIX-2001 defines it. */
+	if (l->l_len > 0) {
+		if (l->l_len - 1 > OFFSET_MAX - fl->fl_start)
+			return -EOVERFLOW;
+		fl->fl_end = fl->fl_start + l->l_len - 1;
+
+	} else if (l->l_len < 0) {
+		if (fl->fl_start + l->l_len < 0)
+			return -EINVAL;
+		fl->fl_end = fl->fl_start - 1;
+		fl->fl_start += l->l_len;
+	} else
+		fl->fl_end = OFFSET_MAX;
+
+	fl->fl_owner = current->files;
+	fl->fl_pid = current->tgid;
+	fl->fl_file = filp;
+	fl->fl_flags = FL_POSIX;
+	fl->fl_ops = NULL;
+	fl->fl_lmops = NULL;
+
+	return assign_type(fl, l->l_type);
+}
+
 /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX
  * style lock.
  */
 static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
 			       struct flock *l)
 {
-	off_t start, end;
+	struct flock64 ll = {
+		.l_type = l->l_type,
+		.l_whence = l->l_whence,
+		.l_start = l->l_start,
+		.l_len = l->l_len,
+	};
 
-	switch (l->l_whence) {
-	case SEEK_SET:
-		start = 0;
-		break;
-	case SEEK_CUR:
-		start = filp->f_pos;
-		break;
-	case SEEK_END:
-		start = i_size_read(file_inode(filp));
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	/* POSIX-1996 leaves the case l->l_len < 0 undefined;
-	   POSIX-2001 defines it. */
-	start += l->l_start;
-	if (start < 0)
-		return -EINVAL;
-	fl->fl_end = OFFSET_MAX;
-	if (l->l_len > 0) {
-		end = start + l->l_len - 1;
-		fl->fl_end = end;
-	} else if (l->l_len < 0) {
-		end = start - 1;
-		fl->fl_end = end;
-		start += l->l_len;
-		if (start < 0)
-			return -EINVAL;
-	}
-	fl->fl_start = start;	/* we record the absolute position */
-	if (fl->fl_end < fl->fl_start)
-		return -EOVERFLOW;
-	
-	fl->fl_owner = current->files;
-	fl->fl_pid = current->tgid;
-	fl->fl_file = filp;
-	fl->fl_flags = FL_POSIX;
-	fl->fl_ops = NULL;
-	fl->fl_lmops = NULL;
-
-	return assign_type(fl, l->l_type);
+	return flock64_to_posix_lock(filp, fl, &ll);
 }
 
-#if BITS_PER_LONG == 32
-static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
-				 struct flock64 *l)
-{
-	loff_t start;
-
-	switch (l->l_whence) {
-	case SEEK_SET:
-		start = 0;
-		break;
-	case SEEK_CUR:
-		start = filp->f_pos;
-		break;
-	case SEEK_END:
-		start = i_size_read(file_inode(filp));
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	start += l->l_start;
-	if (start < 0)
-		return -EINVAL;
-	fl->fl_end = OFFSET_MAX;
-	if (l->l_len > 0) {
-		fl->fl_end = start + l->l_len - 1;
-	} else if (l->l_len < 0) {
-		fl->fl_end = start - 1;
-		start += l->l_len;
-		if (start < 0)
-			return -EINVAL;
-	}
-	fl->fl_start = start;	/* we record the absolute position */
-	if (fl->fl_end < fl->fl_start)
-		return -EOVERFLOW;
-	
-	fl->fl_owner = current->files;
-	fl->fl_pid = current->tgid;
-	fl->fl_file = filp;
-	fl->fl_flags = FL_POSIX;
-	fl->fl_ops = NULL;
-	fl->fl_lmops = NULL;
-
-	return assign_type(fl, l->l_type);
-}
-#endif
-
 /* default lease lock manager operations */
 static void lease_break_callback(struct file_lock *fl)
 {
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index 95e46c8e05f9..36025f77c6ed 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -186,8 +186,6 @@ struct flock {
 };
 #endif
 
-#ifndef CONFIG_64BIT
-
 #ifndef HAVE_ARCH_STRUCT_FLOCK64
 #ifndef __ARCH_FLOCK64_PAD
 #define __ARCH_FLOCK64_PAD
@@ -202,6 +200,5 @@ struct flock64 {
 	__ARCH_FLOCK64_PAD
 };
 #endif
-#endif /* !CONFIG_64BIT */
 
 #endif /* _ASM_GENERIC_FCNTL_H */

From bce7560d4946dcfc07b0217b1d3862ed60ff7188 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 3 Feb 2014 12:13:08 -0500
Subject: [PATCH 08/17] locks: consolidate checks for compatible filp->f_mode
 values in setlk handlers

Move this check into flock64_to_posix_lock instead of duplicating it in
two places. This also fixes a minor wart in the code where we continue
referring to the struct flock after converting it to struct file_lock.

Acked-by: J. Bruce Fields <bfields@fieldses.org>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/locks.c | 46 ++++++++++++----------------------------------
 1 file changed, 12 insertions(+), 34 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index b49e853a9c7b..4cd25781b0a4 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -388,6 +388,18 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
 	fl->fl_ops = NULL;
 	fl->fl_lmops = NULL;
 
+	/* Ensure that fl->fl_filp has compatible f_mode */
+	switch (l->l_type) {
+	case F_RDLCK:
+		if (!(filp->f_mode & FMODE_READ))
+			return -EBADF;
+		break;
+	case F_WRLCK:
+		if (!(filp->f_mode & FMODE_WRITE))
+			return -EBADF;
+		break;
+	}
+
 	return assign_type(fl, l->l_type);
 }
 
@@ -2025,23 +2037,6 @@ again:
 		file_lock->fl_flags |= FL_SLEEP;
 	}
 	
-	error = -EBADF;
-	switch (flock.l_type) {
-	case F_RDLCK:
-		if (!(filp->f_mode & FMODE_READ))
-			goto out;
-		break;
-	case F_WRLCK:
-		if (!(filp->f_mode & FMODE_WRITE))
-			goto out;
-		break;
-	case F_UNLCK:
-		break;
-	default:
-		error = -EINVAL;
-		goto out;
-	}
-
 	error = do_lock_file_wait(filp, cmd, file_lock);
 
 	/*
@@ -2143,23 +2138,6 @@ again:
 		file_lock->fl_flags |= FL_SLEEP;
 	}
 	
-	error = -EBADF;
-	switch (flock.l_type) {
-	case F_RDLCK:
-		if (!(filp->f_mode & FMODE_READ))
-			goto out;
-		break;
-	case F_WRLCK:
-		if (!(filp->f_mode & FMODE_WRITE))
-			goto out;
-		break;
-	case F_UNLCK:
-		break;
-	default:
-		error = -EINVAL;
-		goto out;
-	}
-
 	error = do_lock_file_wait(filp, cmd, file_lock);
 
 	/*

From 78ed8a13382b1354e95d0f2233577eba15cb8171 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 3 Feb 2014 12:13:08 -0500
Subject: [PATCH 09/17] locks: rename locks_remove_flock to locks_remove_file

This function currently removes leases in addition to flock locks and in
a later patch we'll have it deal with file-private locks too. Rename it
to locks_remove_file to indicate that it removes locks that are
associated with a particular struct file, and not just flock locks.

Acked-by: J. Bruce Fields <bfields@fieldses.org>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/file_table.c    | 2 +-
 fs/locks.c         | 2 +-
 include/linux/fs.h | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/file_table.c b/fs/file_table.c
index 5fff9030be34..468543c1973d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -234,7 +234,7 @@ static void __fput(struct file *file)
 	 * in the file cleanup chain.
 	 */
 	eventpoll_release(file);
-	locks_remove_flock(file);
+	locks_remove_file(file);
 
 	if (unlikely(file->f_flags & FASYNC)) {
 		if (file->f_op->fasync)
diff --git a/fs/locks.c b/fs/locks.c
index 4cd25781b0a4..4d4e790150e0 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2196,7 +2196,7 @@ EXPORT_SYMBOL(locks_remove_posix);
 /*
  * This function is called on the last close of an open file.
  */
-void locks_remove_flock(struct file *filp)
+void locks_remove_file(struct file *filp)
 {
 	struct inode * inode = file_inode(filp);
 	struct file_lock *fl;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index df8474408331..7527d96913d3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1012,7 +1012,7 @@ extern struct file_lock * locks_alloc_lock(void);
 extern void locks_copy_lock(struct file_lock *, struct file_lock *);
 extern void __locks_copy_lock(struct file_lock *, const struct file_lock *);
 extern void locks_remove_posix(struct file *, fl_owner_t);
-extern void locks_remove_flock(struct file *);
+extern void locks_remove_file(struct file *);
 extern void locks_release_private(struct file_lock *);
 extern void posix_test_lock(struct file *, struct file_lock *);
 extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
@@ -1083,7 +1083,7 @@ static inline void locks_remove_posix(struct file *filp, fl_owner_t owner)
 	return;
 }
 
-static inline void locks_remove_flock(struct file *filp)
+static inline void locks_remove_file(struct file *filp)
 {
 	return;
 }

From c918d42a27a9be0d78be490997d16d79cd5b9193 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 3 Feb 2014 12:13:09 -0500
Subject: [PATCH 10/17] locks: make /proc/locks show IS_FILE_PVT locks as type
 "FLPVT"

In a later patch, we'll be adding a new type of lock that's owned by
the struct file instead of the files_struct. Those sorts of locks
will be flagged with a new FL_FILE_PVT flag.

Report these types of locks as "FLPVT" in /proc/locks to distinguish
them from "classic" POSIX locks.

Acked-by: J. Bruce Fields <bfields@fieldses.org>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/locks.c         | 11 +++++++++--
 include/linux/fs.h |  1 +
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 4d4e790150e0..0e1f0df8de12 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -135,6 +135,7 @@
 #define IS_POSIX(fl)	(fl->fl_flags & FL_POSIX)
 #define IS_FLOCK(fl)	(fl->fl_flags & FL_FLOCK)
 #define IS_LEASE(fl)	(fl->fl_flags & (FL_LEASE|FL_DELEG))
+#define IS_FILE_PVT(fl)	(fl->fl_flags & FL_FILE_PVT)
 
 static bool lease_breaking(struct file_lock *fl)
 {
@@ -2313,8 +2314,14 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
 
 	seq_printf(f, "%lld:%s ", id, pfx);
 	if (IS_POSIX(fl)) {
-		seq_printf(f, "%6s %s ",
-			     (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ",
+		if (fl->fl_flags & FL_ACCESS)
+			seq_printf(f, "ACCESS");
+		else if (IS_FILE_PVT(fl))
+			seq_printf(f, "FLPVT ");
+		else
+			seq_printf(f, "POSIX ");
+
+		seq_printf(f, " %s ",
 			     (inode == NULL) ? "*NOINODE*" :
 			     mandatory_lock(inode) ? "MANDATORY" : "ADVISORY ");
 	} else if (IS_FLOCK(fl)) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7527d96913d3..5ddeb8de5e77 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -888,6 +888,7 @@ static inline int file_check_writeable(struct file *filp)
 #define FL_SLEEP	128	/* A blocking lock */
 #define FL_DOWNGRADE_PENDING	256 /* Lease is being downgraded */
 #define FL_UNLOCK_PENDING	512 /* Lease is being broken */
+#define FL_FILE_PVT	1024	/* lock is private to the file */
 
 /*
  * Special return value from posix_lock_file() and vfs_lock_file() for

From 3fd80cddc6af5ba53244514a61450d4b30a9fb9d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 3 Feb 2014 12:13:09 -0500
Subject: [PATCH 11/17] locks: report l_pid as -1 for FL_FILE_PVT locks

FL_FILE_PVT locks are no longer tied to a particular pid, and are
instead inheritable by child processes. Report a l_pid of '-1' for
these sorts of locks since the pid is somewhat meaningless for them.

This precedent comes from FreeBSD. There, POSIX and flock() locks can
conflict with one another. If fcntl(F_GETLK, ...) returns a lock set
with flock() then the l_pid member cannot be a process ID because the
lock is not held by a process as such.

Acked-by: J. Bruce Fields <bfields@fieldses.org>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/locks.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 0e1f0df8de12..57f1d5fc876a 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1864,7 +1864,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock);
 
 static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
 {
-	flock->l_pid = fl->fl_pid;
+	flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid;
 #if BITS_PER_LONG == 32
 	/*
 	 * Make sure we can represent the posix lock via
@@ -1886,7 +1886,7 @@ static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
 #if BITS_PER_LONG == 32
 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
 {
-	flock->l_pid = fl->fl_pid;
+	flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid;
 	flock->l_start = fl->fl_start;
 	flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
 		fl->fl_end - fl->fl_start + 1;

From c1e62b8fc355e0c3706f1ae0dacb72d1c514dc80 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 3 Feb 2014 12:13:09 -0500
Subject: [PATCH 12/17] locks: pass the cmd value to fcntl_getlk/getlk64

Once we introduce file private locks, we'll need to know what cmd value
was used, as that affects the ownership and whether a conflict would
arise.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/fcntl.c         |  4 ++--
 fs/locks.c         |  4 ++--
 include/linux/fs.h | 10 ++++++----
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/fs/fcntl.c b/fs/fcntl.c
index ef6866592a0f..7ef7f2d2b608 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -273,7 +273,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		err = setfl(fd, filp, arg);
 		break;
 	case F_GETLK:
-		err = fcntl_getlk(filp, (struct flock __user *) arg);
+		err = fcntl_getlk(filp, cmd, (struct flock __user *) arg);
 		break;
 	case F_SETLK:
 	case F_SETLKW:
@@ -389,7 +389,7 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
 	
 	switch (cmd) {
 		case F_GETLK64:
-			err = fcntl_getlk64(f.file, (struct flock64 __user *) arg);
+			err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg);
 			break;
 		case F_SETLK64:
 		case F_SETLKW64:
diff --git a/fs/locks.c b/fs/locks.c
index 57f1d5fc876a..442052b413af 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1898,7 +1898,7 @@ static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
 /* Report the first existing lock that would conflict with l.
  * This implements the F_GETLK command of fcntl().
  */
-int fcntl_getlk(struct file *filp, struct flock __user *l)
+int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
 {
 	struct file_lock file_lock;
 	struct flock flock;
@@ -2066,7 +2066,7 @@ out:
 /* Report the first existing lock that would conflict with l.
  * This implements the F_GETLK command of fcntl().
  */
-int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
+int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
 {
 	struct file_lock file_lock;
 	struct flock64 flock;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5ddeb8de5e77..ae91dce8a547 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -993,12 +993,12 @@ struct file_lock {
 extern void send_sigio(struct fown_struct *fown, int fd, int band);
 
 #ifdef CONFIG_FILE_LOCKING
-extern int fcntl_getlk(struct file *, struct flock __user *);
+extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *);
 extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
 			struct flock __user *);
 
 #if BITS_PER_LONG == 32
-extern int fcntl_getlk64(struct file *, struct flock64 __user *);
+extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 __user *);
 extern int fcntl_setlk64(unsigned int, struct file *, unsigned int,
 			struct flock64 __user *);
 #endif
@@ -1031,7 +1031,8 @@ extern int lease_modify(struct file_lock **, int);
 extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
 extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
 #else /* !CONFIG_FILE_LOCKING */
-static inline int fcntl_getlk(struct file *file, struct flock __user *user)
+static inline int fcntl_getlk(struct file *file, unsigned int cmd,
+			      struct flock __user *user)
 {
 	return -EINVAL;
 }
@@ -1043,7 +1044,8 @@ static inline int fcntl_setlk(unsigned int fd, struct file *file,
 }
 
 #if BITS_PER_LONG == 32
-static inline int fcntl_getlk64(struct file *file, struct flock64 __user *user)
+static inline int fcntl_getlk64(struct file *file, unsigned int cmd,
+				struct flock64 __user *user)
 {
 	return -EINVAL;
 }

From 57b65325fe34ec4c917bc4e555144b4a94d9e1f7 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 3 Feb 2014 12:13:09 -0500
Subject: [PATCH 13/17] locks: skip deadlock detection on FL_FILE_PVT locks

It's not really feasible to do deadlock detection with FL_FILE_PVT
locks since they aren't owned by a single task, per-se. Deadlock
detection also tends to be rather expensive so just skip it for
these sorts of locks.

Also, add a FIXME comment about adding more limited deadlock detection
that just applies to ro -> rw upgrades, per Andy's request.

Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/locks.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 442052b413af..ed9fb769b88e 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -564,7 +564,7 @@ static void __locks_insert_block(struct file_lock *blocker,
 	BUG_ON(!list_empty(&waiter->fl_block));
 	waiter->fl_next = blocker;
 	list_add_tail(&waiter->fl_block, &blocker->fl_block);
-	if (IS_POSIX(blocker))
+	if (IS_POSIX(blocker) && !IS_FILE_PVT(blocker))
 		locks_insert_global_blocked(waiter);
 }
 
@@ -757,8 +757,16 @@ EXPORT_SYMBOL(posix_test_lock);
  * Note: the above assumption may not be true when handling lock
  * requests from a broken NFS client. It may also fail in the presence
  * of tasks (such as posix threads) sharing the same open file table.
- *
  * To handle those cases, we just bail out after a few iterations.
+ *
+ * For FL_FILE_PVT locks, the owner is the filp, not the files_struct.
+ * Because the owner is not even nominally tied to a thread of
+ * execution, the deadlock detection below can't reasonably work well. Just
+ * skip it for those.
+ *
+ * In principle, we could do a more limited deadlock detection on FL_FILE_PVT
+ * locks that just checks for the case where two tasks are attempting to
+ * upgrade from read to write locks on the same inode.
  */
 
 #define MAX_DEADLK_ITERATIONS 10
@@ -781,6 +789,13 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,
 {
 	int i = 0;
 
+	/*
+	 * This deadlock detector can't reasonably detect deadlocks with
+	 * FL_FILE_PVT locks, since they aren't owned by a process, per-se.
+	 */
+	if (IS_FILE_PVT(caller_fl))
+		return 0;
+
 	while ((block_fl = what_owner_is_waiting_for(block_fl))) {
 		if (i++ > MAX_DEADLK_ITERATIONS)
 			return 0;

From 5d50ffd7c31dab47c6b828841ca1ec70a1b40169 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 3 Feb 2014 12:13:10 -0500
Subject: [PATCH 14/17] locks: add new fcntl cmd values for handling file
 private locks

Due to some unfortunate history, POSIX locks have very strange and
unhelpful semantics. The thing that usually catches people by surprise
is that they are dropped whenever the process closes any file descriptor
associated with the inode.

This is extremely problematic for people developing file servers that
need to implement byte-range locks. Developers often need a "lock
management" facility to ensure that file descriptors are not closed
until all of the locks associated with the inode are finished.

Additionally, "classic" POSIX locks are owned by the process. Locks
taken between threads within the same process won't conflict with one
another, which renders them useless for synchronization between threads.

This patchset adds a new type of lock that attempts to address these
issues. These locks conflict with classic POSIX read/write locks, but
have semantics that are more like BSD locks with respect to inheritance
and behavior on close.

This is implemented primarily by changing how fl_owner field is set for
these locks. Instead of having them owned by the files_struct of the
process, they are instead owned by the filp on which they were acquired.
Thus, they are inherited across fork() and are only released when the
last reference to a filp is put.

These new semantics prevent them from being merged with classic POSIX
locks, even if they are acquired by the same process. These locks will
also conflict with classic POSIX locks even if they are acquired by
the same process or on the same file descriptor.

The new locks are managed using a new set of cmd values to the fcntl()
syscall. The initial implementation of this converts these values to
"classic" cmd values at a fairly high level, and the details are not
exposed to the underlying filesystem. We may eventually want to push
this handing out to the lower filesystem code but for now I don't
see any need for it.

Also, note that with this implementation the new cmd values are only
available via fcntl64() on 32-bit arches. There's little need to
add support for legacy apps on a new interface like this.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 arch/arm/kernel/sys_oabi-compat.c |  3 ++
 fs/compat.c                       | 35 +++++++++++++++++---
 fs/fcntl.c                        | 35 +++++++++++++-------
 fs/locks.c                        | 54 ++++++++++++++++++++++++++++---
 include/uapi/asm-generic/fcntl.h  | 16 +++++++++
 security/selinux/hooks.c          |  3 ++
 6 files changed, 126 insertions(+), 20 deletions(-)

diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
index 3e94811690ce..702bd329d9d0 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -203,6 +203,9 @@ asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd,
 	int ret;
 
 	switch (cmd) {
+	case F_GETLKP:
+	case F_SETLKP:
+	case F_SETLKPW:
 	case F_GETLK64:
 	case F_SETLK64:
 	case F_SETLKW64:
diff --git a/fs/compat.c b/fs/compat.c
index 6af20de2c1a3..f340dcf11f68 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -399,12 +399,28 @@ static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *u
 }
 #endif
 
+static unsigned int
+convert_fcntl_cmd(unsigned int cmd)
+{
+	switch (cmd) {
+	case F_GETLK64:
+		return F_GETLK;
+	case F_SETLK64:
+		return F_SETLK;
+	case F_SETLKW64:
+		return F_SETLKW;
+	}
+
+	return cmd;
+}
+
 asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
 		unsigned long arg)
 {
 	mm_segment_t old_fs;
 	struct flock f;
 	long ret;
+	unsigned int conv_cmd;
 
 	switch (cmd) {
 	case F_GETLK:
@@ -441,16 +457,18 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
 	case F_GETLK64:
 	case F_SETLK64:
 	case F_SETLKW64:
+	case F_GETLKP:
+	case F_SETLKP:
+	case F_SETLKPW:
 		ret = get_compat_flock64(&f, compat_ptr(arg));
 		if (ret != 0)
 			break;
 		old_fs = get_fs();
 		set_fs(KERNEL_DS);
-		ret = sys_fcntl(fd, (cmd == F_GETLK64) ? F_GETLK :
-				((cmd == F_SETLK64) ? F_SETLK : F_SETLKW),
-				(unsigned long)&f);
+		conv_cmd = convert_fcntl_cmd(cmd);
+		ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f);
 		set_fs(old_fs);
-		if (cmd == F_GETLK64 && ret == 0) {
+		if ((conv_cmd == F_GETLK || conv_cmd == F_GETLKP) && ret == 0) {
 			/* need to return lock information - see above for commentary */
 			if (f.l_start > COMPAT_LOFF_T_MAX)
 				ret = -EOVERFLOW;
@@ -471,8 +489,15 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
 asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd,
 		unsigned long arg)
 {
-	if ((cmd == F_GETLK64) || (cmd == F_SETLK64) || (cmd == F_SETLKW64))
+	switch (cmd) {
+	case F_GETLK64:
+	case F_SETLK64:
+	case F_SETLKW64:
+	case F_GETLKP:
+	case F_SETLKP:
+	case F_SETLKPW:
 		return -EINVAL;
+	}
 	return compat_sys_fcntl64(fd, cmd, arg);
 }
 
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 7ef7f2d2b608..9ead1596399a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -272,9 +272,19 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 	case F_SETFL:
 		err = setfl(fd, filp, arg);
 		break;
+#if BITS_PER_LONG != 32
+	/* 32-bit arches must use fcntl64() */
+	case F_GETLKP:
+#endif
 	case F_GETLK:
 		err = fcntl_getlk(filp, cmd, (struct flock __user *) arg);
 		break;
+#if BITS_PER_LONG != 32
+	/* 32-bit arches must use fcntl64() */
+	case F_SETLKP:
+	case F_SETLKPW:
+#endif
+		/* Fallthrough */
 	case F_SETLK:
 	case F_SETLKW:
 		err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
@@ -388,17 +398,20 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
 		goto out1;
 	
 	switch (cmd) {
-		case F_GETLK64:
-			err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg);
-			break;
-		case F_SETLK64:
-		case F_SETLKW64:
-			err = fcntl_setlk64(fd, f.file, cmd,
-					(struct flock64 __user *) arg);
-			break;
-		default:
-			err = do_fcntl(fd, cmd, arg, f.file);
-			break;
+	case F_GETLK64:
+	case F_GETLKP:
+		err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg);
+		break;
+	case F_SETLK64:
+	case F_SETLKW64:
+	case F_SETLKP:
+	case F_SETLKPW:
+		err = fcntl_setlk64(fd, f.file, cmd,
+				(struct flock64 __user *) arg);
+		break;
+	default:
+		err = do_fcntl(fd, cmd, arg, f.file);
+		break;
 	}
 out1:
 	fdput(f);
diff --git a/fs/locks.c b/fs/locks.c
index ed9fb769b88e..3b54b98236ee 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1930,6 +1930,12 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
 	if (error)
 		goto out;
 
+	if (cmd == F_GETLKP) {
+		cmd = F_GETLK;
+		file_lock.fl_flags |= FL_FILE_PVT;
+		file_lock.fl_owner = (fl_owner_t)filp;
+	}
+
 	error = vfs_test_lock(filp, &file_lock);
 	if (error)
 		goto out;
@@ -2049,10 +2055,26 @@ again:
 	error = flock_to_posix_lock(filp, file_lock, &flock);
 	if (error)
 		goto out;
-	if (cmd == F_SETLKW) {
+
+	/*
+	 * If the cmd is requesting file-private locks, then set the
+	 * FL_FILE_PVT flag and override the owner.
+	 */
+	switch (cmd) {
+	case F_SETLKP:
+		cmd = F_SETLK;
+		file_lock->fl_flags |= FL_FILE_PVT;
+		file_lock->fl_owner = (fl_owner_t)filp;
+		break;
+	case F_SETLKPW:
+		cmd = F_SETLKW;
+		file_lock->fl_flags |= FL_FILE_PVT;
+		file_lock->fl_owner = (fl_owner_t)filp;
+		/* Fallthrough */
+	case F_SETLKW:
 		file_lock->fl_flags |= FL_SLEEP;
 	}
-	
+
 	error = do_lock_file_wait(filp, cmd, file_lock);
 
 	/*
@@ -2098,6 +2120,12 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
 	if (error)
 		goto out;
 
+	if (cmd == F_GETLKP) {
+		cmd = F_GETLK64;
+		file_lock.fl_flags |= FL_FILE_PVT;
+		file_lock.fl_owner = (fl_owner_t)filp;
+	}
+
 	error = vfs_test_lock(filp, &file_lock);
 	if (error)
 		goto out;
@@ -2150,10 +2178,26 @@ again:
 	error = flock64_to_posix_lock(filp, file_lock, &flock);
 	if (error)
 		goto out;
-	if (cmd == F_SETLKW64) {
+
+	/*
+	 * If the cmd is requesting file-private locks, then set the
+	 * FL_FILE_PVT flag and override the owner.
+	 */
+	switch (cmd) {
+	case F_SETLKP:
+		cmd = F_SETLK64;
+		file_lock->fl_flags |= FL_FILE_PVT;
+		file_lock->fl_owner = (fl_owner_t)filp;
+		break;
+	case F_SETLKPW:
+		cmd = F_SETLKW64;
+		file_lock->fl_flags |= FL_FILE_PVT;
+		file_lock->fl_owner = (fl_owner_t)filp;
+		/* Fallthrough */
+	case F_SETLKW64:
 		file_lock->fl_flags |= FL_SLEEP;
 	}
-	
+
 	error = do_lock_file_wait(filp, cmd, file_lock);
 
 	/*
@@ -2221,6 +2265,8 @@ void locks_remove_file(struct file *filp)
 	if (!inode->i_flock)
 		return;
 
+	locks_remove_posix(filp, (fl_owner_t)filp);
+
 	if (filp->f_op->flock) {
 		struct file_lock fl = {
 			.fl_pid = current->tgid,
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index 36025f77c6ed..a9b13f8b3595 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -132,6 +132,22 @@
 #define F_GETOWNER_UIDS	17
 #endif
 
+/*
+ * fd "private" POSIX locks.
+ *
+ * Usually POSIX locks held by a process are released on *any* close and are
+ * not inherited across a fork().
+ *
+ * These cmd values will set locks that conflict with normal POSIX locks, but
+ * are "owned" by the opened file, not the process. This means that they are
+ * inherited across fork() like BSD (flock) locks, and they are only released
+ * automatically when the last reference to the the open file against which
+ * they were acquired is put.
+ */
+#define F_GETLKP	36
+#define F_SETLKP	37
+#define F_SETLKPW	38
+
 #define F_OWNER_TID	0
 #define F_OWNER_PID	1
 #define F_OWNER_PGRP	2
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 4b34847208cc..3aa876374883 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3302,6 +3302,9 @@ static int selinux_file_fcntl(struct file *file, unsigned int cmd,
 	case F_GETLK:
 	case F_SETLK:
 	case F_SETLKW:
+	case F_GETLKP:
+	case F_SETLKP:
+	case F_SETLKPW:
 #if BITS_PER_LONG == 32
 	case F_GETLK64:
 	case F_SETLK64:

From 90478939dce096ed5b239cad16237dca0a59d66f Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 4 Mar 2014 10:30:23 -0500
Subject: [PATCH 15/17] locks: require that flock->l_pid be set to 0 for
 file-private locks

Neil Brown suggested potentially overloading the l_pid value as a "lock
context" field for file-private locks. While I don't think we will
probably want to do that here, it's probably a good idea to ensure that
in the future we could extend this API without breaking existing
callers.

Typically the l_pid value is ignored for incoming struct flock
arguments, serving mainly as a place to return the pid of the owner if
there is a conflicting lock. For file-private locks, require that it
currently be set to 0 and return EINVAL if it isn't. If we eventually
want to make a non-zero l_pid mean something, then this will help ensure
that we don't break legacy programs that are using file-private locks.

Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/locks.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/fs/locks.c b/fs/locks.c
index 3b54b98236ee..09d6c8c33c81 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1931,6 +1931,10 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
 		goto out;
 
 	if (cmd == F_GETLKP) {
+		error = -EINVAL;
+		if (flock.l_pid != 0)
+			goto out;
+
 		cmd = F_GETLK;
 		file_lock.fl_flags |= FL_FILE_PVT;
 		file_lock.fl_owner = (fl_owner_t)filp;
@@ -2062,11 +2066,19 @@ again:
 	 */
 	switch (cmd) {
 	case F_SETLKP:
+		error = -EINVAL;
+		if (flock.l_pid != 0)
+			goto out;
+
 		cmd = F_SETLK;
 		file_lock->fl_flags |= FL_FILE_PVT;
 		file_lock->fl_owner = (fl_owner_t)filp;
 		break;
 	case F_SETLKPW:
+		error = -EINVAL;
+		if (flock.l_pid != 0)
+			goto out;
+
 		cmd = F_SETLKW;
 		file_lock->fl_flags |= FL_FILE_PVT;
 		file_lock->fl_owner = (fl_owner_t)filp;
@@ -2121,6 +2133,10 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
 		goto out;
 
 	if (cmd == F_GETLKP) {
+		error = -EINVAL;
+		if (flock.l_pid != 0)
+			goto out;
+
 		cmd = F_GETLK64;
 		file_lock.fl_flags |= FL_FILE_PVT;
 		file_lock.fl_owner = (fl_owner_t)filp;
@@ -2185,11 +2201,19 @@ again:
 	 */
 	switch (cmd) {
 	case F_SETLKP:
+		error = -EINVAL;
+		if (flock.l_pid != 0)
+			goto out;
+
 		cmd = F_SETLK64;
 		file_lock->fl_flags |= FL_FILE_PVT;
 		file_lock->fl_owner = (fl_owner_t)filp;
 		break;
 	case F_SETLKPW:
+		error = -EINVAL;
+		if (flock.l_pid != 0)
+			goto out;
+
 		cmd = F_SETLKW64;
 		file_lock->fl_flags |= FL_FILE_PVT;
 		file_lock->fl_owner = (fl_owner_t)filp;

From d7a06983a01a33605191c0766857b832ac32a2b6 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 10 Mar 2014 09:54:15 -0400
Subject: [PATCH 16/17] locks: fix locks_mandatory_locked to respect
 file-private locks

As Trond pointed out, you can currently deadlock yourself by setting a
file-private lock on a file that requires mandatory locking and then
trying to do I/O on it.

Avoid this problem by plumbing some knowledge of file-private locks into
the mandatory locking code. In order to do this, we must pass down
information about the struct file that's being used to
locks_verify_locked.

Reported-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/locks.c         |  7 ++++---
 fs/namei.c         |  2 +-
 include/linux/fs.h | 22 +++++++++++-----------
 mm/mmap.c          |  2 +-
 mm/nommu.c         |  2 +-
 5 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 09d6c8c33c81..d82c51c4fcd2 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1155,13 +1155,14 @@ EXPORT_SYMBOL(posix_lock_file_wait);
 
 /**
  * locks_mandatory_locked - Check for an active lock
- * @inode: the file to check
+ * @file: the file to check
  *
  * Searches the inode's list of locks to find any POSIX locks which conflict.
  * This function is called from locks_verify_locked() only.
  */
-int locks_mandatory_locked(struct inode *inode)
+int locks_mandatory_locked(struct file *file)
 {
+	struct inode *inode = file_inode(file);
 	fl_owner_t owner = current->files;
 	struct file_lock *fl;
 
@@ -1172,7 +1173,7 @@ int locks_mandatory_locked(struct inode *inode)
 	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
 		if (!IS_POSIX(fl))
 			continue;
-		if (fl->fl_owner != owner)
+		if (fl->fl_owner != owner && fl->fl_owner != (fl_owner_t)file)
 			break;
 	}
 	spin_unlock(&inode->i_lock);
diff --git a/fs/namei.c b/fs/namei.c
index d580df2e6804..dc51bac037c9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2542,7 +2542,7 @@ static int handle_truncate(struct file *filp)
 	/*
 	 * Refuse to truncate files with mandatory locks held on them.
 	 */
-	error = locks_verify_locked(inode);
+	error = locks_verify_locked(filp);
 	if (!error)
 		error = security_path_truncate(path);
 	if (!error) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ae91dce8a547..4aa81e6ae067 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1912,6 +1912,11 @@ extern int current_umask(void);
 extern void ihold(struct inode * inode);
 extern void iput(struct inode *);
 
+static inline struct inode *file_inode(struct file *f)
+{
+	return f->f_inode;
+}
+
 /* /sys/fs */
 extern struct kobject *fs_kobj;
 
@@ -1921,7 +1926,7 @@ extern struct kobject *fs_kobj;
 #define FLOCK_VERIFY_WRITE 2
 
 #ifdef CONFIG_FILE_LOCKING
-extern int locks_mandatory_locked(struct inode *);
+extern int locks_mandatory_locked(struct file *);
 extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t);
 
 /*
@@ -1944,10 +1949,10 @@ static inline int mandatory_lock(struct inode *ino)
 	return IS_MANDLOCK(ino) && __mandatory_lock(ino);
 }
 
-static inline int locks_verify_locked(struct inode *inode)
+static inline int locks_verify_locked(struct file *file)
 {
-	if (mandatory_lock(inode))
-		return locks_mandatory_locked(inode);
+	if (mandatory_lock(file_inode(file)))
+		return locks_mandatory_locked(file);
 	return 0;
 }
 
@@ -2008,7 +2013,7 @@ static inline int break_deleg_wait(struct inode **delegated_inode)
 }
 
 #else /* !CONFIG_FILE_LOCKING */
-static inline int locks_mandatory_locked(struct inode *inode)
+static inline int locks_mandatory_locked(struct file *file)
 {
 	return 0;
 }
@@ -2030,7 +2035,7 @@ static inline int mandatory_lock(struct inode *inode)
 	return 0;
 }
 
-static inline int locks_verify_locked(struct inode *inode)
+static inline int locks_verify_locked(struct file *file)
 {
 	return 0;
 }
@@ -2297,11 +2302,6 @@ static inline bool execute_ok(struct inode *inode)
 	return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode);
 }
 
-static inline struct inode *file_inode(struct file *f)
-{
-	return f->f_inode;
-}
-
 static inline void file_start_write(struct file *file)
 {
 	if (!S_ISREG(file_inode(file)->i_mode))
diff --git a/mm/mmap.c b/mm/mmap.c
index 20ff0c33274c..5932ce961218 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1299,7 +1299,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 			/*
 			 * Make sure there are no mandatory locks on the file.
 			 */
-			if (locks_verify_locked(inode))
+			if (locks_verify_locked(file))
 				return -EAGAIN;
 
 			vm_flags |= VM_SHARED | VM_MAYSHARE;
diff --git a/mm/nommu.c b/mm/nommu.c
index 8740213b1647..a554e5a451cd 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -995,7 +995,7 @@ static int validate_mmap_request(struct file *file,
 			    (file->f_mode & FMODE_WRITE))
 				return -EACCES;
 
-			if (locks_verify_locked(file_inode(file)))
+			if (locks_verify_locked(file))
 				return -EAGAIN;
 
 			if (!(capabilities & BDI_CAP_MAP_DIRECT))

From 29723adee11804b548903ddb1db666cf4a60f60e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 10 Mar 2014 09:54:19 -0400
Subject: [PATCH 17/17] locks: make locks_mandatory_area check for file-private
 locks

Allow locks_mandatory_area() to handle file-private locks correctly.
If there is a file-private lock set on an open file and we're doing I/O
via the same, then that should not cause anything to block.

Handle this by first doing a non-blocking FL_ACCESS check for a
file-private lock, and then fall back to checking for a classic POSIX
lock (and possibly blocking).

Note that this approach is subject to the same races that have always
plagued mandatory locking on Linux.

Reported-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/locks.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index d82c51c4fcd2..13fc7a6d380a 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1199,19 +1199,30 @@ int locks_mandatory_area(int read_write, struct inode *inode,
 {
 	struct file_lock fl;
 	int error;
+	bool sleep = false;
 
 	locks_init_lock(&fl);
-	fl.fl_owner = current->files;
 	fl.fl_pid = current->tgid;
 	fl.fl_file = filp;
 	fl.fl_flags = FL_POSIX | FL_ACCESS;
 	if (filp && !(filp->f_flags & O_NONBLOCK))
-		fl.fl_flags |= FL_SLEEP;
+		sleep = true;
 	fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;
 	fl.fl_start = offset;
 	fl.fl_end = offset + count - 1;
 
 	for (;;) {
+		if (filp) {
+			fl.fl_owner = (fl_owner_t)filp;
+			fl.fl_flags &= ~FL_SLEEP;
+			error = __posix_lock_file(inode, &fl, NULL);
+			if (!error)
+				break;
+		}
+
+		if (sleep)
+			fl.fl_flags |= FL_SLEEP;
+		fl.fl_owner = current->files;
 		error = __posix_lock_file(inode, &fl, NULL);
 		if (error != FILE_LOCK_DEFERRED)
 			break;