From b80d5ccca3a012e91ca64a2a0b13049163a6a698 Mon Sep 17 00:00:00 2001 From: Haiyan Hu Date: Tue, 10 Sep 2013 11:25:37 +0800 Subject: [PATCH 001/171] NVMe: Avoid shift operation when writing cq head doorbell Changes the type of dev->db_stride to unsigned and changes the value stored there to be 1 << the current value. Then there is less calculation to be done at completion time. Signed-off-by: Haiyan Hu Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 10 +++++----- include/linux/nvme.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 26d03fa0bf26..073aec913c78 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -775,7 +775,7 @@ static int nvme_process_cq(struct nvme_queue *nvmeq) if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) return 0; - writel(head, nvmeq->q_db + (1 << nvmeq->dev->db_stride)); + writel(head, nvmeq->q_db + nvmeq->dev->db_stride); nvmeq->cq_head = head; nvmeq->cq_phase = phase; @@ -1113,7 +1113,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, init_waitqueue_head(&nvmeq->sq_full); init_waitqueue_entry(&nvmeq->sq_cong_wait, nvme_thread); bio_list_init(&nvmeq->sq_cong); - nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)]; + nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; nvmeq->q_depth = depth; nvmeq->cq_vector = vector; nvmeq->q_suspended = 1; @@ -1149,7 +1149,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) nvmeq->sq_tail = 0; nvmeq->cq_head = 0; nvmeq->cq_phase = 1; - nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)]; + nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; memset(nvmeq->cmdid_data, 0, extra); memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); nvme_cancel_ios(nvmeq, false); @@ -1741,7 +1741,7 @@ static int set_queue_count(struct nvme_dev *dev, int count) static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) { - return 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3)); + return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride); } static int nvme_setup_io_queues(struct nvme_dev *dev) @@ -1958,7 +1958,7 @@ static int nvme_dev_map(struct nvme_dev *dev) if (!dev->bar) goto disable; - dev->db_stride = NVME_CAP_STRIDE(readq(&dev->bar->cap)); + dev->db_stride = 1 << NVME_CAP_STRIDE(readq(&dev->bar->cap)); dev->dbs = ((void __iomem *)dev->bar) + 4096; return 0; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 26ebcf41c213..8119a476cb29 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -80,7 +80,7 @@ struct nvme_dev { struct dma_pool *prp_small_pool; int instance; int queue_count; - int db_stride; + u32 db_stride; u32 ctrl_config; struct msix_entry *entry; struct nvme_bar __iomem *bar; From 481e5bad84239b01415b888df2040c1860ae3cfd Mon Sep 17 00:00:00 2001 From: Michael Opdenacker Date: Sat, 12 Oct 2013 06:23:29 +0200 Subject: [PATCH 002/171] NVMe: remove deprecated IRQF_DISABLED This patch proposes to remove the use of the IRQF_DISABLED flag It's a NOOP since 2.6.35 and it will be removed one day. 
Signed-off-by: Michael Opdenacker Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 073aec913c78..df3daeb6227f 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1134,11 +1134,10 @@ static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq, { if (use_threaded_interrupts) return request_threaded_irq(dev->entry[nvmeq->cq_vector].vector, - nvme_irq_check, nvme_irq, - IRQF_DISABLED | IRQF_SHARED, + nvme_irq_check, nvme_irq, IRQF_SHARED, name, nvmeq); return request_irq(dev->entry[nvmeq->cq_vector].vector, nvme_irq, - IRQF_DISABLED | IRQF_SHARED, name, nvmeq); + IRQF_SHARED, name, nvmeq); } static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) From a6f089e95b1e08cdea9633d50ad20aa5d44ba64d Mon Sep 17 00:00:00 2001 From: Lior Amsalem Date: Mon, 25 Nov 2013 17:26:44 +0100 Subject: [PATCH 003/171] irqchip: armada-370-xp: fix IPI race condition In the Armada 370/XP driver, when we receive an IRQ 0, we read the list of doorbells that caused the interrupt from register ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS. This gives the list of IPIs that were generated. However, instead of acknowledging only the IPIs that were generated, we acknowledge *all* the IPIs, by writing ~IPI_DOORBELL_MASK in the ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS register. This creates a race condition: if a new IPI that isn't part of the ones read into the temporary "ipimask" variable is fired before we acknowledge all IPIs, then we will simply lose it. This is causing scheduling hangs on SMP intensive workloads. It is important to mention that this ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS register has the following behavior: "A CPU write of 0 clears the bits in this field. A CPU write of 1 has no effect". This is what allows us to simply write ~ipimask to acknowledge the handled IPIs. Notice that the same problem is present in the MSI implementation, but it will be fixed as a separate patch, so that this IPI fix can be pushed to older stable versions as appropriate (all the way to 3.8), while the MSI code only appeared in 3.13. Signed-off-by: Lior Amsalem Signed-off-by: Thomas Petazzoni Cc: stable@vger.kernel.org # v3.8+ Fixes: 344e873e5657e8dc0 'arm: mvebu: Add IPI support via doorbells' Cc: Thomas Gleixner Signed-off-by: Jason Cooper --- drivers/irqchip/irq-armada-370-xp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 433cc8568dec..f5e49a2d8e5a 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -407,7 +407,7 @@ armada_370_xp_handle_irq(struct pt_regs *regs) ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS) & IPI_DOORBELL_MASK; - writel(~IPI_DOORBELL_MASK, per_cpu_int_base + + writel(~ipimask, per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS); /* Handle all pending doorbells */ From c7f7bd4a136e4b02dd2a66bf95aec545bd93e8db Mon Sep 17 00:00:00 2001 From: Lior Amsalem Date: Mon, 25 Nov 2013 17:26:45 +0100 Subject: [PATCH 004/171] irqchip: armada-370-xp: fix MSI race condition In the Armada 370/XP driver, when we receive an IRQ 1, we read the list of doorbells that caused the interrupt from register ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS. This gives the list of MSIs that were generated.
However, instead of acknowledging only the MSIs that were generated, we acknowledge *all* the MSIs, by writing ~MSI_DOORBELL_MASK in the ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS register. This creates a race condition: if a new MSI that isn't part of the ones read into the temporary "msimask" variable is fired before we acknowledge all MSIs, then we will simply lose it. It is important to mention that this ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS register has the following behavior: "A CPU write of 0 clears the bits in this field. A CPU write of 1 has no effect". This is what allows us to simply write ~msimask to acknowledge the handled MSIs. Notice that the same problem is present in the IPI implementation, but it is fixed as a separate patch, so that this IPI fix can be pushed to older stable versions as appropriate (all the way to 3.8), while the MSI code only appeared in 3.13. Signed-off-by: Lior Amsalem Signed-off-by: Thomas Petazzoni Cc: Thomas Gleixner Signed-off-by: Jason Cooper --- drivers/irqchip/irq-armada-370-xp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index f5e49a2d8e5a..3fac063b4a78 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -381,7 +381,7 @@ armada_370_xp_handle_irq(struct pt_regs *regs) ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS) & PCI_MSI_DOORBELL_MASK; - writel(~PCI_MSI_DOORBELL_MASK, per_cpu_int_base + + writel(~msimask, per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS); for (msinr = PCI_MSI_DOORBELL_START; From 320a382746e0ab1304476ea7e986a8d416ab99db Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 23 Oct 2013 13:07:34 -0600 Subject: [PATCH 005/171] NVMe: compat SG_IO ioctl For 32-bit versions of sg3-utils running on a 64-bit system. This is mostly a copy from the relevant portions of fs/compat_ioctl.c, with slight modifications for going through block_device_operations.
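A condensed sketch of the central translation step (field names as in the hunks below; an illustration, not the full handler): every user pointer arrives as a 32-bit value and has to be widened with compat_ptr() before the native sg_io_hdr can be handed to nvme_sg_io():

	u32 data;
	unsigned char __user *cmdp;

	if (get_user(data, &sgio32->cmdp))	/* 32-bit pointer value from userspace */
		return -EFAULT;
	cmdp = compat_ptr(data);		/* widen to a native __user pointer */
	if (put_user(cmdp, &sgio->cmdp))	/* store into the 64-bit sg_io_hdr copy */
		return -EFAULT;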
Signed-off-by: Keith Busch Reviewed-by: Vishal Verma [fixed up CONFIG_COMPAT=n build problems] Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 18 ++++- drivers/block/nvme-scsi.c | 147 ++++++++++++++++++++++++++++++++++++++ include/linux/nvme.h | 1 + 3 files changed, 165 insertions(+), 1 deletion(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index df3daeb6227f..e44350de8a21 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1568,10 +1568,26 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, } } +#ifdef CONFIG_COMPAT +static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct nvme_ns *ns = bdev->bd_disk->private_data; + + switch (cmd) { + case SG_IO: + return nvme_sg_io32(ns, arg); + } + return nvme_ioctl(bdev, mode, cmd, arg); +} +#else +#define nvme_compat_ioctl NULL +#endif + static const struct block_device_operations nvme_fops = { .owner = THIS_MODULE, .ioctl = nvme_ioctl, - .compat_ioctl = nvme_ioctl, + .compat_ioctl = nvme_compat_ioctl, }; static void nvme_resubmit_bios(struct nvme_queue *nvmeq) diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 4a4ff4eb8e23..4a0ceb64e269 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -3038,6 +3039,152 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr) return retcode; } +#ifdef CONFIG_COMPAT +typedef struct sg_io_hdr32 { + compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */ + compat_int_t dxfer_direction; /* [i] data transfer direction */ + unsigned char cmd_len; /* [i] SCSI command length ( <= 16 bytes) */ + unsigned char mx_sb_len; /* [i] max length to write to sbp */ + unsigned short iovec_count; /* [i] 0 implies no scatter gather */ + compat_uint_t dxfer_len; /* [i] byte count of data transfer */ + compat_uint_t dxferp; /* [i], [*io] points to data transfer memory + or scatter gather list */ + compat_uptr_t cmdp; /* [i], [*i] points to command to perform */ + compat_uptr_t sbp; /* [i], [*o] points to sense_buffer memory */ + compat_uint_t timeout; /* [i] MAX_UINT->no timeout (unit: millisec) */ + compat_uint_t flags; /* [i] 0 -> default, see SG_FLAG... 
*/ + compat_int_t pack_id; /* [i->o] unused internally (normally) */ + compat_uptr_t usr_ptr; /* [i->o] unused internally */ + unsigned char status; /* [o] scsi status */ + unsigned char masked_status; /* [o] shifted, masked scsi status */ + unsigned char msg_status; /* [o] messaging level data (optional) */ + unsigned char sb_len_wr; /* [o] byte count actually written to sbp */ + unsigned short host_status; /* [o] errors from host adapter */ + unsigned short driver_status; /* [o] errors from software driver */ + compat_int_t resid; /* [o] dxfer_len - actual_transferred */ + compat_uint_t duration; /* [o] time taken by cmd (unit: millisec) */ + compat_uint_t info; /* [o] auxiliary information */ +} sg_io_hdr32_t; /* 64 bytes long (on sparc32) */ + +typedef struct sg_iovec32 { + compat_uint_t iov_base; + compat_uint_t iov_len; +} sg_iovec32_t; + +static int sg_build_iovec(sg_io_hdr_t __user *sgio, void __user *dxferp, u16 iovec_count) +{ + sg_iovec_t __user *iov = (sg_iovec_t __user *) (sgio + 1); + sg_iovec32_t __user *iov32 = dxferp; + int i; + + for (i = 0; i < iovec_count; i++) { + u32 base, len; + + if (get_user(base, &iov32[i].iov_base) || + get_user(len, &iov32[i].iov_len) || + put_user(compat_ptr(base), &iov[i].iov_base) || + put_user(len, &iov[i].iov_len)) + return -EFAULT; + } + + if (put_user(iov, &sgio->dxferp)) + return -EFAULT; + return 0; +} + +int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg) +{ + sg_io_hdr32_t __user *sgio32 = (sg_io_hdr32_t __user *)arg; + sg_io_hdr_t __user *sgio; + u16 iovec_count; + u32 data; + void __user *dxferp; + int err; + int interface_id; + + if (get_user(interface_id, &sgio32->interface_id)) + return -EFAULT; + if (interface_id != 'S') + return -EINVAL; + + if (get_user(iovec_count, &sgio32->iovec_count)) + return -EFAULT; + + { + void __user *top = compat_alloc_user_space(0); + void __user *new = compat_alloc_user_space(sizeof(sg_io_hdr_t) + + (iovec_count * sizeof(sg_iovec_t))); + if (new > top) + return -EINVAL; + + sgio = new; + } + + /* Ok, now construct. 
*/ + if (copy_in_user(&sgio->interface_id, &sgio32->interface_id, + (2 * sizeof(int)) + + (2 * sizeof(unsigned char)) + + (1 * sizeof(unsigned short)) + + (1 * sizeof(unsigned int)))) + return -EFAULT; + + if (get_user(data, &sgio32->dxferp)) + return -EFAULT; + dxferp = compat_ptr(data); + if (iovec_count) { + if (sg_build_iovec(sgio, dxferp, iovec_count)) + return -EFAULT; + } else { + if (put_user(dxferp, &sgio->dxferp)) + return -EFAULT; + } + + { + unsigned char __user *cmdp; + unsigned char __user *sbp; + + if (get_user(data, &sgio32->cmdp)) + return -EFAULT; + cmdp = compat_ptr(data); + + if (get_user(data, &sgio32->sbp)) + return -EFAULT; + sbp = compat_ptr(data); + + if (put_user(cmdp, &sgio->cmdp) || + put_user(sbp, &sgio->sbp)) + return -EFAULT; + } + + if (copy_in_user(&sgio->timeout, &sgio32->timeout, + 3 * sizeof(int))) + return -EFAULT; + + if (get_user(data, &sgio32->usr_ptr)) + return -EFAULT; + if (put_user(compat_ptr(data), &sgio->usr_ptr)) + return -EFAULT; + + err = nvme_sg_io(ns, sgio); + if (err >= 0) { + void __user *datap; + + if (copy_in_user(&sgio32->pack_id, &sgio->pack_id, + sizeof(int)) || + get_user(datap, &sgio->usr_ptr) || + put_user((u32)(unsigned long)datap, + &sgio32->usr_ptr) || + copy_in_user(&sgio32->status, &sgio->status, + (4 * sizeof(unsigned char)) + + (2 * sizeof(unsigned short)) + + (3 * sizeof(int)))) + err = -EFAULT; + } + + return err; +} +#endif + int nvme_sg_get_version_num(int __user *ip) { return put_user(sg_version_num, ip); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 8119a476cb29..5bc29197d90e 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -165,6 +165,7 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, struct sg_io_hdr; int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr); +int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg); int nvme_sg_get_version_num(int __user *ip); #endif /* _LINUX_NVME_H */ From 0a8d44cb33377969337fa6c1961b631605d5f453 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 15 Oct 2013 15:01:10 -0400 Subject: [PATCH 006/171] NVMe: Fix lockdep warnings During the initialisation path, the queue lock is taken without interrupt protection. It's perfectly safe to do so, because the interrupt handler can't run at this point, but it confuses lockdep. 
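The shape of the fix, condensed from the hunks below: the init-path critical sections now disable interrupts around q_lock, so lockdep sees the lock used consistently with the completion path that takes it from hard-irq context:

	spin_lock_irq(&nvmeq->q_lock);	/* was spin_lock(); safe either way, */
	nvme_init_queue(nvmeq, qid);	/* but lockdep cannot know the IRQ is not wired up yet */
	spin_unlock_irq(&nvmeq->q_lock);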
Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index e44350de8a21..0e9c5dcb2bd7 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1172,9 +1172,9 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) if (result < 0) goto release_sq; - spin_lock(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->q_lock); nvme_init_queue(nvmeq, qid); - spin_unlock(&nvmeq->q_lock); + spin_unlock_irq(&nvmeq->q_lock); return result; @@ -1290,9 +1290,9 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) if (result) return result; - spin_lock(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->q_lock); nvme_init_queue(nvmeq, 0); - spin_unlock(&nvmeq->q_lock); + spin_unlock_irq(&nvmeq->q_lock); return result; } @@ -1836,9 +1836,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) for (i = dev->queue_count - 1; i > nr_io_queues; i--) { struct nvme_queue *nvmeq = dev->queues[i]; - spin_lock(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->q_lock); nvme_cancel_ios(nvmeq, false); - spin_unlock(&nvmeq->q_lock); + spin_unlock_irq(&nvmeq->q_lock); nvme_free_queue(nvmeq); dev->queue_count--; From 68608c268bf265b039daeaafee6c902dfef32024 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 21 Jun 2013 14:36:34 -0400 Subject: [PATCH 007/171] NVMe: Cache dev->pci_dev in a local pointer Helps with line-length issues Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 0e9c5dcb2bd7..300766973d76 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1891,6 +1891,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) */ static int nvme_dev_add(struct nvme_dev *dev) { + struct pci_dev *pdev = dev->pci_dev; int res; unsigned nn, i; struct nvme_ns *ns; @@ -1900,8 +1901,7 @@ static int nvme_dev_add(struct nvme_dev *dev) dma_addr_t dma_addr; int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; - mem = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr, - GFP_KERNEL); + mem = dma_alloc_coherent(&pdev->dev, 8192, &dma_addr, GFP_KERNEL); if (!mem) return -ENOMEM; @@ -1919,8 +1919,8 @@ static int nvme_dev_add(struct nvme_dev *dev) memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); if (ctrl->mdts) dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9); - if ((dev->pci_dev->vendor == PCI_VENDOR_ID_INTEL) && - (dev->pci_dev->device == 0x0953) && ctrl->vs[3]) + if ((pdev->vendor == PCI_VENDOR_ID_INTEL) && + (pdev->device == 0x0953) && ctrl->vs[3]) dev->stripe_size = 1 << (ctrl->vs[3] + shift); id_ns = mem; From 9a6b94584de1a0467d85b435df9c744c5c45a270 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 10 Dec 2013 13:10:36 -0700 Subject: [PATCH 008/171] NVMe: Device resume error handling Adds controller error handling on resume power management. If the device fails to initialize, the device is queued for a reset. If the reset fails, a thread is spawned to remove the pci device. If the device resumes as "busy", the device is responding to admin commands but will not create IO queues. In this case, we need to remove the gendisks and free the IO queues since they can't be used and may be holding bios in their lists. 
From testing, the dma pools require a pci device so this had to change the pci driver 'remove' to release the dma resources in line with that call instead of after all references to the device are released. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 108 ++++++++++++++++++++++++++++++++------ include/linux/nvme.h | 1 + 2 files changed, 94 insertions(+), 15 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 300766973d76..000bca43c23b 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -58,6 +58,7 @@ module_param(use_threaded_interrupts, int, 0); static DEFINE_SPINLOCK(dev_list_lock); static LIST_HEAD(dev_list); static struct task_struct *nvme_thread; +static struct workqueue_struct *nvme_workq; /* * An NVM Express queue. Each device has at least two (one for admin @@ -1968,7 +1969,6 @@ static int nvme_dev_map(struct nvme_dev *dev) dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) goto disable; - pci_set_drvdata(pdev, dev); dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); if (!dev->bar) goto disable; @@ -1995,9 +1995,9 @@ static void nvme_dev_unmap(struct nvme_dev *dev) if (dev->bar) { iounmap(dev->bar); dev->bar = NULL; + pci_release_regions(dev->pci_dev); } - pci_release_regions(dev->pci_dev); if (pci_is_enabled(dev->pci_dev)) pci_disable_device(dev->pci_dev); } @@ -2085,11 +2085,6 @@ static void nvme_release_instance(struct nvme_dev *dev) static void nvme_free_dev(struct kref *kref) { struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); - nvme_dev_remove(dev); - nvme_dev_shutdown(dev); - nvme_free_queues(dev); - nvme_release_instance(dev); - nvme_release_prp_pools(dev); kfree(dev->queues); kfree(dev->entry); kfree(dev); @@ -2161,6 +2156,70 @@ static int nvme_dev_start(struct nvme_dev *dev) return result; } +static int nvme_remove_dead_ctrl(void *arg) +{ + struct nvme_dev *dev = (struct nvme_dev *)arg; + struct pci_dev *pdev = dev->pci_dev; + + if (pci_get_drvdata(pdev)) + pci_stop_and_remove_bus_device(pdev); + kref_put(&dev->kref, nvme_free_dev); + return 0; +} + +static void nvme_remove_disks(struct work_struct *ws) +{ + int i; + struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work); + + nvme_dev_remove(dev); + spin_lock(&dev_list_lock); + for (i = dev->queue_count - 1; i > 0; i--) { + BUG_ON(!dev->queues[i] || !dev->queues[i]->q_suspended); + nvme_free_queue(dev->queues[i]); + dev->queue_count--; + dev->queues[i] = NULL; + } + spin_unlock(&dev_list_lock); +} + +static int nvme_dev_resume(struct nvme_dev *dev) +{ + int ret; + + ret = nvme_dev_start(dev); + if (ret && ret != -EBUSY) + return ret; + if (ret == -EBUSY) { + spin_lock(&dev_list_lock); + INIT_WORK(&dev->reset_work, nvme_remove_disks); + queue_work(nvme_workq, &dev->reset_work); + spin_unlock(&dev_list_lock); + } + return 0; +} + +static void nvme_dev_reset(struct nvme_dev *dev) +{ + nvme_dev_shutdown(dev); + if (nvme_dev_resume(dev)) { + dev_err(&dev->pci_dev->dev, "Device failed to resume\n"); + kref_get(&dev->kref); + if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d", + dev->instance))) { + dev_err(&dev->pci_dev->dev, + "Failed to start controller remove task\n"); + kref_put(&dev->kref, nvme_free_dev); + } + } +} + +static void nvme_reset_failed_dev(struct work_struct *ws) +{ + struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work); + nvme_dev_reset(dev); +} + static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int result = -ENOMEM; 
@@ -2180,7 +2239,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) INIT_LIST_HEAD(&dev->namespaces); dev->pci_dev = pdev; - + pci_set_drvdata(pdev, dev); result = nvme_set_instance(dev); if (result) goto free; @@ -2232,7 +2291,19 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) static void nvme_remove(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); + + spin_lock(&dev_list_lock); + list_del_init(&dev->node); + spin_unlock(&dev_list_lock); + + pci_set_drvdata(pdev, NULL); + flush_work(&dev->reset_work); misc_deregister(&dev->miscdev); + nvme_dev_remove(dev); + nvme_dev_shutdown(dev); + nvme_free_queues(dev); + nvme_release_instance(dev); + nvme_release_prp_pools(dev); kref_put(&dev->kref, nvme_free_dev); } @@ -2256,13 +2327,12 @@ static int nvme_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct nvme_dev *ndev = pci_get_drvdata(pdev); - int ret; - ret = nvme_dev_start(ndev); - /* XXX: should remove gendisks if resume fails */ - if (ret) - nvme_free_queues(ndev); - return ret; + if (nvme_dev_resume(ndev) && !work_busy(&ndev->reset_work)) { + INIT_WORK(&ndev->reset_work, nvme_reset_failed_dev); + queue_work(nvme_workq, &ndev->reset_work); + } + return 0; } static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume); @@ -2303,9 +2373,14 @@ static int __init nvme_init(void) if (IS_ERR(nvme_thread)) return PTR_ERR(nvme_thread); + result = -ENOMEM; + nvme_workq = create_singlethread_workqueue("nvme"); + if (!nvme_workq) + goto kill_kthread; + result = register_blkdev(nvme_major, "nvme"); if (result < 0) - goto kill_kthread; + goto kill_workq; else if (result > 0) nvme_major = result; @@ -2316,6 +2391,8 @@ static int __init nvme_init(void) unregister_blkdev: unregister_blkdev(nvme_major, "nvme"); + kill_workq: + destroy_workqueue(nvme_workq); kill_kthread: kthread_stop(nvme_thread); return result; @@ -2325,6 +2402,7 @@ static void __exit nvme_exit(void) { pci_unregister_driver(&nvme_driver); unregister_blkdev(nvme_major, "nvme"); + destroy_workqueue(nvme_workq); kthread_stop(nvme_thread); } diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 5bc29197d90e..eed81cc56d7e 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -87,6 +87,7 @@ struct nvme_dev { struct list_head namespaces; struct kref kref; struct miscdevice miscdev; + struct work_struct reset_work; char name[12]; char serial[20]; char model[40]; From b8989db9d82465bf38a48a4d3ef32e7d8afc4d08 Mon Sep 17 00:00:00 2001 From: Alan Date: Mon, 20 Jan 2014 18:01:56 +0000 Subject: [PATCH 009/171] x86, doc, kconfig: Fix dud URL for Microcode data The actual data lives in the Intel download center, and that ought to also be a reliable way to continue to find it. Unfortunately the actual URL needed for doing it directly is about a foot long so give instructions. Signed-off-by: Alan Cox Link: http://lkml.kernel.org/r/20140120180056.7173.62222.stgit@alan.etchedpixels.co.uk Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0952ecd60eca..64199bc08d66 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1064,9 +1064,9 @@ config MICROCODE_INTEL This options enables microcode patch loading support for Intel processors. - For latest news and information on obtaining all the required - Intel ingredients for this driver, check: - . 
+ For the current Intel microcode data package go to + and search for + 'Linux Processor Microcode Data File'. config MICROCODE_AMD bool "AMD microcode loading support" From 3aba55605326be6d7e624090858aa921ab519cda Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 4 Dec 2013 21:05:46 +0900 Subject: [PATCH 010/171] irqchip: renesas-irqc: Use lazy disable Set the ->irq_enable() and ->irq_disable() methods to NULL to enable lazy disable of interrupts. This by itself provides some level of optimization, but is mainly enabled as ground work for future Suspend-to-RAM wake up support. Signed-off-by: Magnus Damm Cc: rob.herring@calxeda.com Cc: grant.likely@secretlab.ca Cc: horms@verge.net.au Link: http://lkml.kernel.org/r/20131204120546.29642.15772.sendpatchset@w520 Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-renesas-irqc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c index 2f404ba61c6c..8fdd7d68cd00 100644 --- a/drivers/irqchip/irq-renesas-irqc.c +++ b/drivers/irqchip/irq-renesas-irqc.c @@ -212,8 +212,6 @@ static int irqc_probe(struct platform_device *pdev) irq_chip->name = name; irq_chip->irq_mask = irqc_irq_disable; irq_chip->irq_unmask = irqc_irq_enable; - irq_chip->irq_enable = irqc_irq_enable; - irq_chip->irq_disable = irqc_irq_disable; irq_chip->irq_set_type = irqc_irq_set_type; irq_chip->flags = IRQCHIP_SKIP_SET_WAKE; From 43881ec7a88a3d3b2fd6da58168173e135b41fba Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 4 Dec 2013 21:05:56 +0900 Subject: [PATCH 011/171] irqchip: renesas-irqc: Enable mask on suspend Now when lazy interrupt disable has been enabled in the driver then extend the code to set IRQCHIP_MASK_ON_SUSPEND which tells the core that only IRQs marked as wakeups need to stay enabled during Suspend-to-RAM. Signed-off-by: Magnus Damm Cc: rob.herring@calxeda.com Cc: grant.likely@secretlab.ca Cc: horms@verge.net.au Link: http://lkml.kernel.org/r/20131204120556.29642.27021.sendpatchset@w520 Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-renesas-irqc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c index 8fdd7d68cd00..082d95cb5528 100644 --- a/drivers/irqchip/irq-renesas-irqc.c +++ b/drivers/irqchip/irq-renesas-irqc.c @@ -213,7 +213,7 @@ static int irqc_probe(struct platform_device *pdev) irq_chip->irq_mask = irqc_irq_disable; irq_chip->irq_unmask = irqc_irq_enable; irq_chip->irq_set_type = irqc_irq_set_type; - irq_chip->flags = IRQCHIP_SKIP_SET_WAKE; + irq_chip->flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND; p->irq_domain = irq_domain_add_simple(pdev->dev.of_node, p->number_of_irqs, From 397e7b515785cad6e10b29f3001fd80c3f519bb8 Mon Sep 17 00:00:00 2001 From: Daniel Tang Date: Thu, 5 Dec 2013 17:12:17 +1100 Subject: [PATCH 012/171] irqchip: Add support for TI-NSPIRE irqchip This patch adds support for the interrupt controllers found in some TI-Nspire models. FIQ support was taken out to simplify the driver code and may be added in later. Since Linux on this platform doesn't really use FIQs, this wasn't really that important in the first place. 
[ tglx: Made zevio_handle_irq static and reordered __init functions ] Signed-off-by: Daniel Tang Acked-by: Grant Likely Link: http://lkml.kernel.org/r/1386223937-12189-1-git-send-email-dt.tangr@gmail.com Signed-off-by: Thomas Gleixner --- .../interrupt-controller/lsi,zevio-intc.txt | 18 +++ drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-zevio.c | 127 ++++++++++++++++++ 3 files changed, 146 insertions(+) create mode 100644 Documentation/devicetree/bindings/interrupt-controller/lsi,zevio-intc.txt create mode 100644 drivers/irqchip/irq-zevio.c diff --git a/Documentation/devicetree/bindings/interrupt-controller/lsi,zevio-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/lsi,zevio-intc.txt new file mode 100644 index 000000000000..aee38e7c13e7 --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/lsi,zevio-intc.txt @@ -0,0 +1,18 @@ +TI-NSPIRE interrupt controller + +Required properties: +- compatible: Compatible property value should be "lsi,zevio-intc". + +- reg: Physical base address of the controller and length of memory mapped + region. + +- interrupt-controller : Identifies the node as an interrupt controller + +Example: + +interrupt-controller { + compatible = "lsi,zevio-intc"; + interrupt-controller; + reg = <0xDC000000 0x1000>; + #interrupt-cells = <1>; +}; diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index c60b9010b152..292b10645fd8 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -20,5 +20,6 @@ obj-$(CONFIG_SIRF_IRQ) += irq-sirfsoc.o obj-$(CONFIG_RENESAS_INTC_IRQPIN) += irq-renesas-intc-irqpin.o obj-$(CONFIG_RENESAS_IRQC) += irq-renesas-irqc.o obj-$(CONFIG_VERSATILE_FPGA_IRQ) += irq-versatile-fpga.o +obj-$(CONFIG_ARCH_NSPIRE) += irq-zevio.o obj-$(CONFIG_ARCH_VT8500) += irq-vt8500.o obj-$(CONFIG_TB10X_IRQC) += irq-tb10x.o diff --git a/drivers/irqchip/irq-zevio.c b/drivers/irqchip/irq-zevio.c new file mode 100644 index 000000000000..8ed04c4a43ee --- /dev/null +++ b/drivers/irqchip/irq-zevio.c @@ -0,0 +1,127 @@ +/* + * linux/drivers/irqchip/irq-zevio.c + * + * Copyright (C) 2013 Daniel Tang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. 
+ * + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include "irqchip.h" + +#define IO_STATUS 0x000 +#define IO_RAW_STATUS 0x004 +#define IO_ENABLE 0x008 +#define IO_DISABLE 0x00C +#define IO_CURRENT 0x020 +#define IO_RESET 0x028 +#define IO_MAX_PRIOTY 0x02C + +#define IO_IRQ_BASE 0x000 +#define IO_FIQ_BASE 0x100 + +#define IO_INVERT_SEL 0x200 +#define IO_STICKY_SEL 0x204 +#define IO_PRIORITY_SEL 0x300 + +#define MAX_INTRS 32 +#define FIQ_START MAX_INTRS + +static struct irq_domain *zevio_irq_domain; +static void __iomem *zevio_irq_io; + +static void zevio_irq_ack(struct irq_data *irqd) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(irqd); + struct irq_chip_regs *regs = + &container_of(irqd->chip, struct irq_chip_type, chip)->regs; + + readl(gc->reg_base + regs->ack); +} + +static asmlinkage void __exception_irq_entry zevio_handle_irq(struct pt_regs *regs) +{ + int irqnr; + + while (readl(zevio_irq_io + IO_STATUS)) { + irqnr = readl(zevio_irq_io + IO_CURRENT); + irqnr = irq_find_mapping(zevio_irq_domain, irqnr); + handle_IRQ(irqnr, regs); + }; +} + +static void __init zevio_init_irq_base(void __iomem *base) +{ + /* Disable all interrupts */ + writel(~0, base + IO_DISABLE); + + /* Accept interrupts of all priorities */ + writel(0xF, base + IO_MAX_PRIOTY); + + /* Reset existing interrupts */ + readl(base + IO_RESET); +} + +static int __init zevio_of_init(struct device_node *node, + struct device_node *parent) +{ + unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; + struct irq_chip_generic *gc; + int ret; + + if (WARN_ON(zevio_irq_io || zevio_irq_domain)) + return -EBUSY; + + zevio_irq_io = of_iomap(node, 0); + BUG_ON(!zevio_irq_io); + + /* Do not invert interrupt status bits */ + writel(~0, zevio_irq_io + IO_INVERT_SEL); + + /* Disable sticky interrupts */ + writel(0, zevio_irq_io + IO_STICKY_SEL); + + /* We don't use IRQ priorities. Set each IRQ to highest priority. 
*/ + memset_io(zevio_irq_io + IO_PRIORITY_SEL, 0, MAX_INTRS * sizeof(u32)); + + /* Init IRQ and FIQ */ + zevio_init_irq_base(zevio_irq_io + IO_IRQ_BASE); + zevio_init_irq_base(zevio_irq_io + IO_FIQ_BASE); + + zevio_irq_domain = irq_domain_add_linear(node, MAX_INTRS, + &irq_generic_chip_ops, NULL); + BUG_ON(!zevio_irq_domain); + + ret = irq_alloc_domain_generic_chips(zevio_irq_domain, MAX_INTRS, 1, + "zevio_intc", handle_level_irq, + clr, 0, IRQ_GC_INIT_MASK_CACHE); + BUG_ON(ret); + + gc = irq_get_domain_generic_chip(zevio_irq_domain, 0); + gc->reg_base = zevio_irq_io; + gc->chip_types[0].chip.irq_ack = zevio_irq_ack; + gc->chip_types[0].chip.irq_mask = irq_gc_mask_disable_reg; + gc->chip_types[0].chip.irq_unmask = irq_gc_unmask_enable_reg; + gc->chip_types[0].regs.mask = IO_IRQ_BASE + IO_ENABLE; + gc->chip_types[0].regs.enable = IO_IRQ_BASE + IO_ENABLE; + gc->chip_types[0].regs.disable = IO_IRQ_BASE + IO_DISABLE; + gc->chip_types[0].regs.ack = IO_IRQ_BASE + IO_RESET; + + set_handle_irq(zevio_handle_irq); + + pr_info("TI-NSPIRE classic IRQ controller\n"); + return 0; +} + +IRQCHIP_DECLARE(zevio_irq, "lsi,zevio-intc", zevio_of_init); From 23e21ddf6c3d9cb37aa69da496f80e8481e2c8bd Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 23 Jan 2014 15:20:01 +0100 Subject: [PATCH 013/171] regulator: ab3100: cast fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The AB3100 regulator driver emits a warning when compiled on 64bit systems like this: drivers/regulator/ab3100.c: In function ‘ab3100_regulator_of_probe’: srivers/regulator/ab3100.c:649:4: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] As the int is a different size than the 64bit pointer used to pass regulator data. Switch to using an unsigned long as ID passed for the regulator to get rid of the warning. Reported-by: Lee Jones Signed-off-by: Linus Walleij Signed-off-by: Mark Brown --- drivers/regulator/ab3100.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/ab3100.c b/drivers/regulator/ab3100.c index 77b46d0b37a6..e10febe9ec34 100644 --- a/drivers/regulator/ab3100.c +++ b/drivers/regulator/ab3100.c @@ -498,7 +498,7 @@ static int ab3100_regulator_register(struct platform_device *pdev, struct ab3100_platform_data *plfdata, struct regulator_init_data *init_data, struct device_node *np, - int id) + unsigned long id) { struct regulator_desc *desc; struct ab3100_regulator *reg; @@ -646,7 +646,7 @@ ab3100_regulator_of_probe(struct platform_device *pdev, struct device_node *np) err = ab3100_regulator_register( pdev, NULL, ab3100_regulator_matches[i].init_data, ab3100_regulator_matches[i].of_node, - (int) ab3100_regulator_matches[i].driver_data); + (unsigned long)ab3100_regulator_matches[i].driver_data); if (err) { ab3100_regulators_remove(pdev); return err; From fb53a1ab88d14848dc292842e35c3bda3a665997 Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Thu, 23 Jan 2014 16:13:32 -0600 Subject: [PATCH 014/171] x86/quirks: Add workaround for AMD F16h Erratum792 The workaround for this Erratum is included in AGESA. But BIOSes spun only after Jan2014 will have the fix (atleast server versions of the chip). The erratum affects both embedded and server platforms and since we cannot say with certainity that ALL BIOSes on systems out in the field will have the fix, we should probably insulate ourselves in case BIOS does not do the right thing or someone is using old BIOSes. 
Refer to Revision Guide for AMD F16h models 00h-0fh, document 51810 Rev. 3.04, November 2013 for details on the Erratum. Tested the patch on a Fam16h server platform and it works fine. Signed-off-by: Aravind Gopalakrishnan Cc: Cc: Cc: Cc: Cc: Link: http://lkml.kernel.org/r/1390515212-1824-1-git-send-email-Aravind.Gopalakrishnan@amd.com [ Minor edits. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/quirks.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 04ee1e2e4c02..7c6acd4b8995 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -571,3 +571,40 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F5, quirk_amd_nb_node); #endif + +#ifdef CONFIG_PCI +/* + * Processor does not ensure DRAM scrub read/write sequence + * is atomic wrt accesses to CC6 save state area. Therefore + * if a concurrent scrub read/write access is to same address + * the entry may appear as if it is not written. This quirk + * applies to Fam16h models 00h-0Fh + * + * See "Revision Guide" for AMD F16h models 00h-0fh, + * document 51810 rev. 3.04, Nov 2013 + */ +static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev) +{ + u32 val; + + /* + * Suggested workaround: + * set D18F3x58[4:0] = 00h and set D18F3x5C[0] = 0b + */ + pci_read_config_dword(dev, 0x58, &val); + if (val & 0x1F) { + val &= ~(0x1F); + pci_write_config_dword(dev, 0x58, val); + } + + pci_read_config_dword(dev, 0x5C, &val); + if (val & BIT(0)) { + val &= ~BIT(0); + pci_write_config_dword(dev, 0x5c, val); + } +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3, + amd_disable_seq_and_redirect_scrub); + +#endif From 2993ae3305ad10b41e0d0bc2662f7754ee8e30fa Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 21 Jan 2014 10:22:09 +0300 Subject: [PATCH 015/171] x86/AMD/NB: Fix amd_set_subcaches() parameter type This is under CAP_SYS_ADMIN, but Smatch complains that mask comes from the user and the test for "mask > 0xf" can underflow. The fix is simple: amd_set_subcaches() should hand down not an 'int' but an 'unsigned long' like it was originally intended to do.
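To make the Smatch complaint concrete, here is an illustrative fragment built around the quoted "mask > 0xf" test (not the driver code itself): with a signed parameter a negative value from userspace slips past the upper-bound check, while an unsigned long turns the same bit pattern into a huge value that the check rejects:

	int smask = -1;					/* user-supplied value, signed parameter */
	if (smask > 0xf)				/* false: -1 < 0xf, so the bogus mask is accepted */
		return -EINVAL;

	unsigned long umask = (unsigned long)-1;	/* same bits, unsigned parameter */
	if (umask > 0xf)				/* true: the out-of-range mask is rejected */
		return -EINVAL;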
Signed-off-by: Dan Carpenter Acked-by: Borislav Petkov Cc: Borislav Petkov Cc: Daniel J Blueman Link: http://lkml.kernel.org/r/20140121072209.GA22095@elgon.mountain Signed-off-by: Ingo Molnar --- arch/x86/include/asm/amd_nb.h | 2 +- arch/x86/kernel/amd_nb.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index a54ee1d054d9..aaac3b2fb746 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -19,7 +19,7 @@ extern int amd_cache_northbridges(void); extern void amd_flush_garts(void); extern int amd_numa_init(void); extern int amd_get_subcaches(int); -extern int amd_set_subcaches(int, int); +extern int amd_set_subcaches(int, unsigned long); struct amd_l3_cache { unsigned indices; diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 59554dca96ec..dec8de4e1663 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -179,7 +179,7 @@ int amd_get_subcaches(int cpu) return (mask >> (4 * cuid)) & 0xf; } -int amd_set_subcaches(int cpu, int mask) +int amd_set_subcaches(int cpu, unsigned long mask) { static unsigned int reset, ban; struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu)); From ec65993443736a5091b68e80ff1734548944a4b8 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 21 Jan 2014 14:33:16 -0800 Subject: [PATCH 016/171] mm, x86: Account for TLB flushes only when debugging Bisection between 3.11 and 3.12 fingered commit 9824cf97 ("mm: vmstats: tlb flush counters") to cause overhead problems. The counters are undeniably useful but how often do we really need to debug TLB flush related issues? It does not justify taking the penalty everywhere so make it a debugging option. Signed-off-by: Mel Gorman Tested-by: Davidlohr Bueso Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Cc: Hugh Dickins Cc: Alex Shi Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-XzxjntugxuwpxXhcrxqqh53b@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 6 +++--- arch/x86/kernel/cpu/mtrr/generic.c | 4 ++-- arch/x86/mm/tlb.c | 14 +++++++------- include/linux/vm_event_item.h | 4 +++- include/linux/vmstat.h | 8 ++++++++ mm/vmstat.c | 4 +++- 6 files changed, 26 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index e6d90babc245..04905bfc508b 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -62,7 +62,7 @@ static inline void __flush_tlb_all(void) static inline void __flush_tlb_one(unsigned long addr) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ONE); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); __flush_tlb_single(addr); } @@ -93,13 +93,13 @@ static inline void __flush_tlb_one(unsigned long addr) */ static inline void __flush_tlb_up(void) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); __flush_tlb(); } static inline void flush_tlb_all(void) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); __flush_tlb_all(); } diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index ce2d0a2c3e4f..0e25a1bc5ab5 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -683,7 +683,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) } /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); 
__flush_tlb(); /* Save MTRR state */ @@ -697,7 +697,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) static void post_set(void) __releases(set_atomicity_lock) { /* Flush TLBs (no need to flush caches - they are disabled) */ - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); __flush_tlb(); /* Intel (P6) standard MTRRs */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index ae699b3bbac8..05446c1cccfe 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -103,7 +103,7 @@ static void flush_tlb_func(void *info) if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) return; - count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { if (f->flush_end == TLB_FLUSH_ALL) local_flush_tlb(); @@ -131,7 +131,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, info.flush_start = start; info.flush_end = end; - count_vm_event(NR_TLB_REMOTE_FLUSH); + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); if (is_uv_system()) { unsigned int cpu; @@ -151,7 +151,7 @@ void flush_tlb_current_task(void) preempt_disable(); - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); @@ -215,7 +215,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, /* tlb_flushall_shift is on balance point, details in commit log */ if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); } else { if (has_large_page(mm, start, end)) { @@ -224,7 +224,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, } /* flush range by one by one 'invlpg' */ for (addr = start; addr < end; addr += PAGE_SIZE) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ONE); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); __flush_tlb_single(addr); } @@ -262,7 +262,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) static void do_flush_tlb_all(void *info) { - count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); __flush_tlb_all(); if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) leave_mm(smp_processor_id()); @@ -270,7 +270,7 @@ static void do_flush_tlb_all(void *info) void flush_tlb_all(void) { - count_vm_event(NR_TLB_REMOTE_FLUSH); + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); on_each_cpu(do_flush_tlb_all, NULL, 1); } diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index c557c6d096de..3a712e2e7d76 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -71,12 +71,14 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_ZERO_PAGE_ALLOC, THP_ZERO_PAGE_ALLOC_FAILED, #endif +#ifdef CONFIG_DEBUG_TLBFLUSH #ifdef CONFIG_SMP NR_TLB_REMOTE_FLUSH, /* cpu tried to flush others' tlbs */ NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */ -#endif +#endif /* CONFIG_SMP */ NR_TLB_LOCAL_FLUSH_ALL, NR_TLB_LOCAL_FLUSH_ONE, +#endif /* CONFIG_DEBUG_TLBFLUSH */ NR_VM_EVENT_ITEMS }; diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index e4b948080d20..80ebba9c2e87 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -83,6 +83,14 @@ static inline void vm_events_fold_cpu(int cpu) #define count_vm_numa_events(x, y) do { (void)(y); } while (0) #endif 
/* CONFIG_NUMA_BALANCING */ +#ifdef CONFIG_DEBUG_TLBFLUSH +#define count_vm_tlb_event(x) count_vm_event(x) +#define count_vm_tlb_events(x, y) count_vm_events(x, y) +#else +#define count_vm_tlb_event(x) do {} while (0) +#define count_vm_tlb_events(x, y) do { (void)(y); } while (0) +#endif + #define __count_zone_vm_events(item, zone, delta) \ __count_vm_events(item##_NORMAL - ZONE_NORMAL + \ zone_idx(zone), delta) diff --git a/mm/vmstat.c b/mm/vmstat.c index 72496140ac08..def5dd2fbe61 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -851,12 +851,14 @@ const char * const vmstat_text[] = { "thp_zero_page_alloc", "thp_zero_page_alloc_failed", #endif +#ifdef CONFIG_DEBUG_TLBFLUSH #ifdef CONFIG_SMP "nr_tlb_remote_flush", "nr_tlb_remote_flush_received", -#endif +#endif /* CONFIG_SMP */ "nr_tlb_local_flush_all", "nr_tlb_local_flush_one", +#endif /* CONFIG_DEBUG_TLBFLUSH */ #endif /* CONFIG_VM_EVENTS_COUNTERS */ }; From 15aa368255f249df0b2af630c9487bb5471bd7da Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 21 Jan 2014 14:33:18 -0800 Subject: [PATCH 017/171] x86/mm: Clean up inconsistencies when flushing TLB ranges NR_TLB_LOCAL_FLUSH_ALL is not always accounted for correctly and the comparison with total_vm is done before taking tlb_flushall_shift into account. Clean it up. Signed-off-by: Mel Gorman Tested-by: Davidlohr Bueso Reviewed-by: Alex Shi Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Hugh Dickins Link: http://lkml.kernel.org/n/tip-Iz5gcahrgskIldvukulzi0hh@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/tlb.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 05446c1cccfe..5176526ddd59 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -189,6 +189,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, { unsigned long addr; unsigned act_entries, tlb_entries = 0; + unsigned long nr_base_pages; preempt_disable(); if (current->active_mm != mm) @@ -210,18 +211,17 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, tlb_entries = tlb_lli_4k[ENTRIES]; else tlb_entries = tlb_lld_4k[ENTRIES]; + /* Assume all of TLB entries was occupied by this task */ - act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm; + act_entries = tlb_entries >> tlb_flushall_shift; + act_entries = mm->total_vm > act_entries ? act_entries : mm->total_vm; + nr_base_pages = (end - start) >> PAGE_SHIFT; /* tlb_flushall_shift is on balance point, details in commit log */ - if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) { + if (nr_base_pages > act_entries || has_large_page(mm, start, end)) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); } else { - if (has_large_page(mm, start, end)) { - local_flush_tlb(); - goto flush_all; - } /* flush range by one by one 'invlpg' */ for (addr = start; addr < end; addr += PAGE_SIZE) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); From 71b54f8263860a37dd9f50f81880a9d681fd9c10 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 21 Jan 2014 14:33:19 -0800 Subject: [PATCH 018/171] x86/mm: Eliminate redundant page table walk during TLB range flushing When choosing between doing an address space or ranged flush, the x86 implementation of flush_tlb_mm_range takes into account whether there are any large pages in the range. A per-page flush typically requires fewer entries than would covered by a single large page and the check is redundant. There is one potential exception. 
THP migration flushes single THP entries and it conceivably would benefit from flushing a single entry instead of the mm. However, this flush is after a THP allocation, copy and page table update potentially with any other threads serialised behind it. In comparison to that, the flush is noise. It makes more sense to optimise balancing to require fewer flushes than to optimise the flush itself. This patch deletes the redundant huge page check. Signed-off-by: Mel Gorman Tested-by: Davidlohr Bueso Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Cc: Alex Shi Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-sgei1drpOcburujPsfh6ovmo@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/tlb.c | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 5176526ddd59..dd8dda167a24 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -158,32 +158,6 @@ void flush_tlb_current_task(void) preempt_enable(); } -/* - * It can find out the THP large page, or - * HUGETLB page in tlb_flush when THP disabled - */ -static inline unsigned long has_large_page(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - unsigned long addr = ALIGN(start, HPAGE_SIZE); - for (; addr < end; addr += HPAGE_SIZE) { - pgd = pgd_offset(mm, addr); - if (likely(!pgd_none(*pgd))) { - pud = pud_offset(pgd, addr); - if (likely(!pud_none(*pud))) { - pmd = pmd_offset(pud, addr); - if (likely(!pmd_none(*pmd))) - if (pmd_large(*pmd)) - return addr; - } - } - } - return 0; -} - void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long vmflag) { @@ -218,7 +192,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, nr_base_pages = (end - start) >> PAGE_SHIFT; /* tlb_flushall_shift is on balance point, details in commit log */ - if (nr_base_pages > act_entries || has_large_page(mm, start, end)) { + if (nr_base_pages > act_entries) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); } else { From f98b7a772ab51b52ca4d2a14362fc0e0c8a2e0f3 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 21 Jan 2014 14:33:21 -0800 Subject: [PATCH 019/171] x86: mm: change tlb_flushall_shift for IvyBridge There was a large performance regression that was bisected to commit 611ae8e3 ("x86/tlb: enable tlb flush range support for x86"). This patch simply changes the default balance point between a local and global flush for IvyBridge. In the interest of allowing the tests to be reproduced, this patch was tested using mmtests 0.15 with the following configurations configs/config-global-dhp__tlbflush-performance configs/config-global-dhp__scheduler-performance configs/config-global-dhp__network-performance Results are from two machines Ivybridge 4 threads: Intel(R) Core(TM) i3-3240 CPU @ 3.40GHz Ivybridge 8 threads: Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz Page fault microbenchmark showed nothing interesting. Ebizzy was configured to run multiple iterations and threads. Thread counts ranged from 1 to NR_CPUS*2. For each thread count, it ran 100 iterations and each iteration lasted 10 seconds. 
Ivybridge 4 threads 3.13.0-rc7 3.13.0-rc7 vanilla altshift-v3 Mean 1 6395.44 ( 0.00%) 6789.09 ( 6.16%) Mean 2 7012.85 ( 0.00%) 8052.16 ( 14.82%) Mean 3 6403.04 ( 0.00%) 6973.74 ( 8.91%) Mean 4 6135.32 ( 0.00%) 6582.33 ( 7.29%) Mean 5 6095.69 ( 0.00%) 6526.68 ( 7.07%) Mean 6 6114.33 ( 0.00%) 6416.64 ( 4.94%) Mean 7 6085.10 ( 0.00%) 6448.51 ( 5.97%) Mean 8 6120.62 ( 0.00%) 6462.97 ( 5.59%) Ivybridge 8 threads 3.13.0-rc7 3.13.0-rc7 vanilla altshift-v3 Mean 1 7336.65 ( 0.00%) 7787.02 ( 6.14%) Mean 2 8218.41 ( 0.00%) 9484.13 ( 15.40%) Mean 3 7973.62 ( 0.00%) 8922.01 ( 11.89%) Mean 4 7798.33 ( 0.00%) 8567.03 ( 9.86%) Mean 5 7158.72 ( 0.00%) 8214.23 ( 14.74%) Mean 6 6852.27 ( 0.00%) 7952.45 ( 16.06%) Mean 7 6774.65 ( 0.00%) 7536.35 ( 11.24%) Mean 8 6510.50 ( 0.00%) 6894.05 ( 5.89%) Mean 12 6182.90 ( 0.00%) 6661.29 ( 7.74%) Mean 16 6100.09 ( 0.00%) 6608.69 ( 8.34%) Ebizzy hits the worst case scenario for TLB range flushing every time and it shows for these Ivybridge CPUs at least that the default choice is a poor on. The patch addresses the problem. Next was a tlbflush microbenchmark written by Alex Shi at http://marc.info/?l=linux-kernel&m=133727348217113 . It measures access costs while the TLB is being flushed. The expectation is that if there are always full TLB flushes that the benchmark would suffer and it benefits from range flushing There are 320 iterations of the test per thread count. The number of entries is randomly selected with a min of 1 and max of 512. To ensure a reasonably even spread of entries, the full range is broken up into 8 sections and a random number selected within that section. iteration 1, random number between 0-64 iteration 2, random number between 64-128 etc This is still a very weak methodology. When you do not know what are typical ranges, random is a reasonable choice but it can be easily argued that the opimisation was for smaller ranges and an even spread is not representative of any workload that matters. To improve this, we'd need to know the probability distribution of TLB flush range sizes for a set of workloads that are considered "common", build a synthetic trace and feed that into this benchmark. Even that is not perfect because it would not account for the time between flushes but there are limits of what can be reasonably done and still be doing something useful. If a representative synthetic trace is provided then this benchmark could be revisited and the shift values retuned. Ivybridge 4 threads 3.13.0-rc7 3.13.0-rc7 vanilla altshift-v3 Mean 1 10.50 ( 0.00%) 10.50 ( 0.03%) Mean 2 17.59 ( 0.00%) 17.18 ( 2.34%) Mean 3 22.98 ( 0.00%) 21.74 ( 5.41%) Mean 5 47.13 ( 0.00%) 46.23 ( 1.92%) Mean 8 43.30 ( 0.00%) 42.56 ( 1.72%) Ivybridge 8 threads 3.13.0-rc7 3.13.0-rc7 vanilla altshift-v3 Mean 1 9.45 ( 0.00%) 9.36 ( 0.93%) Mean 2 9.37 ( 0.00%) 9.70 ( -3.54%) Mean 3 9.36 ( 0.00%) 9.29 ( 0.70%) Mean 5 14.49 ( 0.00%) 15.04 ( -3.75%) Mean 8 41.08 ( 0.00%) 38.73 ( 5.71%) Mean 13 32.04 ( 0.00%) 31.24 ( 2.49%) Mean 16 40.05 ( 0.00%) 39.04 ( 2.51%) For both CPUs, average access time is reduced which is good as this is the benchmark that was used to tune the shift values in the first place albeit it is now known *how* the benchmark was used. The scheduler benchmarks were somewhat inconclusive. They showed gains and losses and makes me reconsider how stable those benchmarks really are or if something else might be interfering with the test results recently. Network benchmarks were inconclusive. 
Almost all results were flat except for netperf-udp tests on the 4 thread machine. These results were unstable and showed large variations between reboots. It is unknown if this is a recent problems but I've noticed before that netperf-udp results tend to vary. Based on these results, changing the default for Ivybridge seems like a logical choice. Signed-off-by: Mel Gorman Tested-by: Davidlohr Bueso Reviewed-by: Alex Shi Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-cqnadffh1tiqrshthRj3Esge@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index ea04b342c026..bbe1b8b1f1c4 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -628,7 +628,7 @@ static void intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c) tlb_flushall_shift = 5; break; case 0x63a: /* Ivybridge */ - tlb_flushall_shift = 1; + tlb_flushall_shift = 2; break; default: tlb_flushall_shift = 6; From b9a3b4c976c1209957326537ad5c0bb633dfd764 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 21 Jan 2014 14:33:22 -0800 Subject: [PATCH 020/171] mm, x86: Revisit tlb_flushall_shift tuning for page flushes except on IvyBridge There was a large ebizzy performance regression that was bisected to commit 611ae8e3 (x86/tlb: enable tlb flush range support for x86). The problem was related to the tlb_flushall_shift tuning for IvyBridge which was altered. The problem is that it is not clear if the tuning values for each CPU family is correct as the methodology used to tune the values is unclear. This patch uses a conservative tlb_flushall_shift value for all CPU families except IvyBridge so the decision can be revisited if any regression is found as a result of this change. IvyBridge is an exception as testing with one methodology determined that the value of 2 is acceptable. Details are in the changelog for the patch "x86: mm: Change tlb_flushall_shift for IvyBridge". One important aspect of this to watch out for is Xen. The original commit log mentioned large performance gains on Xen. It's possible Xen is more sensitive to this value if it flushes small ranges of pages more frequently than workloads on bare metal typically do. 
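For reference, this is the heuristic the knob feeds, condensed from the flush_tlb_mm_range() hunks earlier in the series (variable names as in that code); a larger shift shrinks the per-range budget and so favours a full flush sooner:

	act_entries = tlb_entries >> tlb_flushall_shift;
	act_entries = mm->total_vm > act_entries ? act_entries : mm->total_vm;
	nr_base_pages = (end - start) >> PAGE_SHIFT;

	if (nr_base_pages > act_entries)
		local_flush_tlb();			/* flush the whole TLB */
	else
		for (addr = start; addr < end; addr += PAGE_SIZE)
			__flush_tlb_single(addr);	/* per-page invlpg */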
Signed-off-by: Mel Gorman Tested-by: Davidlohr Bueso Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Cc: Alex Shi Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-dyzMww3fqugnhbhgo6Gxmtkw@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 5 +---- arch/x86/kernel/cpu/intel.c | 10 +++------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 59bfebc8c805..96abccaada33 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -768,10 +768,7 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) static void cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c) { - tlb_flushall_shift = 5; - - if (c->x86 <= 0x11) - tlb_flushall_shift = 4; + tlb_flushall_shift = 6; } static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index bbe1b8b1f1c4..d358a3928b8f 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -615,21 +615,17 @@ static void intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c) case 0x61d: /* six-core 45 nm xeon "Dunnington" */ tlb_flushall_shift = -1; break; + case 0x63a: /* Ivybridge */ + tlb_flushall_shift = 2; + break; case 0x61a: /* 45 nm nehalem, "Bloomfield" */ case 0x61e: /* 45 nm nehalem, "Lynnfield" */ case 0x625: /* 32 nm nehalem, "Clarkdale" */ case 0x62c: /* 32 nm nehalem, "Gulftown" */ case 0x62e: /* 45 nm nehalem-ex, "Beckton" */ case 0x62f: /* 32 nm Xeon E7 */ - tlb_flushall_shift = 6; - break; case 0x62a: /* SandyBridge */ case 0x62d: /* SandyBridge, "Romely-EP" */ - tlb_flushall_shift = 5; - break; - case 0x63a: /* Ivybridge */ - tlb_flushall_shift = 2; - break; default: tlb_flushall_shift = 6; } From a85eba8814631d0d48361c8b9a7ee0984e80c03c Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 21 Jan 2014 14:33:15 -0800 Subject: [PATCH 021/171] arch/x86/mm/srat: Skip NUMA_NO_NODE while parsing SLIT When ACPI SLIT table has an I/O locality (i.e. a locality unique to an I/O device), numa_set_distance() emits this warning message: NUMA: Warning: node ids are out of bound, from=-1 to=-1 distance=10 acpi_numa_slit_init() calls numa_set_distance() with pxm_to_node(), which assumes that all localities have been parsed with SRAT previously. SRAT does not list I/O localities, where as SLIT lists all localities including I/Os. Hence, pxm_to_node() returns NUMA_NO_NODE (-1) for an I/O locality. I/O localities are not supported and are ignored today, but emitting such warning message leads to unnecessary confusion. Change acpi_numa_slit_init() to avoid calling numa_set_distance() with NUMA_NO_NODE. Signed-off-by: Toshi Kani Acked-by: David Rientjes Signed-off-by: Andrew Morton Cc: Yinghai Lu Link: http://lkml.kernel.org/n/tip-dSvpjjvp8aMzs1ybkftxohlh@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/srat.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 266ca912f62e..5ecf65117e6f 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -42,15 +42,25 @@ static __init inline int srat_disabled(void) return acpi_numa < 0; } -/* Callback for SLIT parsing */ +/* + * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for + * I/O localities since SRAT does not list them. I/O localities are + * not supported at this point. 
+ */ void __init acpi_numa_slit_init(struct acpi_table_slit *slit) { int i, j; - for (i = 0; i < slit->locality_count; i++) - for (j = 0; j < slit->locality_count; j++) + for (i = 0; i < slit->locality_count; i++) { + if (pxm_to_node(i) == NUMA_NO_NODE) + continue; + for (j = 0; j < slit->locality_count; j++) { + if (pxm_to_node(j) == NUMA_NO_NODE) + continue; numa_set_distance(pxm_to_node(i), pxm_to_node(j), slit->entry[slit->locality_count * i + j]); + } + } } /* Callback for Proximity Domain -> x2APIC mapping */ From edc6bc784028f2ef80dff0ff697363ff5a06e3a3 Mon Sep 17 00:00:00 2001 From: David Cohen Date: Tue, 21 Jan 2014 10:41:39 -0800 Subject: [PATCH 022/171] x86/intel/mid: Fix X86_INTEL_MID dependencies This patch fixes the following warning: warning: (X86_INTEL_MID) selects INTEL_SCU_IPC which has unmet direct dependencies (X86 && X86_PLATFORM_DEVICES && X86_INTEL_MID) It happens because when selected, X86_INTEL_MID tries to select INTEL_SCU_IPC regardless all its dependencies are met or not. This patch fixes it by adding the missing X86_PLATFORM_DEVICES dependency to X86_INTEL_MID. Reported-by: kbuild test robot Signed-off-by: David Cohen Link: http://lkml.kernel.org/r/1390329699-20782-1-git-send-email-david.a.cohen@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cb9af474dfca..3b6922ebf170 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -443,6 +443,7 @@ config X86_INTEL_MID bool "Intel MID platform support" depends on X86_32 depends on X86_EXTENDED_PLATFORM + depends on X86_PLATFORM_DEVICES depends on PCI depends on PCI_GOANY depends on X86_IO_APIC From 0d4dd797564cddc1f71ab0b239e9ea50ddd40b2a Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Sat, 25 Jan 2014 23:50:23 +0200 Subject: [PATCH 023/171] perf/doc: Remove mention of non-existent set_perf_event_pending() from design.txt set_perf_event_pending() was removed in e360adbe ("irq_work: Add generic hardirq context callbacks"). Signed-off-by: Baruch Siach Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/4c54761865d40210be0628cb84701afc5d57b5d8.1390686193.git.baruch@tkos.co.il Signed-off-by: Ingo Molnar --- tools/perf/design.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/design.txt b/tools/perf/design.txt index 67e5d0cace85..63a0e6f04a01 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -454,7 +454,6 @@ So to start with, in order to add HAVE_PERF_EVENTS to your Kconfig, you will need at least this: - asm/perf_event.h - a basic stub will suffice at first - support for atomic64 types (and associated helper functions) - - set_perf_event_pending() implemented If your architecture does have hardware capabilities, you can override the weak stub hw_perf_event_init() to register hardware counters. From 950b8354716eb1f9c0b39777d379efa5f4125c04 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 22 Jan 2014 21:58:46 +0200 Subject: [PATCH 024/171] perf tools: Demangle kernel and kernel module symbols too Some kernels contain C++ code, and thus their symbols need to be demangled. This allows 'perf kvm top' to generate readable output. 
Signed-off-by: Avi Kivity Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/26f71bf5bf7ee1408e3f1a803556d5df18223ef1.1390420726.git.avi@cloudius-systems.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 759456728703..8f12f0f5101d 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -922,6 +922,7 @@ int dso__load_sym(struct dso *dso, struct map *map, (u64)shdr.sh_offset); sym.st_value -= shdr.sh_addr - shdr.sh_offset; } +new_symbol: /* * We need to figure out if the object was created from C++ sources * DWARF DW_compile_unit has this, but we don't always have access @@ -933,7 +934,6 @@ int dso__load_sym(struct dso *dso, struct map *map, if (demangled != NULL) elf_name = demangled; } -new_symbol: f = symbol__new(sym.st_value, sym.st_size, GELF_ST_BIND(sym.st_info), elf_name); free(demangled); From d4b4ff8e28b474fac0fbfa9cfc40f88b9e41e380 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 10 Dec 2013 13:10:37 -0700 Subject: [PATCH 025/171] NVMe: Schedule reset for failed controllers Schedules a controller reset when it indicates it has a failed status. If the device does not become ready after a reset, the pci device will be scheduled for removal. Signed-off-by: Keith Busch [fixed checkpatch issue] Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 21 +++++++++++++++++++-- include/linux/nvme.h | 1 + 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 000bca43c23b..2f5b9f5f5a21 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -60,6 +60,8 @@ static LIST_HEAD(dev_list); static struct task_struct *nvme_thread; static struct workqueue_struct *nvme_workq; +static void nvme_reset_failed_dev(struct work_struct *ws); + /* * An NVM Express queue. Each device has at least two (one for admin * commands and one for I/O commands). 
@@ -1612,13 +1614,25 @@ static void nvme_resubmit_bios(struct nvme_queue *nvmeq) static int nvme_kthread(void *data) { - struct nvme_dev *dev; + struct nvme_dev *dev, *next; while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); spin_lock(&dev_list_lock); - list_for_each_entry(dev, &dev_list, node) { + list_for_each_entry_safe(dev, next, &dev_list, node) { int i; + if (readl(&dev->bar->csts) & NVME_CSTS_CFS && + dev->initialized) { + if (work_busy(&dev->reset_work)) + continue; + list_del_init(&dev->node); + dev_warn(&dev->pci_dev->dev, + "Failed status, reset controller\n"); + INIT_WORK(&dev->reset_work, + nvme_reset_failed_dev); + queue_work(nvme_workq, &dev->reset_work); + continue; + } for (i = 0; i < dev->queue_count; i++) { struct nvme_queue *nvmeq = dev->queues[i]; if (!nvmeq) @@ -2006,6 +2020,7 @@ static void nvme_dev_shutdown(struct nvme_dev *dev) { int i; + dev->initialized = 0; for (i = dev->queue_count - 1; i >= 0; i--) nvme_disable_queue(dev, i); @@ -2196,6 +2211,7 @@ static int nvme_dev_resume(struct nvme_dev *dev) queue_work(nvme_workq, &dev->reset_work); spin_unlock(&dev_list_lock); } + dev->initialized = 1; return 0; } @@ -2269,6 +2285,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto remove; + dev->initialized = 1; kref_init(&dev->kref); return 0; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index eed81cc56d7e..117d877e8be5 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -95,6 +95,7 @@ struct nvme_dev { u32 max_hw_sectors; u32 stripe_size; u16 oncs; + u8 initialized; }; /* From c30341dc3c436cf43508cd44cdfbb3810c38c195 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 10 Dec 2013 13:10:38 -0700 Subject: [PATCH 026/171] NVMe: Abort timed out commands Send nvme abort command to io requests that have timed out on an initialized device. If the command is not returned after another timeout, schedule the controller for reset. 
Signed-off-by: Keith Busch [fix endianness issues] Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 68 ++++++++++++++++++++++++++++++++++++++- include/linux/nvme.h | 1 + include/uapi/linux/nvme.h | 11 +++++++ 3 files changed, 79 insertions(+), 1 deletion(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 2f5b9f5f5a21..75049532df96 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -83,6 +83,7 @@ struct nvme_queue { u16 sq_head; u16 sq_tail; u16 cq_head; + u16 qid; u8 cq_phase; u8 cqe_seen; u8 q_suspended; @@ -100,6 +101,7 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); BUILD_BUG_ON(sizeof(struct nvme_features) != 64); BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64); + BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64); BUILD_BUG_ON(sizeof(struct nvme_command) != 64); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096); BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096); @@ -114,6 +116,7 @@ struct nvme_cmd_info { nvme_completion_fn fn; void *ctx; unsigned long timeout; + int aborted; }; static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq) @@ -157,6 +160,7 @@ static int alloc_cmdid(struct nvme_queue *nvmeq, void *ctx, info[cmdid].fn = handler; info[cmdid].ctx = ctx; info[cmdid].timeout = jiffies + timeout; + info[cmdid].aborted = 0; return cmdid; } @@ -175,6 +179,7 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx, #define CMD_CTX_COMPLETED (0x310 + CMD_CTX_BASE) #define CMD_CTX_INVALID (0x314 + CMD_CTX_BASE) #define CMD_CTX_FLUSH (0x318 + CMD_CTX_BASE) +#define CMD_CTX_ABORT (0x31C + CMD_CTX_BASE) static void special_completion(struct nvme_dev *dev, void *ctx, struct nvme_completion *cqe) @@ -183,6 +188,10 @@ static void special_completion(struct nvme_dev *dev, void *ctx, return; if (ctx == CMD_CTX_FLUSH) return; + if (ctx == CMD_CTX_ABORT) { + ++dev->abort_limit; + return; + } if (ctx == CMD_CTX_COMPLETED) { dev_warn(&dev->pci_dev->dev, "completed id %d twice on queue %d\n", @@ -1004,6 +1013,56 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, return nvme_submit_admin_cmd(dev, &c, result); } +/** + * nvme_abort_cmd - Attempt aborting a command + * @cmdid: Command id of a timed out IO + * @queue: The queue with timed out IO + * + * Schedule controller reset if the command was already aborted once before and + * still hasn't been returned to the driver, or if this is the admin queue. 
+ */ +static void nvme_abort_cmd(int cmdid, struct nvme_queue *nvmeq) +{ + int a_cmdid; + struct nvme_command cmd; + struct nvme_dev *dev = nvmeq->dev; + struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); + + if (!nvmeq->qid || info[cmdid].aborted) { + if (work_busy(&dev->reset_work)) + return; + list_del_init(&dev->node); + dev_warn(&dev->pci_dev->dev, + "I/O %d QID %d timeout, reset controller\n", cmdid, + nvmeq->qid); + INIT_WORK(&dev->reset_work, nvme_reset_failed_dev); + queue_work(nvme_workq, &dev->reset_work); + return; + } + + if (!dev->abort_limit) + return; + + a_cmdid = alloc_cmdid(dev->queues[0], CMD_CTX_ABORT, special_completion, + ADMIN_TIMEOUT); + if (a_cmdid < 0) + return; + + memset(&cmd, 0, sizeof(cmd)); + cmd.abort.opcode = nvme_admin_abort_cmd; + cmd.abort.cid = cmdid; + cmd.abort.sqid = cpu_to_le16(nvmeq->qid); + cmd.abort.command_id = a_cmdid; + + --dev->abort_limit; + info[cmdid].aborted = 1; + info[cmdid].timeout = jiffies + ADMIN_TIMEOUT; + + dev_warn(nvmeq->q_dmadev, "Aborting I/O %d QID %d\n", cmdid, + nvmeq->qid); + nvme_submit_cmd(dev->queues[0], &cmd); +} + /** * nvme_cancel_ios - Cancel outstanding I/Os * @queue: The queue to cancel I/Os on @@ -1027,7 +1086,12 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout) continue; if (info[cmdid].ctx == CMD_CTX_CANCELLED) continue; - dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d\n", cmdid); + if (timeout && nvmeq->dev->initialized) { + nvme_abort_cmd(cmdid, nvmeq); + continue; + } + dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n", cmdid, + nvmeq->qid); ctx = cancel_cmdid(nvmeq, cmdid, &fn); fn(nvmeq->dev, ctx, &cqe); } @@ -1119,6 +1183,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; nvmeq->q_depth = depth; nvmeq->cq_vector = vector; + nvmeq->qid = qid; nvmeq->q_suspended = 1; dev->queue_count++; @@ -1929,6 +1994,7 @@ static int nvme_dev_add(struct nvme_dev *dev) ctrl = mem; nn = le32_to_cpup(&ctrl->nn); dev->oncs = le16_to_cpup(&ctrl->oncs); + dev->abort_limit = ctrl->acl + 1; memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 117d877e8be5..69ae03f6eb15 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -95,6 +95,7 @@ struct nvme_dev { u32 max_hw_sectors; u32 stripe_size; u16 oncs; + u16 abort_limit; u8 initialized; }; diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index 989c04e0c563..e5ab62201119 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -350,6 +350,16 @@ struct nvme_delete_queue { __u32 rsvd11[5]; }; +struct nvme_abort_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[9]; + __le16 sqid; + __u16 cid; + __u32 rsvd11[5]; +}; + struct nvme_download_firmware { __u8 opcode; __u8 flags; @@ -384,6 +394,7 @@ struct nvme_command { struct nvme_download_firmware dlfw; struct nvme_format_cmd format; struct nvme_dsm_cmd dsm; + struct nvme_abort_cmd abort; }; }; From 0e53d18051725da46cbccfb7874a6422d4d4f274 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 10 Dec 2013 13:10:39 -0700 Subject: [PATCH 027/171] NVMe: Surprise removal handling This adds checks to see if the nvme pci device was removed. The check reads the status register for the value of -1, which it should never be unless the device is no longer present. 
If a user performs a surprise removal on an nvme device, the driver will be notified either by the pci driver remove callback if the platform's slot is capable of this event, or via reading the device BAR status register, which will indicate controller failure and trigger a reset. Either way, the device is not present so all outstanding commands would timeout. This will not send queue deletion commands to a drive that isn't present and fail after ioremap, significantly speeding up surprise removal; previously this took over 2 minutes per IO queue pair created, but this will complete removing the device within a few seconds. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 75049532df96..a51126129784 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1140,8 +1140,9 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid) irq_set_affinity_hint(vector, NULL); free_irq(vector, nvmeq); - /* Don't tell the adapter to delete the admin queue */ - if (qid) { + /* Don't tell the adapter to delete the admin queue. + * Don't tell a removed adapter to delete IO queues. */ + if (qid && readl(&dev->bar->csts) != -1) { adapter_delete_sq(dev, qid); adapter_delete_cq(dev, qid); } @@ -2052,12 +2053,18 @@ static int nvme_dev_map(struct nvme_dev *dev) dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); if (!dev->bar) goto disable; - + if (readl(&dev->bar->csts) == -1) { + result = -ENODEV; + goto unmap; + } dev->db_stride = 1 << NVME_CAP_STRIDE(readq(&dev->bar->cap)); dev->dbs = ((void __iomem *)dev->bar) + 4096; return 0; + unmap: + iounmap(dev->bar); + dev->bar = NULL; disable: pci_release_regions(pdev); disable_pci: From 4d115420707afcabe77d2535e092356df6664b70 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 10 Dec 2013 13:10:40 -0700 Subject: [PATCH 028/171] NVMe: Async IO queue deletion This attempts to delete all IO queues at the same time asynchronously on shutdown. This is necessary for a present device that is not responding; a shutdown operation previously would take 2 minutes per queue-pair to timeout before moving on to the next queue, making a device removal appear to take a very long time or "hung" as reported by users. In the previous worst case, a removal may be stuck forever until a kill signal is given if there are more than 32 queue pairs since it would run out of admin command IDs after over an hour of timed out sync commands (admin queue depth is 64). This patch will wait for the admin command timeout for all commands to complete, so the worst case now for an unresponsive controller is 60 seconds, though that still seems like a long time. Since this adds another way to take queues offline, some duplicate code resulted so I moved these into more convienient functions. 
Signed-off-by: Keith Busch [make functions static, correct line length and whitespace issues] Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 229 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 217 insertions(+), 12 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index a51126129784..1c8a82fbbc37 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -62,6 +62,14 @@ static struct workqueue_struct *nvme_workq; static void nvme_reset_failed_dev(struct work_struct *ws); +struct async_cmd_info { + struct kthread_work work; + struct kthread_worker *worker; + u32 result; + int status; + void *ctx; +}; + /* * An NVM Express queue. Each device has at least two (one for admin * commands and one for I/O commands). @@ -87,6 +95,7 @@ struct nvme_queue { u8 cq_phase; u8 cqe_seen; u8 q_suspended; + struct async_cmd_info cmdinfo; unsigned long cmdid_data[]; }; @@ -208,6 +217,15 @@ static void special_completion(struct nvme_dev *dev, void *ctx, dev_warn(&dev->pci_dev->dev, "Unknown special completion %p\n", ctx); } +static void async_completion(struct nvme_dev *dev, void *ctx, + struct nvme_completion *cqe) +{ + struct async_cmd_info *cmdinfo = ctx; + cmdinfo->result = le32_to_cpup(&cqe->result); + cmdinfo->status = le16_to_cpup(&cqe->status) >> 1; + queue_kthread_work(cmdinfo->worker, &cmdinfo->work); +} + /* * Called with local interrupts disabled and the q_lock held. May not sleep. */ @@ -898,12 +916,34 @@ int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, return cmdinfo.status; } +static int nvme_submit_async_cmd(struct nvme_queue *nvmeq, + struct nvme_command *cmd, + struct async_cmd_info *cmdinfo, unsigned timeout) +{ + int cmdid; + + cmdid = alloc_cmdid_killable(nvmeq, cmdinfo, async_completion, timeout); + if (cmdid < 0) + return cmdid; + cmdinfo->status = -EINTR; + cmd->common.command_id = cmdid; + nvme_submit_cmd(nvmeq, cmd); + return 0; +} + int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, u32 *result) { return nvme_submit_sync_cmd(dev->queues[0], cmd, result, ADMIN_TIMEOUT); } +static int nvme_submit_admin_cmd_async(struct nvme_dev *dev, + struct nvme_command *cmd, struct async_cmd_info *cmdinfo) +{ + return nvme_submit_async_cmd(dev->queues[0], cmd, cmdinfo, + ADMIN_TIMEOUT); +} + static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) { int status; @@ -1124,15 +1164,20 @@ static void nvme_free_queues(struct nvme_dev *dev) } } -static void nvme_disable_queue(struct nvme_dev *dev, int qid) +/** + * nvme_suspend_queue - put queue into suspended state + * @nvmeq - queue to suspend + * + * Returns 1 if already suspended, 0 otherwise. 
+ */ +static int nvme_suspend_queue(struct nvme_queue *nvmeq) { - struct nvme_queue *nvmeq = dev->queues[qid]; - int vector = dev->entry[nvmeq->cq_vector].vector; + int vector = nvmeq->dev->entry[nvmeq->cq_vector].vector; spin_lock_irq(&nvmeq->q_lock); if (nvmeq->q_suspended) { spin_unlock_irq(&nvmeq->q_lock); - return; + return 1; } nvmeq->q_suspended = 1; spin_unlock_irq(&nvmeq->q_lock); @@ -1140,17 +1185,33 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid) irq_set_affinity_hint(vector, NULL); free_irq(vector, nvmeq); + return 0; +} + +static void nvme_clear_queue(struct nvme_queue *nvmeq) +{ + spin_lock_irq(&nvmeq->q_lock); + nvme_process_cq(nvmeq); + nvme_cancel_ios(nvmeq, false); + spin_unlock_irq(&nvmeq->q_lock); +} + +static void nvme_disable_queue(struct nvme_dev *dev, int qid) +{ + struct nvme_queue *nvmeq = dev->queues[qid]; + + if (!nvmeq) + return; + if (nvme_suspend_queue(nvmeq)) + return; + /* Don't tell the adapter to delete the admin queue. * Don't tell a removed adapter to delete IO queues. */ if (qid && readl(&dev->bar->csts) != -1) { adapter_delete_sq(dev, qid); adapter_delete_cq(dev, qid); } - - spin_lock_irq(&nvmeq->q_lock); - nvme_process_cq(nvmeq); - nvme_cancel_ios(nvmeq, false); - spin_unlock_irq(&nvmeq->q_lock); + nvme_clear_queue(nvmeq); } static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, @@ -2089,20 +2150,164 @@ static void nvme_dev_unmap(struct nvme_dev *dev) pci_disable_device(dev->pci_dev); } +struct nvme_delq_ctx { + struct task_struct *waiter; + struct kthread_worker *worker; + atomic_t refcount; +}; + +static void nvme_wait_dq(struct nvme_delq_ctx *dq, struct nvme_dev *dev) +{ + dq->waiter = current; + mb(); + + for (;;) { + set_current_state(TASK_KILLABLE); + if (!atomic_read(&dq->refcount)) + break; + if (!schedule_timeout(ADMIN_TIMEOUT) || + fatal_signal_pending(current)) { + set_current_state(TASK_RUNNING); + + nvme_disable_ctrl(dev, readq(&dev->bar->cap)); + nvme_disable_queue(dev, 0); + + send_sig(SIGKILL, dq->worker->task, 1); + flush_kthread_worker(dq->worker); + return; + } + } + set_current_state(TASK_RUNNING); +} + +static void nvme_put_dq(struct nvme_delq_ctx *dq) +{ + atomic_dec(&dq->refcount); + if (dq->waiter) + wake_up_process(dq->waiter); +} + +static struct nvme_delq_ctx *nvme_get_dq(struct nvme_delq_ctx *dq) +{ + atomic_inc(&dq->refcount); + return dq; +} + +static void nvme_del_queue_end(struct nvme_queue *nvmeq) +{ + struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx; + + nvme_clear_queue(nvmeq); + nvme_put_dq(dq); +} + +static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode, + kthread_work_func_t fn) +{ + struct nvme_command c; + + memset(&c, 0, sizeof(c)); + c.delete_queue.opcode = opcode; + c.delete_queue.qid = cpu_to_le16(nvmeq->qid); + + init_kthread_work(&nvmeq->cmdinfo.work, fn); + return nvme_submit_admin_cmd_async(nvmeq->dev, &c, &nvmeq->cmdinfo); +} + +static void nvme_del_cq_work_handler(struct kthread_work *work) +{ + struct nvme_queue *nvmeq = container_of(work, struct nvme_queue, + cmdinfo.work); + nvme_del_queue_end(nvmeq); +} + +static int nvme_delete_cq(struct nvme_queue *nvmeq) +{ + return adapter_async_del_queue(nvmeq, nvme_admin_delete_cq, + nvme_del_cq_work_handler); +} + +static void nvme_del_sq_work_handler(struct kthread_work *work) +{ + struct nvme_queue *nvmeq = container_of(work, struct nvme_queue, + cmdinfo.work); + int status = nvmeq->cmdinfo.status; + + if (!status) + status = nvme_delete_cq(nvmeq); + if (status) + nvme_del_queue_end(nvmeq); +} + +static 
int nvme_delete_sq(struct nvme_queue *nvmeq) +{ + return adapter_async_del_queue(nvmeq, nvme_admin_delete_sq, + nvme_del_sq_work_handler); +} + +static void nvme_del_queue_start(struct kthread_work *work) +{ + struct nvme_queue *nvmeq = container_of(work, struct nvme_queue, + cmdinfo.work); + allow_signal(SIGKILL); + if (nvme_delete_sq(nvmeq)) + nvme_del_queue_end(nvmeq); +} + +static void nvme_disable_io_queues(struct nvme_dev *dev) +{ + int i; + DEFINE_KTHREAD_WORKER_ONSTACK(worker); + struct nvme_delq_ctx dq; + struct task_struct *kworker_task = kthread_run(kthread_worker_fn, + &worker, "nvme%d", dev->instance); + + if (IS_ERR(kworker_task)) { + dev_err(&dev->pci_dev->dev, + "Failed to create queue del task\n"); + for (i = dev->queue_count - 1; i > 0; i--) + nvme_disable_queue(dev, i); + return; + } + + dq.waiter = NULL; + atomic_set(&dq.refcount, 0); + dq.worker = &worker; + for (i = dev->queue_count - 1; i > 0; i--) { + struct nvme_queue *nvmeq = dev->queues[i]; + + if (nvme_suspend_queue(nvmeq)) + continue; + nvmeq->cmdinfo.ctx = nvme_get_dq(&dq); + nvmeq->cmdinfo.worker = dq.worker; + init_kthread_work(&nvmeq->cmdinfo.work, nvme_del_queue_start); + queue_kthread_work(dq.worker, &nvmeq->cmdinfo.work); + } + nvme_wait_dq(&dq, dev); + kthread_stop(kworker_task); +} + static void nvme_dev_shutdown(struct nvme_dev *dev) { int i; dev->initialized = 0; - for (i = dev->queue_count - 1; i >= 0; i--) - nvme_disable_queue(dev, i); spin_lock(&dev_list_lock); list_del_init(&dev->node); spin_unlock(&dev_list_lock); - if (dev->bar) + if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) { + for (i = dev->queue_count - 1; i >= 0; i--) { + struct nvme_queue *nvmeq = dev->queues[i]; + nvme_suspend_queue(nvmeq); + nvme_clear_queue(nvmeq); + } + } else { + nvme_disable_io_queues(dev); nvme_shutdown_ctrl(dev); + nvme_disable_queue(dev, 0); + } nvme_dev_unmap(dev); } From 469071a37afc8a627b6b2ddf29db0a097d864845 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 9 Dec 2013 12:58:46 -0500 Subject: [PATCH 029/171] NVMe: Dynamically allocate partition numbers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some users need more than 64 partitions per device. Rather than simply increasing the number of partitions, switch to the dynamic partition allocation scheme. This means that minor numbers are not stable across boots, but since major numbers aren't either, I cannot see this being a significant problem. 
Tested-by: Matias Bjørling Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 36 +++--------------------------------- 1 file changed, 3 insertions(+), 33 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 1c8a82fbbc37..7b615a063e92 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -46,7 +46,6 @@ #define NVME_Q_DEPTH 1024 #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) -#define NVME_MINORS 64 #define ADMIN_TIMEOUT (60 * HZ) static int nvme_major; @@ -1780,33 +1779,6 @@ static int nvme_kthread(void *data) return 0; } -static DEFINE_IDA(nvme_index_ida); - -static int nvme_get_ns_idx(void) -{ - int index, error; - - do { - if (!ida_pre_get(&nvme_index_ida, GFP_KERNEL)) - return -1; - - spin_lock(&dev_list_lock); - error = ida_get_new(&nvme_index_ida, &index); - spin_unlock(&dev_list_lock); - } while (error == -EAGAIN); - - if (error) - index = -1; - return index; -} - -static void nvme_put_ns_idx(int index) -{ - spin_lock(&dev_list_lock); - ida_remove(&nvme_index_ida, index); - spin_unlock(&dev_list_lock); -} - static void nvme_config_discard(struct nvme_ns *ns) { u32 logical_block_size = queue_logical_block_size(ns->queue); @@ -1840,7 +1812,7 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid, ns->dev = dev; ns->queue->queuedata = ns; - disk = alloc_disk(NVME_MINORS); + disk = alloc_disk(0); if (!disk) goto out_free_queue; ns->ns_id = nsid; @@ -1853,12 +1825,12 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid, blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors); disk->major = nvme_major; - disk->minors = NVME_MINORS; - disk->first_minor = NVME_MINORS * nvme_get_ns_idx(); + disk->first_minor = 0; disk->fops = &nvme_fops; disk->private_data = ns; disk->queue = ns->queue; disk->driverfs_dev = &dev->pci_dev->dev; + disk->flags = GENHD_FL_EXT_DEVT; sprintf(disk->disk_name, "nvme%dn%d", dev->instance, nsid); set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); @@ -1876,9 +1848,7 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid, static void nvme_ns_free(struct nvme_ns *ns) { - int index = ns->disk->first_minor / NVME_MINORS; put_disk(ns->disk); - nvme_put_ns_idx(index); blk_cleanup_queue(ns->queue); kfree(ns); } From a1a5ef9998b1379780790b878457b0e0f48b25db Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 16 Dec 2013 13:50:00 -0500 Subject: [PATCH 030/171] NVMe: Disable admin queue on init failure Disable the admin queue if device fails during initialization so the queue's irq is freed. 
Signed-off-by: Keith Busch [rewritten to use nvme_free_queues] Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 7b615a063e92..89021d710ec2 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1152,11 +1152,11 @@ static void nvme_free_queue(struct nvme_queue *nvmeq) kfree(nvmeq); } -static void nvme_free_queues(struct nvme_dev *dev) +static void nvme_free_queues(struct nvme_dev *dev, int lowest) { int i; - for (i = dev->queue_count - 1; i >= 0; i--) { + for (i = dev->queue_count - 1; i >= lowest; i--) { nvme_free_queue(dev->queues[i]); dev->queue_count--; dev->queues[i] = NULL; @@ -1991,7 +1991,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) return 0; free_queues: - nvme_free_queues(dev); + nvme_free_queues(dev, 1); return result; } @@ -2411,6 +2411,7 @@ static int nvme_dev_start(struct nvme_dev *dev) return result; disable: + nvme_disable_queue(dev, 0); spin_lock(&dev_list_lock); list_del_init(&dev->node); spin_unlock(&dev_list_lock); @@ -2542,7 +2543,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) shutdown: nvme_dev_shutdown(dev); release_pools: - nvme_free_queues(dev); + nvme_free_queues(dev, 0); nvme_release_prp_pools(dev); release: nvme_release_instance(dev); @@ -2566,7 +2567,7 @@ static void nvme_remove(struct pci_dev *pdev) misc_deregister(&dev->miscdev); nvme_dev_remove(dev); nvme_dev_shutdown(dev); - nvme_free_queues(dev); + nvme_free_queues(dev, 0); nvme_release_instance(dev); nvme_release_prp_pools(dev); kref_put(&dev->kref, nvme_free_dev); From 09ece1424f52a2d3ad0ab715e96b0fb342af4f03 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 27 Jan 2014 11:29:40 -0500 Subject: [PATCH 031/171] NVMe: Add a pci_driver shutdown method We need to shut down the device cleanly when the system is being shut down. This was in an earlier patch but was inadvertently lost during a rewrite. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 89021d710ec2..eaace767be49 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -2554,6 +2554,12 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) return result; } +static void nvme_shutdown(struct pci_dev *pdev) +{ + struct nvme_dev *dev = pci_get_drvdata(pdev); + nvme_dev_shutdown(dev); +} + static void nvme_remove(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); @@ -2625,6 +2631,7 @@ static struct pci_driver nvme_driver = { .id_table = nvme_id_table, .probe = nvme_probe, .remove = nvme_remove, + .shutdown = nvme_shutdown, .driver = { .pm = &nvme_dev_pm_ops, }, From 3193f07bb7ae723f33ac8e8b9db317a4f68d7d18 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 27 Jan 2014 15:57:22 -0500 Subject: [PATCH 032/171] NVMe: Include device and queue numbers in interrupt name On larger systems with many drives, it may help debugging to know which queue is tied to which interrupt, just by looking at /proc/interrupts. 
Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index eaace767be49..5ffc26937226 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -76,6 +76,7 @@ struct async_cmd_info { struct nvme_queue { struct device *q_dmadev; struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ spinlock_t q_lock; struct nvme_command *sq_cmds; volatile struct nvme_completion *cqes; @@ -1235,6 +1236,8 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, nvmeq->q_dmadev = dmadev; nvmeq->dev = dev; + snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d", + dev->instance, qid); spin_lock_init(&nvmeq->q_lock); nvmeq->cq_head = 0; nvmeq->cq_phase = 1; @@ -1297,7 +1300,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) if (result < 0) goto release_cq; - result = queue_request_irq(dev, nvmeq, "nvme"); + result = queue_request_irq(dev, nvmeq, nvmeq->irqname); if (result < 0) goto release_sq; @@ -1415,7 +1418,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) if (result) return result; - result = queue_request_irq(dev, nvmeq, "nvme admin"); + result = queue_request_irq(dev, nvmeq, nvmeq->irqname); if (result) return result; @@ -1873,6 +1876,7 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) static int nvme_setup_io_queues(struct nvme_dev *dev) { + struct nvme_queue *adminq = dev->queues[0]; struct pci_dev *pdev = dev->pci_dev; int result, cpu, i, vecs, nr_io_queues, size, q_depth; @@ -1899,7 +1903,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) } /* Deregister the admin queue's interrupt */ - free_irq(dev->entry[0].vector, dev->queues[0]); + free_irq(dev->entry[0].vector, adminq); vecs = nr_io_queues; for (i = 0; i < vecs; i++) @@ -1937,9 +1941,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) */ nr_io_queues = vecs; - result = queue_request_irq(dev, dev->queues[0], "nvme admin"); + result = queue_request_irq(dev, adminq, adminq->irqname); if (result) { - dev->queues[0]->q_suspended = 1; + adminq->q_suspended = 1; goto free_queues; } From 317b5684d52269b75b4ec6480f9dac056d0d4ba8 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 27 Jan 2014 17:34:07 +0000 Subject: [PATCH 033/171] regulator: core: Correct default return value for full constraints Once we have full constraints then all supply mappings should be known to the regulator API. This means that we should treat failed lookups as fatal rather than deferring in the hope of further registrations but this was broken by commit 9b92da1f1205bd25 "regulator: core: Fix default return value for _get()" which was targeted at DT systems but unintentionally broke non-DT systems by changing the default return value. Fix this by explicitly returning -EPROBE_DEFER from the DT lookup if we find a property but no corresponding regulator and by having the non-DT case default to -ENODEV when we have full constraints. 
Fixes: 9b92da1f1205bd25 "regulator: core: Fix default return value for _get()" Signed-off-by: Mark Brown Tested-by: Guenter Roeck Cc: stable@vger.kernel.org --- drivers/regulator/core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index b38a6b669e8c..16a309e5c024 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1272,6 +1272,8 @@ static struct regulator_dev *regulator_dev_lookup(struct device *dev, if (r->dev.parent && node == r->dev.of_node) return r; + *ret = -EPROBE_DEFER; + return NULL; } else { /* * If we couldn't even get the node then it's @@ -1312,7 +1314,7 @@ static struct regulator *_regulator_get(struct device *dev, const char *id, struct regulator_dev *rdev; struct regulator *regulator = ERR_PTR(-EPROBE_DEFER); const char *devname = NULL; - int ret = -EPROBE_DEFER; + int ret; if (id == NULL) { pr_err("get() with no identifier\n"); @@ -1322,6 +1324,11 @@ static struct regulator *_regulator_get(struct device *dev, const char *id, if (dev) devname = dev_name(dev); + if (have_full_constraints()) + ret = -ENODEV; + else + ret = -EPROBE_DEFER; + mutex_lock(®ulator_list_mutex); rdev = regulator_dev_lookup(dev, id, &ret); From 7de8246eba58c6f4c315b8c69fb29e445ccf3c80 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 28 Jan 2014 09:38:37 -0800 Subject: [PATCH 034/171] [IA64] Wire up new sched_setattr and sched_getattr syscalls New syscalls for v3.14 Signed-off-by: Tony Luck --- arch/ia64/include/asm/unistd.h | 2 +- arch/ia64/include/uapi/asm/unistd.h | 2 ++ arch/ia64/kernel/entry.S | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index afd45e0d552e..ae763d8bf55a 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -11,7 +11,7 @@ -#define NR_syscalls 312 /* length of syscall table */ +#define NR_syscalls 314 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h index 34fd6fe46da1..715e85f858de 100644 --- a/arch/ia64/include/uapi/asm/unistd.h +++ b/arch/ia64/include/uapi/asm/unistd.h @@ -325,5 +325,7 @@ #define __NR_process_vm_writev 1333 #define __NR_accept4 1334 #define __NR_finit_module 1335 +#define __NR_sched_setattr 1336 +#define __NR_sched_getattr 1337 #endif /* _UAPI_ASM_IA64_UNISTD_H */ diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index ddea607f948a..fa8d61a312a7 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1773,6 +1773,8 @@ sys_call_table: data8 sys_process_vm_writev data8 sys_accept4 data8 sys_finit_module // 1335 + data8 sys_sched_setattr + data8 sys_sched_getattr .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ From bdfd70fde389412be60f7e8aaed5732dc26fc8ac Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 29 Jan 2014 09:33:52 -0500 Subject: [PATCH 035/171] NVMe: Correct uses of INIT_WORK We need to initialise the work_struct when we initialise the rest of the struct nvme_dev, otherwise we'll hit a lockdep warning when we remove the device. Use PREPARE_WORK to change the function pointer instead of INIT_WORK. 
Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 5ffc26937226..23728099e36f 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1075,7 +1075,7 @@ static void nvme_abort_cmd(int cmdid, struct nvme_queue *nvmeq) dev_warn(&dev->pci_dev->dev, "I/O %d QID %d timeout, reset controller\n", cmdid, nvmeq->qid); - INIT_WORK(&dev->reset_work, nvme_reset_failed_dev); + PREPARE_WORK(&dev->reset_work, nvme_reset_failed_dev); queue_work(nvme_workq, &dev->reset_work); return; } @@ -1757,7 +1757,7 @@ static int nvme_kthread(void *data) list_del_init(&dev->node); dev_warn(&dev->pci_dev->dev, "Failed status, reset controller\n"); - INIT_WORK(&dev->reset_work, + PREPARE_WORK(&dev->reset_work, nvme_reset_failed_dev); queue_work(nvme_workq, &dev->reset_work); continue; @@ -2460,7 +2460,7 @@ static int nvme_dev_resume(struct nvme_dev *dev) return ret; if (ret == -EBUSY) { spin_lock(&dev_list_lock); - INIT_WORK(&dev->reset_work, nvme_remove_disks); + PREPARE_WORK(&dev->reset_work, nvme_remove_disks); queue_work(nvme_workq, &dev->reset_work); spin_unlock(&dev_list_lock); } @@ -2507,6 +2507,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto free; INIT_LIST_HEAD(&dev->namespaces); + INIT_WORK(&dev->reset_work, nvme_reset_failed_dev); dev->pci_dev = pdev; pci_set_drvdata(pdev, dev); result = nvme_set_instance(dev); @@ -2605,7 +2606,7 @@ static int nvme_resume(struct device *dev) struct nvme_dev *ndev = pci_get_drvdata(pdev); if (nvme_dev_resume(ndev) && !work_busy(&ndev->reset_work)) { - INIT_WORK(&ndev->reset_work, nvme_reset_failed_dev); + PREPARE_WORK(&ndev->reset_work, nvme_reset_failed_dev); queue_work(nvme_workq, &ndev->reset_work); } return 0; From f428ebd184c82a7914b2aa7e9f868918aaf7ea78 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Jan 2014 16:40:02 +0100 Subject: [PATCH 036/171] perf tools: Fix AAAAARGH64 memory barriers Someone got the load and store barriers mixed up for AAAAARGH64. Turn them the right side up. Reported-by: Will Deacon Signed-off-by: Peter Zijlstra Fixes: a94d342b9cb0 ("tools/perf: Add required memory barriers") Cc: Ingo Molnar Cc: Will Deacon Link: http://lkml.kernel.org/r/20140124154002.GF31570@twins.programming.kicks-ass.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 7daa806d9050..e84fa26bc1be 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -100,8 +100,8 @@ #ifdef __aarch64__ #define mb() asm volatile("dmb ish" ::: "memory") -#define wmb() asm volatile("dmb ishld" ::: "memory") -#define rmb() asm volatile("dmb ishst" ::: "memory") +#define wmb() asm volatile("dmb ishst" ::: "memory") +#define rmb() asm volatile("dmb ishld" ::: "memory") #define cpu_relax() asm volatile("yield" ::: "memory") #endif From 0519e9ad89e5cd6e6b08398f57c6a71d9580564c Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 16 Jan 2014 16:01:11 +0100 Subject: [PATCH 037/171] crypto: s390 - fix concurrency issue in aes-ctr mode The aes-ctr mode uses one preallocated page without any concurrency protection. When multiple threads run aes-ctr encryption or decryption this can lead to data corruption. The patch introduces locking for the page and a fallback solution with slower en/decryption performance in concurrency situations. 
Cc: stable@vger.kernel.org Signed-off-by: Harald Freudenberger Signed-off-by: Herbert Xu --- arch/s390/crypto/aes_s390.c | 65 ++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 19 deletions(-) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index b3feabd39f31..cf3c0089bef2 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "crypt_s390.h" #define AES_KEYLEN_128 1 @@ -32,6 +33,7 @@ #define AES_KEYLEN_256 4 static u8 *ctrblk; +static DEFINE_SPINLOCK(ctrblk_lock); static char keylen_flag; struct s390_aes_ctx { @@ -758,43 +760,67 @@ static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return aes_set_key(tfm, in_key, key_len); } +static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes) +{ + unsigned int i, n; + + /* only use complete blocks, max. PAGE_SIZE */ + n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1); + for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) { + memcpy(ctrptr + i, ctrptr + i - AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + crypto_inc(ctrptr + i, AES_BLOCK_SIZE); + } + return n; +} + static int ctr_aes_crypt(struct blkcipher_desc *desc, long func, struct s390_aes_ctx *sctx, struct blkcipher_walk *walk) { int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE); - unsigned int i, n, nbytes; - u8 buf[AES_BLOCK_SIZE]; - u8 *out, *in; + unsigned int n, nbytes; + u8 buf[AES_BLOCK_SIZE], ctrbuf[AES_BLOCK_SIZE]; + u8 *out, *in, *ctrptr = ctrbuf; if (!walk->nbytes) return ret; - memcpy(ctrblk, walk->iv, AES_BLOCK_SIZE); + if (spin_trylock(&ctrblk_lock)) + ctrptr = ctrblk; + + memcpy(ctrptr, walk->iv, AES_BLOCK_SIZE); while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { out = walk->dst.virt.addr; in = walk->src.virt.addr; while (nbytes >= AES_BLOCK_SIZE) { - /* only use complete blocks, max. PAGE_SIZE */ - n = (nbytes > PAGE_SIZE) ? 
PAGE_SIZE : - nbytes & ~(AES_BLOCK_SIZE - 1); - for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) { - memcpy(ctrblk + i, ctrblk + i - AES_BLOCK_SIZE, - AES_BLOCK_SIZE); - crypto_inc(ctrblk + i, AES_BLOCK_SIZE); - } - ret = crypt_s390_kmctr(func, sctx->key, out, in, n, ctrblk); - if (ret < 0 || ret != n) + if (ctrptr == ctrblk) + n = __ctrblk_init(ctrptr, nbytes); + else + n = AES_BLOCK_SIZE; + ret = crypt_s390_kmctr(func, sctx->key, out, in, + n, ctrptr); + if (ret < 0 || ret != n) { + if (ctrptr == ctrblk) + spin_unlock(&ctrblk_lock); return -EIO; + } if (n > AES_BLOCK_SIZE) - memcpy(ctrblk, ctrblk + n - AES_BLOCK_SIZE, + memcpy(ctrptr, ctrptr + n - AES_BLOCK_SIZE, AES_BLOCK_SIZE); - crypto_inc(ctrblk, AES_BLOCK_SIZE); + crypto_inc(ctrptr, AES_BLOCK_SIZE); out += n; in += n; nbytes -= n; } ret = blkcipher_walk_done(desc, walk, nbytes); } + if (ctrptr == ctrblk) { + if (nbytes) + memcpy(ctrbuf, ctrptr, AES_BLOCK_SIZE); + else + memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE); + spin_unlock(&ctrblk_lock); + } /* * final block may be < AES_BLOCK_SIZE, copy only nbytes */ @@ -802,14 +828,15 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, long func, out = walk->dst.virt.addr; in = walk->src.virt.addr; ret = crypt_s390_kmctr(func, sctx->key, buf, in, - AES_BLOCK_SIZE, ctrblk); + AES_BLOCK_SIZE, ctrbuf); if (ret < 0 || ret != AES_BLOCK_SIZE) return -EIO; memcpy(out, buf, nbytes); - crypto_inc(ctrblk, AES_BLOCK_SIZE); + crypto_inc(ctrbuf, AES_BLOCK_SIZE); ret = blkcipher_walk_done(desc, walk, 0); + memcpy(walk->iv, ctrbuf, AES_BLOCK_SIZE); } - memcpy(walk->iv, ctrblk, AES_BLOCK_SIZE); + return ret; } From adc3fcf1552b6e406d172fd9690bbd1395053d13 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 22 Jan 2014 13:00:04 +0100 Subject: [PATCH 038/171] crypto: s390 - fix des and des3_ede cbc concurrency issue In s390 des and des3_ede cbc mode the iv value is not protected against concurrency access and modifications from another running en/decrypt operation which is using the very same tfm struct instance. This fix copies the iv to the local stack before the crypto operation and stores the value back when done. 
Cc: stable@vger.kernel.org Signed-off-by: Harald Freudenberger Signed-off-by: Herbert Xu --- arch/s390/crypto/des_s390.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 200f2a1b599d..82a0a2ad2494 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -105,29 +105,35 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func, } static int cbc_desall_crypt(struct blkcipher_desc *desc, long func, - u8 *iv, struct blkcipher_walk *walk) + struct blkcipher_walk *walk) { + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); int ret = blkcipher_walk_virt(desc, walk); unsigned int nbytes = walk->nbytes; + struct { + u8 iv[DES_BLOCK_SIZE]; + u8 key[DES3_KEY_SIZE]; + } param; if (!nbytes) goto out; - memcpy(iv, walk->iv, DES_BLOCK_SIZE); + memcpy(param.iv, walk->iv, DES_BLOCK_SIZE); + memcpy(param.key, ctx->key, DES3_KEY_SIZE); do { /* only use complete blocks */ unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1); u8 *out = walk->dst.virt.addr; u8 *in = walk->src.virt.addr; - ret = crypt_s390_kmc(func, iv, out, in, n); + ret = crypt_s390_kmc(func, ¶m, out, in, n); if (ret < 0 || ret != n) return -EIO; nbytes &= DES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, walk, nbytes); } while ((nbytes = walk->nbytes)); - memcpy(walk->iv, iv, DES_BLOCK_SIZE); + memcpy(walk->iv, param.iv, DES_BLOCK_SIZE); out: return ret; @@ -179,22 +185,20 @@ static int cbc_des_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, ctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, &walk); } static int cbc_des_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, ctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, &walk); } static struct crypto_alg cbc_des_alg = { @@ -327,22 +331,20 @@ static int cbc_des3_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, ctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, &walk); } static int cbc_des3_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, ctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, &walk); } static struct crypto_alg cbc_des3_alg = { From ee97dc7db4cbda33e4241c2d85b42d1835bc8a35 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 22 Jan 2014 13:01:33 +0100 Subject: [PATCH 039/171] crypto: s390 - fix des and des3_ede ctr concurrency issue In s390 des and 3des ctr mode there is one preallocated page used to speed up the en/decryption. 
This page is not protected against concurrent usage and thus there is a potential of data corruption with multiple threads. The fix introduces locking/unlocking the ctr page and a slower fallback solution at concurrency situations. Cc: stable@vger.kernel.org Signed-off-by: Harald Freudenberger Signed-off-by: Herbert Xu --- arch/s390/crypto/des_s390.c | 69 ++++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 82a0a2ad2494..0a5aac8a9412 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -25,6 +25,7 @@ #define DES3_KEY_SIZE (3 * DES_KEY_SIZE) static u8 *ctrblk; +static DEFINE_SPINLOCK(ctrblk_lock); struct s390_des_ctx { u8 iv[DES_BLOCK_SIZE]; @@ -368,54 +369,80 @@ static struct crypto_alg cbc_des3_alg = { } }; +static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes) +{ + unsigned int i, n; + + /* align to block size, max. PAGE_SIZE */ + n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(DES_BLOCK_SIZE - 1); + for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) { + memcpy(ctrptr + i, ctrptr + i - DES_BLOCK_SIZE, DES_BLOCK_SIZE); + crypto_inc(ctrptr + i, DES_BLOCK_SIZE); + } + return n; +} + static int ctr_desall_crypt(struct blkcipher_desc *desc, long func, - struct s390_des_ctx *ctx, struct blkcipher_walk *walk) + struct s390_des_ctx *ctx, + struct blkcipher_walk *walk) { int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE); - unsigned int i, n, nbytes; - u8 buf[DES_BLOCK_SIZE]; - u8 *out, *in; + unsigned int n, nbytes; + u8 buf[DES_BLOCK_SIZE], ctrbuf[DES_BLOCK_SIZE]; + u8 *out, *in, *ctrptr = ctrbuf; - memcpy(ctrblk, walk->iv, DES_BLOCK_SIZE); + if (!walk->nbytes) + return ret; + + if (spin_trylock(&ctrblk_lock)) + ctrptr = ctrblk; + + memcpy(ctrptr, walk->iv, DES_BLOCK_SIZE); while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) { out = walk->dst.virt.addr; in = walk->src.virt.addr; while (nbytes >= DES_BLOCK_SIZE) { - /* align to block size, max. PAGE_SIZE */ - n = (nbytes > PAGE_SIZE) ? 
PAGE_SIZE : - nbytes & ~(DES_BLOCK_SIZE - 1); - for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) { - memcpy(ctrblk + i, ctrblk + i - DES_BLOCK_SIZE, - DES_BLOCK_SIZE); - crypto_inc(ctrblk + i, DES_BLOCK_SIZE); - } - ret = crypt_s390_kmctr(func, ctx->key, out, in, n, ctrblk); - if (ret < 0 || ret != n) + if (ctrptr == ctrblk) + n = __ctrblk_init(ctrptr, nbytes); + else + n = DES_BLOCK_SIZE; + ret = crypt_s390_kmctr(func, ctx->key, out, in, + n, ctrptr); + if (ret < 0 || ret != n) { + if (ctrptr == ctrblk) + spin_unlock(&ctrblk_lock); return -EIO; + } if (n > DES_BLOCK_SIZE) - memcpy(ctrblk, ctrblk + n - DES_BLOCK_SIZE, + memcpy(ctrptr, ctrptr + n - DES_BLOCK_SIZE, DES_BLOCK_SIZE); - crypto_inc(ctrblk, DES_BLOCK_SIZE); + crypto_inc(ctrptr, DES_BLOCK_SIZE); out += n; in += n; nbytes -= n; } ret = blkcipher_walk_done(desc, walk, nbytes); } - + if (ctrptr == ctrblk) { + if (nbytes) + memcpy(ctrbuf, ctrptr, DES_BLOCK_SIZE); + else + memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE); + spin_unlock(&ctrblk_lock); + } /* final block may be < DES_BLOCK_SIZE, copy only nbytes */ if (nbytes) { out = walk->dst.virt.addr; in = walk->src.virt.addr; ret = crypt_s390_kmctr(func, ctx->key, buf, in, - DES_BLOCK_SIZE, ctrblk); + DES_BLOCK_SIZE, ctrbuf); if (ret < 0 || ret != DES_BLOCK_SIZE) return -EIO; memcpy(out, buf, nbytes); - crypto_inc(ctrblk, DES_BLOCK_SIZE); + crypto_inc(ctrbuf, DES_BLOCK_SIZE); ret = blkcipher_walk_done(desc, walk, 0); + memcpy(walk->iv, ctrbuf, DES_BLOCK_SIZE); } - memcpy(walk->iv, ctrblk, DES_BLOCK_SIZE); return ret; } From 39424e89d64661faa0a2e00c5ad1e6dbeebfa972 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 28 Jan 2014 08:22:11 -0500 Subject: [PATCH 040/171] x86, cpu hotplug: Fix stack frame warning in check_irq_vectors_for_cpu_disable() Further discussion here: http://marc.info/?l=linux-kernel&m=139073901101034&w=2 kbuild, 0day kernel build service, outputs the warning: arch/x86/kernel/irq.c:333:1: warning: the frame size of 2056 bytes is larger than 2048 bytes [-Wframe-larger-than=] because check_irq_vectors_for_cpu_disable() allocates two cpumasks on the stack. Fix this by moving the two cpumasks to a global file context. Reported-by: Fengguang Wu Tested-by: David Rientjes Signed-off-by: Prarit Bhargava Link: http://lkml.kernel.org/r/1390915331-27375-1-git-send-email-prarit@redhat.com Cc: Andi Kleen Cc: Michel Lespinasse Cc: Seiji Aguchi Cc: Yang Zhang Cc: Paul Gortmaker Cc: Janet Morgan Cc: Tony Luck Cc: Ruiv Wang Cc: Gong Chen Cc: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/irq.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index dbb60878b744..d99f31d9a750 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -266,6 +266,14 @@ __visible void smp_trace_x86_platform_ipi(struct pt_regs *regs) EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); #ifdef CONFIG_HOTPLUG_CPU + +/* These two declarations are only used in check_irq_vectors_for_cpu_disable() + * below, which is protected by stop_machine(). Putting them on the stack + * results in a stack frame overflow. Dynamically allocating could result in a + * failure so declare these two cpumasks as global. + */ +static struct cpumask affinity_new, online_new; + /* * This cpu is going to be removed and its vectors migrated to the remaining * online cpus. Check to see if there are enough vectors in the remaining cpus. 
@@ -277,7 +285,6 @@ int check_irq_vectors_for_cpu_disable(void) unsigned int this_cpu, vector, this_count, count; struct irq_desc *desc; struct irq_data *data; - struct cpumask affinity_new, online_new; this_cpu = smp_processor_id(); cpumask_copy(&online_new, cpu_online_mask); From 54820f5802295993b78a373e404e7561039b399d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 30 Jan 2014 14:51:19 +0100 Subject: [PATCH 041/171] regulator: s2mps11: Fix NULL pointer of_node value when using platform data When platform_data is used for regulator (of_node of sec-core MFD device is NULL) the config.of_node for regulator is not initialized. This NULL value of config.of_node is later stored during regulator_register(). Thus any call by regulator consumers to of_get_regulator() will fail on of_parse_phandle() returning NULL. In this case (using platform_data and parent's driver of_node is NULL) set the config.of_node to reg_node from platform_data. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Mark Brown --- drivers/regulator/s2mps11.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c index d9e557990577..cd0b9e35a56d 100644 --- a/drivers/regulator/s2mps11.c +++ b/drivers/regulator/s2mps11.c @@ -441,6 +441,7 @@ common_reg: for (i = 0; i < S2MPS11_REGULATOR_MAX; i++) { if (!reg_np) { config.init_data = pdata->regulators[i].initdata; + config.of_node = pdata->regulators[i].reg_node; } else { config.init_data = rdata[i].init_data; config.of_node = rdata[i].of_node; From 6a02652df511029127406cf8fa89cdf5e987f963 Mon Sep 17 00:00:00 2001 From: Francesco Fusco Date: Mon, 27 Jan 2014 14:39:13 +0100 Subject: [PATCH 042/171] perf tools: Fix include for non x86 architectures Commit 71ae8aac ("lib: introduce arch optimized hash library") added an include of <asm/hash.h> to <linux/hash.h> for setting up an architecture specific fast hash. Since perf includes the non-uapi kernel header directly, it cannot find <asm/hash.h> on non-x86 and thus prevents perf from being compiled on every architecture other than x86. The problem is the inclusion of <asm/hash.h> in hash.h that results in the following error originating from util/evlist.c: fatal error: asm/hash.h: No such file or directory This commit simply adds an empty stub/file to fix the compile issue on non-x86 architectures. As perf does not use any of these new functions, it fixes the compilation and therefore seems to be the most appropriate solution to go with. Signed-off-by: Francesco Fusco Link: http://lkml.kernel.org/r/2cf8143aad65a6aa6fe30325ef8a65847141afa2.1390829373.git.ffusco@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/include/asm/hash.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 tools/perf/util/include/asm/hash.h diff --git a/tools/perf/util/include/asm/hash.h b/tools/perf/util/include/asm/hash.h new file mode 100644 index 000000000000..d82b170bb216 --- /dev/null +++ b/tools/perf/util/include/asm/hash.h @@ -0,0 +1,6 @@ +#ifndef __ASM_GENERIC_HASH_H +#define __ASM_GENERIC_HASH_H + +/* Stub */ + +#endif /* __ASM_GENERIC_HASH_H */ From 9176753d1ed56951a6ee2a0f0a3f367904e35567 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:36 +0200 Subject: [PATCH 043/171] perf symbols: Fix symbol annotation for relocated kernel Kernel maps map memory addresses to file offsets. For symbol annotation, objdump needs the object VMA addresses. For an unrelocated kernel, that is the same as the memory address.
The addresses passed to objdump for symbol annotation did not take into account kernel relocation. This patch fixes that. Reported-by: Linus Torvalds Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 5 +++-- tools/perf/util/map.h | 1 + tools/perf/util/symbol-elf.c | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 3b97513f0e77..39cd2d0faff6 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -39,6 +39,7 @@ void map__init(struct map *map, enum map_type type, map->start = start; map->end = end; map->pgoff = pgoff; + map->reloc = 0; map->dso = dso; map->map_ip = map__map_ip; map->unmap_ip = map__unmap_ip; @@ -288,7 +289,7 @@ u64 map__rip_2objdump(struct map *map, u64 rip) if (map->dso->rel) return rip - map->pgoff; - return map->unmap_ip(map, rip); + return map->unmap_ip(map, rip) - map->reloc; } /** @@ -311,7 +312,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip) if (map->dso->rel) return map->unmap_ip(map, ip + map->pgoff); - return ip; + return ip + map->reloc; } void map_groups__init(struct map_groups *mg) diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 18068c6b71c1..257e513205ce 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -36,6 +36,7 @@ struct map { bool erange_warned; u32 priv; u64 pgoff; + u64 reloc; u32 maj, min; /* only valid for MMAP2 record */ u64 ino; /* only valid for MMAP2 record */ u64 ino_generation;/* only valid for MMAP2 record */ diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 8f12f0f5101d..3e9f336740fa 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -751,6 +751,8 @@ int dso__load_sym(struct dso *dso, struct map *map, if (strcmp(elf_name, kmap->ref_reloc_sym->name)) continue; kmap->ref_reloc_sym->unrelocated_addr = sym.st_value; + map->reloc = kmap->ref_reloc_sym->addr - + kmap->ref_reloc_sym->unrelocated_addr; break; } } From 29b596b57426831fce92cd0ebb01c77627616fdf Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:37 +0200 Subject: [PATCH 044/171] perf tools: Add kallsyms__get_function_start() Separate out the logic used to find the start address of the reference symbol used to track kernel relocation. kallsyms__get_function_start() is used in subsequent patches. 
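As a usage sketch only (the file name and symbol below are example inputs, not taken from this patch), the new helper is meant to be called like this, returning 0 when the symbol cannot be found:

	u64 start;

	/* address of the reference symbol as reported by kallsyms */
	start = kallsyms__get_function_start("/proc/kallsyms", "_text");
	if (!start)
		return -ENOENT;	/* reference symbol not present */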
Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 18 +++++++++++++++--- tools/perf/util/event.h | 3 +++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 1fc1c2f04772..17476df5c7b2 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -470,6 +470,17 @@ static int find_symbol_cb(void *arg, const char *name, char type, return 1; } +u64 kallsyms__get_function_start(const char *kallsyms_filename, + const char *symbol_name) +{ + struct process_symbol_args args = { .name = symbol_name, }; + + if (kallsyms__parse(kallsyms_filename, &args, find_symbol_cb) <= 0) + return 0; + + return args.start; +} + int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, @@ -480,13 +491,13 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, char path[PATH_MAX]; char name_buff[PATH_MAX]; struct map *map; + u64 start; int err; /* * We should get this from /sys/kernel/sections/.text, but till that is * available use this, and after it is use this as a fallback for older * kernels. */ - struct process_symbol_args args = { .name = symbol_name, }; union perf_event *event = zalloc((sizeof(event->mmap) + machine->id_hdr_size)); if (event == NULL) { @@ -513,7 +524,8 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, } } - if (kallsyms__parse(filename, &args, find_symbol_cb) <= 0) { + start = kallsyms__get_function_start(filename, symbol_name); + if (!start) { free(event); return -ENOENT; } @@ -525,7 +537,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, event->mmap.header.type = PERF_RECORD_MMAP; event->mmap.header.size = (sizeof(event->mmap) - (sizeof(event->mmap.filename) - size) + machine->id_hdr_size); - event->mmap.pgoff = args.start; + event->mmap.pgoff = start; event->mmap.start = map->start; event->mmap.len = map->end - event->mmap.start; event->mmap.pid = machine->pid; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index faf6e219be21..66a0c0392863 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -279,4 +279,7 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union perf_event *event, FILE *fp); +u64 kallsyms__get_function_start(const char *kallsyms_filename, + const char *symbol_name); + #endif /* __PERF_RECORD_H */ From 15a0a8706c32bd38bff9ebf7c6ef24f32d1ea921 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:38 +0200 Subject: [PATCH 045/171] perf machine: Add machine__get_kallsyms_filename() Separate out the logic used to make the kallsyms full path name for a machine. It will be reused in a subsequent patch. 
Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index ded74590b92f..290c2e6d4001 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -496,19 +496,22 @@ static int symbol__in_kernel(void *arg, const char *name, return 1; } +static void machine__get_kallsyms_filename(struct machine *machine, char *buf, + size_t bufsz) +{ + if (machine__is_default_guest(machine)) + scnprintf(buf, bufsz, "%s", symbol_conf.default_guest_kallsyms); + else + scnprintf(buf, bufsz, "%s/proc/kallsyms", machine->root_dir); +} + /* Figure out the start address of kernel map from /proc/kallsyms */ static u64 machine__get_kernel_start_addr(struct machine *machine) { - const char *filename; - char path[PATH_MAX]; + char filename[PATH_MAX]; struct process_args args; - if (machine__is_default_guest(machine)) - filename = (char *)symbol_conf.default_guest_kallsyms; - else { - sprintf(path, "%s/proc/kallsyms", machine->root_dir); - filename = path; - } + machine__get_kallsyms_filename(machine, filename, PATH_MAX); if (symbol__restricted_filename(filename, "/proc/kallsyms")) return 0; From 5512cf24bed2de56f1ef44b6cc9a0a9b15499cea Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:39 +0200 Subject: [PATCH 046/171] perf machine: Set up ref_reloc_sym in machine__create_kernel_maps() The ref_reloc_sym is always needed for the kernel map in order to check for relocation. Consequently set it up when the kernel map is created. Otherwise it was only being set up by 'perf record'. 
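A simplified sketch of why the kernel map carries ref_reloc_sym (the helper name below is hypothetical; it mirrors the delta computation used later in this series):

	/* Hypothetical helper: with ref_reloc_sym recorded on the kernel map,
	 * relocation is detected by re-reading the same symbol from the
	 * current kallsyms and comparing it with the recorded address. */
	static u64 kernel_reloc_delta(struct kmap *kmap, const char *kallsyms_filename)
	{
		u64 addr = kallsyms__get_function_start(kallsyms_filename,
							kmap->ref_reloc_sym->name);

		/* 0 means the kernel has not moved since the address was recorded */
		return addr ? addr - kmap->ref_reloc_sym->addr : 0;
	}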
Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 23 +++++++++++++++++++++++ tools/perf/util/machine.h | 2 ++ 2 files changed, 25 insertions(+) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 290c2e6d4001..c872991e0f65 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -832,9 +832,25 @@ static int machine__create_modules(struct machine *machine) return 0; } +const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL}; + int machine__create_kernel_maps(struct machine *machine) { struct dso *kernel = machine__get_kernel(machine); + char filename[PATH_MAX]; + const char *name; + u64 addr = 0; + int i; + + machine__get_kallsyms_filename(machine, filename, PATH_MAX); + + for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) { + addr = kallsyms__get_function_start(filename, name); + if (addr) + break; + } + if (!addr) + return -1; if (kernel == NULL || __machine__create_kernel_maps(machine, kernel) < 0) @@ -853,6 +869,13 @@ int machine__create_kernel_maps(struct machine *machine) * Now that we have all the maps created, just set the ->end of them: */ map_groups__fixup_end(&machine->kmaps); + + if (maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, + addr)) { + machine__destroy_kernel_maps(machine); + return -1; + } + return 0; } diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 477133015440..f77e91e483dc 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -18,6 +18,8 @@ union perf_event; #define HOST_KERNEL_ID (-1) #define DEFAULT_GUEST_KERNEL_ID (0) +extern const char *ref_reloc_sym_names[]; + struct machine { struct rb_node rb_node; pid_t pid; From 0ae617bedde062003fd70e566e9a2601e273ea0e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:40 +0200 Subject: [PATCH 047/171] perf record: Get ref_reloc_sym from kernel map Now that ref_reloc_sym is set up when the kernel map is created, 'perf record' does not need to pass the symbol names to perf_event__synthesize_kernel_mmap() which can read the values needed from ref_reloc_sym directly. Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 10 ++-------- tools/perf/util/event.c | 26 ++++++-------------------- tools/perf/util/event.h | 3 +-- 3 files changed, 9 insertions(+), 30 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3c394bf16fa8..af47531b82ec 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -287,10 +287,7 @@ static void perf_event__synthesize_guest_os(struct machine *machine, void *data) * have no _text sometimes. 
*/ err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine, "_text"); - if (err < 0) - err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine, "_stext"); + machine); if (err < 0) pr_err("Couldn't record guest kernel [%d]'s reference" " relocation symbol.\n", machine->pid); @@ -457,10 +454,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) } err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine, "_text"); - if (err < 0) - err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine, "_stext"); + machine); if (err < 0) pr_err("Couldn't record kernel reference relocation symbol\n" "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 17476df5c7b2..b0f3ca850e9e 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -483,15 +483,13 @@ u64 kallsyms__get_function_start(const char *kallsyms_filename, int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, - struct machine *machine, - const char *symbol_name) + struct machine *machine) { size_t size; - const char *filename, *mmap_name; - char path[PATH_MAX]; + const char *mmap_name; char name_buff[PATH_MAX]; struct map *map; - u64 start; + struct kmap *kmap; int err; /* * We should get this from /sys/kernel/sections/.text, but till that is @@ -513,31 +511,19 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, * see kernel/perf_event.c __perf_event_mmap */ event->header.misc = PERF_RECORD_MISC_KERNEL; - filename = "/proc/kallsyms"; } else { event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - if (machine__is_default_guest(machine)) - filename = (char *) symbol_conf.default_guest_kallsyms; - else { - sprintf(path, "%s/proc/kallsyms", machine->root_dir); - filename = path; - } - } - - start = kallsyms__get_function_start(filename, symbol_name); - if (!start) { - free(event); - return -ENOENT; } map = machine->vmlinux_maps[MAP__FUNCTION]; + kmap = map__kmap(map); size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), - "%s%s", mmap_name, symbol_name) + 1; + "%s%s", mmap_name, kmap->ref_reloc_sym->name) + 1; size = PERF_ALIGN(size, sizeof(u64)); event->mmap.header.type = PERF_RECORD_MMAP; event->mmap.header.size = (sizeof(event->mmap) - (sizeof(event->mmap.filename) - size) + machine->id_hdr_size); - event->mmap.pgoff = start; + event->mmap.pgoff = kmap->ref_reloc_sym->addr; event->mmap.start = map->start; event->mmap.len = map->end - event->mmap.start; event->mmap.pid = machine->pid; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 66a0c0392863..851fa06f4a42 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -214,8 +214,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, struct machine *machine, bool mmap_data); int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, - struct machine *machine, - const char *symbol_name); + struct machine *machine); int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, From a00d28cb72d3629c6481fe21ba6c6b4f96caed49 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:41 +0200 Subject: [PATCH 048/171] perf symbols: Prevent the use of kcore if the kernel has moved Use of kcore is predicated upon it matching the recorded data. If the kernel has been relocated at boot time (i.e. 
since the data was recorded) then do not use kcore. Note that it is possible to make a copy of kcore at the time the data is recorded using 'perf buildid-cache'. Then the perf tools will use the copy because it does match the data. Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-7-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 39ce9adbaaf0..4ac1f871ec27 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -976,6 +976,23 @@ static int validate_kcore_modules(const char *kallsyms_filename, return 0; } +static int validate_kcore_addresses(const char *kallsyms_filename, + struct map *map) +{ + struct kmap *kmap = map__kmap(map); + + if (kmap->ref_reloc_sym && kmap->ref_reloc_sym->name) { + u64 start; + + start = kallsyms__get_function_start(kallsyms_filename, + kmap->ref_reloc_sym->name); + if (start != kmap->ref_reloc_sym->addr) + return -EINVAL; + } + + return validate_kcore_modules(kallsyms_filename, map); +} + struct kcore_mapfn_data { struct dso *dso; enum map_type type; @@ -1019,8 +1036,8 @@ static int dso__load_kcore(struct dso *dso, struct map *map, kallsyms_filename)) return -EINVAL; - /* All modules must be present at their original addresses */ - if (validate_kcore_modules(kallsyms_filename, map)) + /* Modules and kernel must be present at their original addresses */ + if (validate_kcore_addresses(kallsyms_filename, map)) return -EINVAL; md.dso = dso; @@ -1424,7 +1441,7 @@ static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz) continue; scnprintf(kallsyms_filename, sizeof(kallsyms_filename), "%s/%s/kallsyms", dir, dent->d_name); - if (!validate_kcore_modules(kallsyms_filename, map)) { + if (!validate_kcore_addresses(kallsyms_filename, map)) { strlcpy(dir, kallsyms_filename, dir_sz); ret = 0; break; } @@ -1479,7 +1496,7 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) if (fd != -1) { close(fd); /* If module maps match go with /proc/kallsyms */ - if (!validate_kcore_modules("/proc/kallsyms", map)) + if (!validate_kcore_addresses("/proc/kallsyms", map)) goto proc_kallsyms; } From c080f72753def150993144d755379941f8b14683 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:42 +0200 Subject: [PATCH 049/171] perf tests: No need to set up ref_reloc_sym Now that ref_reloc_sym is set up by machine__create_kernel_maps(), the "vmlinux symtab matches kallsyms" test does not have to do it itself.
Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-8-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/vmlinux-kallsyms.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 2bd13edcbc17..3d9088003a5b 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -26,7 +26,6 @@ int test__vmlinux_matches_kallsyms(void) struct map *kallsyms_map, *vmlinux_map; struct machine kallsyms, vmlinux; enum map_type type = MAP__FUNCTION; - struct ref_reloc_sym ref_reloc_sym = { .name = "_stext", }; u64 mem_start, mem_end; /* @@ -70,14 +69,6 @@ int test__vmlinux_matches_kallsyms(void) */ kallsyms_map = machine__kernel_map(&kallsyms, type); - sym = map__find_symbol_by_name(kallsyms_map, ref_reloc_sym.name, NULL); - if (sym == NULL) { - pr_debug("dso__find_symbol_by_name "); - goto out; - } - - ref_reloc_sym.addr = UM(sym->start); - /* * Step 5: * @@ -89,7 +80,6 @@ int test__vmlinux_matches_kallsyms(void) } vmlinux_map = machine__kernel_map(&vmlinux, type); - map__kmap(vmlinux_map)->ref_reloc_sym = &ref_reloc_sym; /* * Step 6: From d9b62aba87a82939c73f451a166c7a21342350d6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:43 +0200 Subject: [PATCH 050/171] perf tools: Adjust kallsyms for relocated kernel If the kernel is relocated at boot time, kallsyms will not match data recorded previously. That does not matter for modules because they are corrected anyway. It also does not matter if vmlinux is being used for symbols. But if perf tools has only kallsyms then the symbols will not match. Fix by applying the delta gained by comparing the old and current addresses of the relocation reference symbol. Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-9-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 4ac1f871ec27..a9d758a3b371 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -627,7 +627,7 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, * kernel range is broken in several maps, named [kernel].N, as we don't have * the original ELF section names vmlinux have. 
*/ -static int dso__split_kallsyms(struct dso *dso, struct map *map, +static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta, symbol_filter_t filter) { struct map_groups *kmaps = map__kmap(map)->kmaps; @@ -692,6 +692,12 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, char dso_name[PATH_MAX]; struct dso *ndso; + if (delta) { + /* Kernel was relocated at boot time */ + pos->start -= delta; + pos->end -= delta; + } + if (count == 0) { curr_map = map; goto filter_symbol; @@ -721,6 +727,10 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, curr_map->map_ip = curr_map->unmap_ip = identity__map_ip; map_groups__insert(kmaps, curr_map); ++kernel_range; + } else if (delta) { + /* Kernel was relocated at boot time */ + pos->start -= delta; + pos->end -= delta; } filter_symbol: if (filter && filter(curr_map, pos)) { @@ -1130,15 +1140,41 @@ out_err: return -EINVAL; } +/* + * If the kernel is relocated at boot time, kallsyms won't match. Compute the + * delta based on the relocation reference symbol. + */ +static int kallsyms__delta(struct map *map, const char *filename, u64 *delta) +{ + struct kmap *kmap = map__kmap(map); + u64 addr; + + if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name) + return 0; + + addr = kallsyms__get_function_start(filename, + kmap->ref_reloc_sym->name); + if (!addr) + return -1; + + *delta = addr - kmap->ref_reloc_sym->addr; + return 0; +} + int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map, symbol_filter_t filter) { + u64 delta = 0; + if (symbol__restricted_filename(filename, "/proc/kallsyms")) return -1; if (dso__load_all_kallsyms(dso, filename, map) < 0) return -1; + if (kallsyms__delta(map, filename, &delta)) + return -1; + symbols__fixup_duplicate(&dso->symbols[map->type]); symbols__fixup_end(&dso->symbols[map->type]); @@ -1150,7 +1186,7 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, if (!dso__load_kcore(dso, map, filename)) return dso__split_kallsyms_for_kcore(dso, map, filter); else - return dso__split_kallsyms(dso, map, filter); + return dso__split_kallsyms(dso, map, delta, filter); } static int dso__load_perf_map(struct dso *dso, struct map *map, From d3b70220292c40d3b499797fd2f33f608fc35edb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:44 +0200 Subject: [PATCH 051/171] perf buildid-cache: Check relocation when checking for existing kcore perf buildid-cache does not make another copy of kcore if the buildid and modules match an existing copy. That does not take into account the possibility that the kernel has been relocated. Extend the check to check if the reference relocation symbol matches too, otherwise do make a copy. 
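A concrete illustration of the check being added (the paths and addresses below are made-up examples, not recorded data):

	/* Same buildid and modules, but the kernel moved (e.g. after kexec),
	 * so the cached kcore copy must not be reused. */
	u64 cached = kallsyms__get_function_start("<cached-copy>/kallsyms", "_text");
	u64 fresh  = kallsyms__get_function_start("<from_dir>/kallsyms", "_text");
	bool reuse = (cached == fresh);	/* false if e.g. 0xffffffff81000000 vs 0xffffffff9a200000 */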
Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-10-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-buildid-cache.c | 33 ++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index cfede86161d8..b22dbb16f877 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -63,11 +63,35 @@ static int build_id_cache__kcore_dir(char *dir, size_t sz) return 0; } +static bool same_kallsyms_reloc(const char *from_dir, char *to_dir) +{ + char from[PATH_MAX]; + char to[PATH_MAX]; + const char *name; + u64 addr1 = 0, addr2 = 0; + int i; + + scnprintf(from, sizeof(from), "%s/kallsyms", from_dir); + scnprintf(to, sizeof(to), "%s/kallsyms", to_dir); + + for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) { + addr1 = kallsyms__get_function_start(from, name); + if (addr1) + break; + } + + if (name) + addr2 = kallsyms__get_function_start(to, name); + + return addr1 == addr2; +} + static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir, size_t to_dir_sz) { char from[PATH_MAX]; char to[PATH_MAX]; + char to_subdir[PATH_MAX]; struct dirent *dent; int ret = -1; DIR *d; @@ -86,10 +110,11 @@ static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir, continue; scnprintf(to, sizeof(to), "%s/%s/modules", to_dir, dent->d_name); - if (!compare_proc_modules(from, to)) { - scnprintf(to, sizeof(to), "%s/%s", to_dir, - dent->d_name); - strlcpy(to_dir, to, to_dir_sz); + scnprintf(to_subdir, sizeof(to_subdir), "%s/%s", + to_dir, dent->d_name); + if (!compare_proc_modules(from, to) && + same_kallsyms_reloc(from_dir, to_subdir)) { + strlcpy(to_dir, to_subdir, to_dir_sz); ret = 0; break; } From 718360c59f34b80d9878429300c1c688f7c2031d Mon Sep 17 00:00:00 2001 From: Noah Massey Date: Thu, 30 Jan 2014 21:31:12 -0500 Subject: [PATCH 052/171] nfs: fix setting of ACLs on file creation. nfs3_get_acl() tries to skip posix equivalent ACLs, but misinterprets the return value of posix_acl_equiv_mode(). Fix it. This is a regression introduced by "nfs: use generic posix ACL infrastructure for v3 Posix ACLs" CC: Christoph Hellwig CC: linux-nfs@vger.kernel.org CC: linux-fsdevel@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs3acl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 9a5ca03fa539..0851f852568d 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -80,7 +80,7 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type) } if (res.acl_access != NULL) { - if (posix_acl_equiv_mode(res.acl_access, NULL) || + if ((posix_acl_equiv_mode(res.acl_access, NULL) == 0) || res.acl_access->a_count == 0) { posix_acl_release(res.acl_access); res.acl_access = NULL; From 17ead6c85c3d0ef57a14d1373f1f1cee2ce60ea8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 1 Feb 2014 14:53:23 -0500 Subject: [PATCH 053/171] NFSv4: Fix memory corruption in nfs4_proc_open_confirm nfs41_wake_and_assign_slot() relies on the task->tk_msg.rpc_argp and task->tk_msg.rpc_resp always pointing to the session sequence arguments. 
nfs4_proc_open_confirm tries to pull a fast one by reusing the open sequence structure, thus causing corruption of the NFSv4 slot table. Cc: stable@vger.kernel.org # 3.12+ Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 ++++---- include/linux/nfs_xdr.h | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 42da6af77587..2da6a698b8f7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1620,15 +1620,15 @@ static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata) { struct nfs4_opendata *data = calldata; - nfs40_setup_sequence(data->o_arg.server, &data->o_arg.seq_args, - &data->o_res.seq_res, task); + nfs40_setup_sequence(data->o_arg.server, &data->c_arg.seq_args, + &data->c_res.seq_res, task); } static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) { struct nfs4_opendata *data = calldata; - nfs40_sequence_done(task, &data->o_res.seq_res); + nfs40_sequence_done(task, &data->c_res.seq_res); data->rpc_status = task->tk_status; if (data->rpc_status == 0) { @@ -1686,7 +1686,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) }; int status; - nfs4_init_sequence(&data->o_arg.seq_args, &data->o_res.seq_res, 1); + nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1); kref_get(&data->kref); data->rpc_done = 0; data->rpc_status = 0; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 3ccfcecf8999..b2fb167b2e6d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -379,12 +379,14 @@ struct nfs_openres { * Arguments to the open_confirm call. */ struct nfs_open_confirmargs { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; nfs4_stateid * stateid; struct nfs_seqid * seqid; }; struct nfs_open_confirmres { + struct nfs4_sequence_res seq_res; nfs4_stateid stateid; struct nfs_seqid * seqid; }; From 20b9a9024540a775395d5d1f41eec0ec6ec41f9b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 1 Feb 2014 13:47:06 -0500 Subject: [PATCH 054/171] NFSv4.1: nfs4_destroy_session must call rpc_destroy_waitqueue There may still be timers active on the session waitqueues. Make sure that we kill them before freeing the memory. 
Cc: stable@vger.kernel.org # 3.12+ Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 2 +- fs/nfs/nfs4session.c | 25 ++++++++++++++++++++----- fs/nfs/nfs4session.h | 2 +- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index dbb3e1f30c68..860ad26a5590 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -170,7 +170,7 @@ void nfs41_shutdown_client(struct nfs_client *clp) void nfs40_shutdown_client(struct nfs_client *clp) { if (clp->cl_slot_tbl) { - nfs4_release_slot_table(clp->cl_slot_tbl); + nfs4_shutdown_slot_table(clp->cl_slot_tbl); kfree(clp->cl_slot_tbl); } } diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index cf883c7ae053..e799dc3c3b1d 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -231,14 +231,23 @@ out: return ret; } +/* + * nfs4_release_slot_table - release all slot table entries + */ +static void nfs4_release_slot_table(struct nfs4_slot_table *tbl) +{ + nfs4_shrink_slot_table(tbl, 0); +} + /** - * nfs4_release_slot_table - release resources attached to a slot table + * nfs4_shutdown_slot_table - release resources attached to a slot table * @tbl: slot table to shut down * */ -void nfs4_release_slot_table(struct nfs4_slot_table *tbl) +void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl) { - nfs4_shrink_slot_table(tbl, 0); + nfs4_release_slot_table(tbl); + rpc_destroy_wait_queue(&tbl->slot_tbl_waitq); } /** @@ -422,7 +431,7 @@ void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, spin_unlock(&tbl->slot_tbl_lock); } -static void nfs4_destroy_session_slot_tables(struct nfs4_session *session) +static void nfs4_release_session_slot_tables(struct nfs4_session *session) { nfs4_release_slot_table(&session->fc_slot_table); nfs4_release_slot_table(&session->bc_slot_table); @@ -450,7 +459,7 @@ int nfs4_setup_session_slot_tables(struct nfs4_session *ses) if (status && tbl->slots == NULL) /* Fore and back channel share a connection so get * both slot tables or neither */ - nfs4_destroy_session_slot_tables(ses); + nfs4_release_session_slot_tables(ses); return status; } @@ -470,6 +479,12 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) return session; } +static void nfs4_destroy_session_slot_tables(struct nfs4_session *session) +{ + nfs4_shutdown_slot_table(&session->fc_slot_table); + nfs4_shutdown_slot_table(&session->bc_slot_table); +} + void nfs4_destroy_session(struct nfs4_session *session) { struct rpc_xprt *xprt; diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 232306100651..b34ada9bc6a2 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -74,7 +74,7 @@ enum nfs4_session_state { extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl, unsigned int max_reqs, const char *queue); -extern void nfs4_release_slot_table(struct nfs4_slot_table *tbl); +extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl); extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl); extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl); From 85fc73a2cdf10cf42bc36fb3bca3896b2095a1c2 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Sat, 1 Feb 2014 13:30:19 +0100 Subject: [PATCH 055/171] x86: Fix the initialization of physnode_map With DISCONTIGMEM, the mapping between a pfn and its owning node is initialized using data provided by the BIOS. However, the initialization may fail if the extents are not aligned to section boundary (64M). 
The symptom of this bug is an early boot failure in pfn_to_page(), as it tries to access NODE_DATA(__nid) using an index from an uninitialized element of the physnode_map[] array. While the bug is always present, it is more likely to be hit in kdump kernels on large machines, because: 1. The memory map for a kdump kernel is specified as exactmap, and exactmap is more likely to be unaligned. 2. Large reservations are more likely to span across a 64M boundary. [ hpa: fixed incorrect use of "pfn" instead of "start" ] Signed-off-by: Petr Tesarik Link: http://lkml.kernel.org/r/20140201133019.32e56f86@hananiah.suse.cz Acked-by: David Rientjes Signed-off-by: H. Peter Anvin --- arch/x86/mm/numa_32.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 0342d27ca798..47b6436e41c2 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -52,6 +52,8 @@ void memory_present(int nid, unsigned long start, unsigned long end) nid, start, end); printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); printk(KERN_DEBUG " "); + start = round_down(start, PAGES_PER_SECTION); + end = round_up(end, PAGES_PER_SECTION); for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { physnode_map[pfn / PAGES_PER_SECTION] = nid; printk(KERN_CONT "%lx ", pfn); From 9ac27090f61ea6735a62b0a98c7669c833bcdc09 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 31 Jan 2014 16:53:39 -0700 Subject: [PATCH 056/171] NVMe: Namespace use after free on surprise removal An nvme block device may have open references when the device is removed. New commands may still be sent on the removed device, so we need to ref count the opens, return errors for new commands, and not free the namespace and nvme_dev until all references are closed.
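In outline this is the usual kref lifetime pattern; a condensed sketch of the reference flow (the actual open/release hooks are in the diff below):

	kref_init(&dev->kref);			/* probe: initial reference           */
	kref_get(&dev->kref);			/* nvme_open(): each opener pins dev  */
	kref_put(&dev->kref, nvme_free_dev);	/* nvme_release(): opener drops it    */
	kref_put(&dev->kref, nvme_free_dev);	/* remove: drop the probe reference;
						   the last put frees namespaces+dev */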
Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 55 ++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 13 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 23728099e36f..cd39390710a0 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1716,10 +1716,31 @@ static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, #define nvme_compat_ioctl NULL #endif +static int nvme_open(struct block_device *bdev, fmode_t mode) +{ + struct nvme_ns *ns = bdev->bd_disk->private_data; + struct nvme_dev *dev = ns->dev; + + kref_get(&dev->kref); + return 0; +} + +static void nvme_free_dev(struct kref *kref); + +static void nvme_release(struct gendisk *disk, fmode_t mode) +{ + struct nvme_ns *ns = disk->private_data; + struct nvme_dev *dev = ns->dev; + + kref_put(&dev->kref, nvme_free_dev); +} + static const struct block_device_operations nvme_fops = { .owner = THIS_MODULE, .ioctl = nvme_ioctl, .compat_ioctl = nvme_compat_ioctl, + .open = nvme_open, + .release = nvme_release, }; static void nvme_resubmit_bios(struct nvme_queue *nvmeq) @@ -1849,13 +1870,6 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid, return NULL; } -static void nvme_ns_free(struct nvme_ns *ns) -{ - put_disk(ns->disk); - blk_cleanup_queue(ns->queue); - kfree(ns); -} - static int set_queue_count(struct nvme_dev *dev, int count) { int status; @@ -2287,12 +2301,13 @@ static void nvme_dev_shutdown(struct nvme_dev *dev) static void nvme_dev_remove(struct nvme_dev *dev) { - struct nvme_ns *ns, *next; + struct nvme_ns *ns; - list_for_each_entry_safe(ns, next, &dev->namespaces, list) { - list_del(&ns->list); - del_gendisk(ns->disk); - nvme_ns_free(ns); + list_for_each_entry(ns, &dev->namespaces, list) { + if (ns->disk->flags & GENHD_FL_UP) + del_gendisk(ns->disk); + if (!blk_queue_dying(ns->queue)) + blk_cleanup_queue(ns->queue); } } @@ -2349,9 +2364,22 @@ static void nvme_release_instance(struct nvme_dev *dev) spin_unlock(&dev_list_lock); } +static void nvme_free_namespaces(struct nvme_dev *dev) +{ + struct nvme_ns *ns, *next; + + list_for_each_entry_safe(ns, next, &dev->namespaces, list) { + list_del(&ns->list); + put_disk(ns->disk); + kfree(ns); + } +} + static void nvme_free_dev(struct kref *kref) { struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); + + nvme_free_namespaces(dev); kfree(dev->queues); kfree(dev->entry); kfree(dev); @@ -2525,6 +2553,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto release_pools; } + kref_init(&dev->kref); result = nvme_dev_add(dev); if (result) goto shutdown; @@ -2540,11 +2569,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto remove; dev->initialized = 1; - kref_init(&dev->kref); return 0; remove: nvme_dev_remove(dev); + nvme_free_namespaces(dev); shutdown: nvme_dev_shutdown(dev); release_pools: From daa436e67cd2dcac7cbb505bb4425fdfdafaa5a7 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 30 Jan 2014 19:51:14 -0800 Subject: [PATCH 057/171] hwmon: (pmbus) Support per-page exponent in linear mode Some chips use different exponents for sensors on different pages or rails. Detect and store exponent per page to support this situation. This fixes a problem with wrong voltages seen on UCD90120. 
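For reference, in LINEAR16 mode the output voltage is mantissa * 2^exponent, with the 5-bit signed exponent taken from VOUT_MODE for the page being read. A worked example with assumed register values (not measured data) shows why a single stored exponent is not enough:

	page 0: VOUT_MODE = 0x17 -> exponent = -9, READ_VOUT = 0x199A (6554)
	        voltage = 6554 * 2^-9 V ~= 12.80 V
	page 1: VOUT_MODE = 0x1B -> exponent = -5, READ_VOUT = 0x0035 (53)
	        voltage = 53 * 2^-5 V   ~= 1.66 V

	Decoding page 1 with page 0's exponent (-9) would report ~0.10 V,
	which is the kind of wrong reading fixed by storing exponent[page].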
Reported-by: Soren Brinkmann Tested-by: Soren Brinkmann Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus_core.c | 68 +++++++++++++++++--------------- 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index 3cbf66e9d861..291d11fe93e7 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -90,7 +90,8 @@ struct pmbus_data { u32 flags; /* from platform data */ - int exponent; /* linear mode: exponent for output voltages */ + int exponent[PMBUS_PAGES]; + /* linear mode: exponent for output voltages */ const struct pmbus_driver_info *info; @@ -410,7 +411,7 @@ static long pmbus_reg2data_linear(struct pmbus_data *data, long val; if (sensor->class == PSC_VOLTAGE_OUT) { /* LINEAR16 */ - exponent = data->exponent; + exponent = data->exponent[sensor->page]; mantissa = (u16) sensor->data; } else { /* LINEAR11 */ exponent = ((s16)sensor->data) >> 11; @@ -516,7 +517,7 @@ static long pmbus_reg2data(struct pmbus_data *data, struct pmbus_sensor *sensor) #define MIN_MANTISSA (511 * 1000) static u16 pmbus_data2reg_linear(struct pmbus_data *data, - enum pmbus_sensor_classes class, long val) + struct pmbus_sensor *sensor, long val) { s16 exponent = 0, mantissa; bool negative = false; @@ -525,7 +526,7 @@ static u16 pmbus_data2reg_linear(struct pmbus_data *data, if (val == 0) return 0; - if (class == PSC_VOLTAGE_OUT) { + if (sensor->class == PSC_VOLTAGE_OUT) { /* LINEAR16 does not support negative voltages */ if (val < 0) return 0; @@ -534,10 +535,10 @@ static u16 pmbus_data2reg_linear(struct pmbus_data *data, * For a static exponents, we don't have a choice * but to adjust the value to it. */ - if (data->exponent < 0) - val <<= -data->exponent; + if (data->exponent[sensor->page] < 0) + val <<= -data->exponent[sensor->page]; else - val >>= data->exponent; + val >>= data->exponent[sensor->page]; val = DIV_ROUND_CLOSEST(val, 1000); return val & 0xffff; } @@ -548,14 +549,14 @@ static u16 pmbus_data2reg_linear(struct pmbus_data *data, } /* Power is in uW. Convert to mW before converting. */ - if (class == PSC_POWER) + if (sensor->class == PSC_POWER) val = DIV_ROUND_CLOSEST(val, 1000L); /* * For simplicity, convert fan data to milli-units * before calculating the exponent. */ - if (class == PSC_FAN) + if (sensor->class == PSC_FAN) val = val * 1000; /* Reduce large mantissa until it fits into 10 bit */ @@ -585,22 +586,22 @@ static u16 pmbus_data2reg_linear(struct pmbus_data *data, } static u16 pmbus_data2reg_direct(struct pmbus_data *data, - enum pmbus_sensor_classes class, long val) + struct pmbus_sensor *sensor, long val) { long m, b, R; - m = data->info->m[class]; - b = data->info->b[class]; - R = data->info->R[class]; + m = data->info->m[sensor->class]; + b = data->info->b[sensor->class]; + R = data->info->R[sensor->class]; /* Power is in uW. Adjust R and b. 
*/ - if (class == PSC_POWER) { + if (sensor->class == PSC_POWER) { R -= 3; b *= 1000; } /* Calculate Y = (m * X + b) * 10^R */ - if (class != PSC_FAN) { + if (sensor->class != PSC_FAN) { R -= 3; /* Adjust R and b for data in milli-units */ b *= 1000; } @@ -619,7 +620,7 @@ static u16 pmbus_data2reg_direct(struct pmbus_data *data, } static u16 pmbus_data2reg_vid(struct pmbus_data *data, - enum pmbus_sensor_classes class, long val) + struct pmbus_sensor *sensor, long val) { val = clamp_val(val, 500, 1600); @@ -627,20 +628,20 @@ static u16 pmbus_data2reg_vid(struct pmbus_data *data, } static u16 pmbus_data2reg(struct pmbus_data *data, - enum pmbus_sensor_classes class, long val) + struct pmbus_sensor *sensor, long val) { u16 regval; - switch (data->info->format[class]) { + switch (data->info->format[sensor->class]) { case direct: - regval = pmbus_data2reg_direct(data, class, val); + regval = pmbus_data2reg_direct(data, sensor, val); break; case vid: - regval = pmbus_data2reg_vid(data, class, val); + regval = pmbus_data2reg_vid(data, sensor, val); break; case linear: default: - regval = pmbus_data2reg_linear(data, class, val); + regval = pmbus_data2reg_linear(data, sensor, val); break; } return regval; @@ -746,7 +747,7 @@ static ssize_t pmbus_set_sensor(struct device *dev, return -EINVAL; mutex_lock(&data->update_lock); - regval = pmbus_data2reg(data, sensor->class, val); + regval = pmbus_data2reg(data, sensor, val); ret = _pmbus_write_word_data(client, sensor->page, sensor->reg, regval); if (ret < 0) rv = ret; @@ -1643,12 +1644,13 @@ static int pmbus_find_attributes(struct i2c_client *client, * This function is called for all chips. */ static int pmbus_identify_common(struct i2c_client *client, - struct pmbus_data *data) + struct pmbus_data *data, int page) { int vout_mode = -1; - if (pmbus_check_byte_register(client, 0, PMBUS_VOUT_MODE)) - vout_mode = _pmbus_read_byte_data(client, 0, PMBUS_VOUT_MODE); + if (pmbus_check_byte_register(client, page, PMBUS_VOUT_MODE)) + vout_mode = _pmbus_read_byte_data(client, page, + PMBUS_VOUT_MODE); if (vout_mode >= 0 && vout_mode != 0xff) { /* * Not all chips support the VOUT_MODE command, @@ -1659,7 +1661,7 @@ static int pmbus_identify_common(struct i2c_client *client, if (data->info->format[PSC_VOLTAGE_OUT] != linear) return -ENODEV; - data->exponent = ((s8)(vout_mode << 3)) >> 3; + data->exponent[page] = ((s8)(vout_mode << 3)) >> 3; break; case 1: /* VID mode */ if (data->info->format[PSC_VOLTAGE_OUT] != vid) @@ -1674,7 +1676,7 @@ static int pmbus_identify_common(struct i2c_client *client, } } - pmbus_clear_fault_page(client, 0); + pmbus_clear_fault_page(client, page); return 0; } @@ -1682,7 +1684,7 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data, struct pmbus_driver_info *info) { struct device *dev = &client->dev; - int ret; + int page, ret; /* * Some PMBus chips don't support PMBUS_STATUS_BYTE, so try @@ -1715,10 +1717,12 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data, return -ENODEV; } - ret = pmbus_identify_common(client, data); - if (ret < 0) { - dev_err(dev, "Failed to identify chip capabilities\n"); - return ret; + for (page = 0; page < info->pages; page++) { + ret = pmbus_identify_common(client, data, page); + if (ret < 0) { + dev_err(dev, "Failed to identify chip capabilities\n"); + return ret; + } } return 0; } From b0dcfd87323ea86501e93d0fa2a98d2fd3579bcf Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Tue, 21 Jan 2014 16:55:18 +0100 Subject: [PATCH 058/171] pinctrl: 
at91: use locked variant of irq_set_handler When setting the gpio irq type, use the __irq_set_handler_locked() variant instead of irq_set_handler() to prevent a false spinlock recursion warning. Signed-off-by: Nicolas Ferre Cc: stable # v3.12 Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-at91.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index 38c6f8b9790e..d990e33d8aa7 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -1286,22 +1286,22 @@ static int alt_gpio_irq_type(struct irq_data *d, unsigned type) switch (type) { case IRQ_TYPE_EDGE_RISING: - irq_set_handler(d->irq, handle_simple_irq); + __irq_set_handler_locked(d->irq, handle_simple_irq); writel_relaxed(mask, pio + PIO_ESR); writel_relaxed(mask, pio + PIO_REHLSR); break; case IRQ_TYPE_EDGE_FALLING: - irq_set_handler(d->irq, handle_simple_irq); + __irq_set_handler_locked(d->irq, handle_simple_irq); writel_relaxed(mask, pio + PIO_ESR); writel_relaxed(mask, pio + PIO_FELLSR); break; case IRQ_TYPE_LEVEL_LOW: - irq_set_handler(d->irq, handle_level_irq); + __irq_set_handler_locked(d->irq, handle_level_irq); writel_relaxed(mask, pio + PIO_LSR); writel_relaxed(mask, pio + PIO_FELLSR); break; case IRQ_TYPE_LEVEL_HIGH: - irq_set_handler(d->irq, handle_level_irq); + __irq_set_handler_locked(d->irq, handle_level_irq); writel_relaxed(mask, pio + PIO_LSR); writel_relaxed(mask, pio + PIO_REHLSR); break; @@ -1310,7 +1310,7 @@ static int alt_gpio_irq_type(struct irq_data *d, unsigned type) * disable additional interrupt modes: * fall back to default behavior */ - irq_set_handler(d->irq, handle_simple_irq); + __irq_set_handler_locked(d->irq, handle_simple_irq); writel_relaxed(mask, pio + PIO_AIMDR); return 0; case IRQ_TYPE_NONE: From 795779df22afc8bdee4e9fbe5c18c47e44974d75 Mon Sep 17 00:00:00 2001 From: Chris Ruehl Date: Wed, 22 Jan 2014 11:14:51 +0800 Subject: [PATCH 059/171] pinctrl: imx27: fix wrong offset to ICONFB The offset to ICONFB was incorrect; this patch sets the correct value, 0x14. The dev_dbg in imx1_write_2bit printed the wrong address and has been moved after the address calculation.
Cc: stable@vger.kernel.org Signed-off-by: Chris Ruehl Reviewed-by: Markus Pargmann Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-imx1-core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/pinctrl-imx1-core.c b/drivers/pinctrl/pinctrl-imx1-core.c index 17aecde1b51d..9e7893fe96e0 100644 --- a/drivers/pinctrl/pinctrl-imx1-core.c +++ b/drivers/pinctrl/pinctrl-imx1-core.c @@ -45,7 +45,7 @@ struct imx1_pinctrl { #define MX1_DDIR 0x00 #define MX1_OCR 0x04 #define MX1_ICONFA 0x0c -#define MX1_ICONFB 0x10 +#define MX1_ICONFB 0x14 #define MX1_GIUS 0x20 #define MX1_GPR 0x38 #define MX1_PUEN 0x40 @@ -97,13 +97,13 @@ static void imx1_write_2bit(struct imx1_pinctrl *ipctl, unsigned int pin_id, u32 old_val; u32 new_val; - dev_dbg(ipctl->dev, "write: register 0x%p offset %d value 0x%x\n", - reg, offset, value); - /* Use the next register if the pin's port pin number is >=16 */ if (pin_id % 32 >= 16) reg += 0x04; + dev_dbg(ipctl->dev, "write: register 0x%p offset %d value 0x%x\n", + reg, offset, value); + /* Get current state of pins */ old_val = readl(reg); old_val &= mask; From f17248ed868767567298e1cdf06faf8159a81f7c Mon Sep 17 00:00:00 2001 From: Tony Prisk Date: Thu, 23 Jan 2014 21:57:33 +1300 Subject: [PATCH 060/171] pinctrl: vt8500: Change devicetree data parsing Due to an assumption in the VT8500 pinctrl driver, the value passed from devicetree for 'wm,pull' was not explicitly translated before being passed to pinconf. Since v3.10, changes to 'enum pin_config_param', PIN_CONFIG_BIAS_PULL_(UP/DOWN) no longer map 1-to-1 with the expected values in devicetree. This patch adds a small translation between the devicetree values (0..2) and the enum pin_config_param equivalent values. Cc: # v3.10+ Signed-off-by: Tony Prisk Signed-off-by: Linus Walleij --- drivers/pinctrl/vt8500/pinctrl-wmt.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/vt8500/pinctrl-wmt.c b/drivers/pinctrl/vt8500/pinctrl-wmt.c index b28d1af9c232..9802b67040cc 100644 --- a/drivers/pinctrl/vt8500/pinctrl-wmt.c +++ b/drivers/pinctrl/vt8500/pinctrl-wmt.c @@ -276,7 +276,20 @@ static int wmt_pctl_dt_node_to_map_pull(struct wmt_pinctrl_data *data, if (!configs) return -ENOMEM; - configs[0] = pull; + switch (pull) { + case 0: + configs[0] = PIN_CONFIG_BIAS_DISABLE; + break; + case 1: + configs[0] = PIN_CONFIG_BIAS_PULL_DOWN; + break; + case 2: + configs[0] = PIN_CONFIG_BIAS_PULL_UP; + break; + default: + configs[0] = PIN_CONFIG_BIAS_DISABLE; + dev_err(data->dev, "invalid pull state %d - disabling\n", pull); + } map->type = PIN_MAP_TYPE_CONFIGS_PIN; map->data.configs.group_or_pin = data->groups[group]; From e3365d0974ed64157f5b5a576c611057dc40a595 Mon Sep 17 00:00:00 2001 From: Chris Ruehl Date: Wed, 22 Jan 2014 11:14:52 +0800 Subject: [PATCH 061/171] pinctrl: imx27: fix offset calculation in imx_read_2bit The offset into the 2-bit registers was calculated incorrectly; this patch fixes the problem. The debugfs printout for oconf, iconfa, iconfb now shows the real values.
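With two bits per pin and sixteen pins per 32-bit register, the bit offset inside a register is (pin_id % 16) * 2, and pins 16..31 of a port live in the following register (reg + 0x04). A short worked example with assumed pin numbers:

	pin_id 5  : stays in the first register, offset (5 % 16) * 2 = 10 -> bits 10..11
	pin_id 20 : 20 % 32 >= 16, so reg += 0x04; offset (20 % 16) * 2 = 8 -> bits 8..9

	The old code used (pin_id % 16) without the multiplication, so pin 5
	was read starting at bit 5 and debugfs showed values from the wrong pins.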
Cc: stable@vger.kernel.org Signed-off-by: Chris Ruehl Reviewed-by: Markus Pargmann Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-imx1-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-imx1-core.c b/drivers/pinctrl/pinctrl-imx1-core.c index 9e7893fe96e0..815384b377b5 100644 --- a/drivers/pinctrl/pinctrl-imx1-core.c +++ b/drivers/pinctrl/pinctrl-imx1-core.c @@ -139,7 +139,7 @@ static int imx1_read_2bit(struct imx1_pinctrl *ipctl, unsigned int pin_id, u32 reg_offset) { void __iomem *reg = imx1_mem(ipctl, pin_id) + reg_offset; - int offset = pin_id % 16; + int offset = (pin_id % 16) * 2; /* Use the next register if the pin's port pin number is >=16 */ if (pin_id % 32 >= 16) From fa74d0d3e30cf079e94db327ea6d4726cd7d5871 Mon Sep 17 00:00:00 2001 From: Qipan Li Date: Mon, 27 Jan 2014 14:01:29 +0800 Subject: [PATCH 062/171] pinctrl: sirf: correct the pin index of ac97_pins group according to datasheet and ac97_muxmask assignment, ac97_pins should be corrected. Signed-off-by: Qipan Li Signed-off-by: Barry Song Signed-off-by: Linus Walleij --- drivers/pinctrl/sirf/pinctrl-prima2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sirf/pinctrl-prima2.c b/drivers/pinctrl/sirf/pinctrl-prima2.c index 37b42651d76a..dde0285544d6 100644 --- a/drivers/pinctrl/sirf/pinctrl-prima2.c +++ b/drivers/pinctrl/sirf/pinctrl-prima2.c @@ -413,7 +413,7 @@ static const struct sirfsoc_padmux ac97_padmux = { .funcval = 0, }; -static const unsigned ac97_pins[] = { 33, 34, 35, 36 }; +static const unsigned ac97_pins[] = { 43, 44, 45, 46 }; static const struct sirfsoc_muxmask spi1_muxmask[] = { { From 4fa71c1550a857ff1dbfe9e99acc1f4cfec5f0d0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Feb 2014 09:37:59 +0100 Subject: [PATCH 063/171] ALSA: usb-audio: Add missing kconfig dependecy The commit 44dcbbb1cd61 introduced the usage of bitreverse helpers but forgot to add the dependency. This patch adds the selection for CONFIG_BITREVERSE. Fixes: 44dcbbb1cd61 ('ALSA: snd-usb: add support for bit-reversed byte formats') Reported-by: Fengguang Wu Cc: Signed-off-by: Takashi Iwai --- sound/usb/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/Kconfig b/sound/usb/Kconfig index de9408b83f75..e05a86b7c0da 100644 --- a/sound/usb/Kconfig +++ b/sound/usb/Kconfig @@ -14,6 +14,7 @@ config SND_USB_AUDIO select SND_HWDEP select SND_RAWMIDI select SND_PCM + select BITREVERSE help Say Y here to include support for USB audio and USB MIDI devices. From e85fc9805591a17ca8af50023ee8e2b61d9a123b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 3 Feb 2014 06:43:59 -0500 Subject: [PATCH 064/171] Revert "xen/grant-table: Avoid m2p_override during mapping" This reverts commit 08ece5bb2312b4510b161a6ef6682f37f4eac8a1. As it breaks ARM builds and needs more attention on the ARM side. 
Acked-by: David Vrabel Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/xen/page.h | 5 +- arch/x86/xen/p2m.c | 17 +++++- drivers/block/xen-blkback/blkback.c | 15 +++-- drivers/xen/gntdev.c | 13 ++--- drivers/xen/grant-table.c | 89 +++++------------------------ include/xen/grant_table.h | 8 +-- 6 files changed, 46 insertions(+), 101 deletions(-) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 787e1bb5aafc..3e276eb23d1b 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -52,8 +52,7 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s, extern int m2p_add_override(unsigned long mfn, struct page *page, struct gnttab_map_grant_ref *kmap_op); extern int m2p_remove_override(struct page *page, - struct gnttab_map_grant_ref *kmap_op, - unsigned long mfn); + struct gnttab_map_grant_ref *kmap_op); extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); @@ -122,7 +121,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) pfn = m2p_find_override_pfn(mfn, ~0); } - /* + /* * pfn is ~0 if there are no entries in the m2p for mfn or if the * entry doesn't map back to the mfn and m2p_override doesn't have a * valid entry for it. diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 8009acbe41e4..696c694986d0 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -899,6 +899,13 @@ int m2p_add_override(unsigned long mfn, struct page *page, "m2p_add_override: pfn %lx not mapped", pfn)) return -EINVAL; } + WARN_ON(PagePrivate(page)); + SetPagePrivate(page); + set_page_private(page, mfn); + page->index = pfn_to_mfn(pfn); + + if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) + return -ENOMEM; if (kmap_op != NULL) { if (!PageHighMem(page)) { @@ -937,16 +944,19 @@ int m2p_add_override(unsigned long mfn, struct page *page, } EXPORT_SYMBOL_GPL(m2p_add_override); int m2p_remove_override(struct page *page, - struct gnttab_map_grant_ref *kmap_op, - unsigned long mfn) + struct gnttab_map_grant_ref *kmap_op) { unsigned long flags; + unsigned long mfn; unsigned long pfn; unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; pfn = page_to_pfn(page); + mfn = get_phys_to_machine(pfn); + if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) + return -EINVAL; if (!PageHighMem(page)) { address = (unsigned long)__va(pfn << PAGE_SHIFT); @@ -960,7 +970,10 @@ int m2p_remove_override(struct page *page, spin_lock_irqsave(&m2p_override_lock, flags); list_del(&page->lru); spin_unlock_irqrestore(&m2p_override_lock, flags); + WARN_ON(!PagePrivate(page)); + ClearPagePrivate(page); + set_phys_to_machine(pfn, page->index); if (kmap_op != NULL) { if (!PageHighMem(page)) { struct multicall_space mcs; diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 875025f299b6..6620b73d0490 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -285,7 +285,8 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST || !rb_next(&persistent_gnt->node)) { - ret = gnttab_unmap_refs(unmap, pages, segs_to_unmap); + ret = gnttab_unmap_refs(unmap, NULL, pages, + segs_to_unmap); BUG_ON(ret); put_free_pages(blkif, pages, segs_to_unmap); segs_to_unmap = 0; @@ -320,7 +321,8 @@ static void unmap_purged_grants(struct work_struct *work) pages[segs_to_unmap] = 
persistent_gnt->page; if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) { - ret = gnttab_unmap_refs(unmap, pages, segs_to_unmap); + ret = gnttab_unmap_refs(unmap, NULL, pages, + segs_to_unmap); BUG_ON(ret); put_free_pages(blkif, pages, segs_to_unmap); segs_to_unmap = 0; @@ -328,7 +330,7 @@ static void unmap_purged_grants(struct work_struct *work) kfree(persistent_gnt); } if (segs_to_unmap > 0) { - ret = gnttab_unmap_refs(unmap, pages, segs_to_unmap); + ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap); BUG_ON(ret); put_free_pages(blkif, pages, segs_to_unmap); } @@ -668,14 +670,15 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif, GNTMAP_host_map, pages[i]->handle); pages[i]->handle = BLKBACK_INVALID_HANDLE; if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) { - ret = gnttab_unmap_refs(unmap, unmap_pages, invcount); + ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, + invcount); BUG_ON(ret); put_free_pages(blkif, unmap_pages, invcount); invcount = 0; } } if (invcount) { - ret = gnttab_unmap_refs(unmap, unmap_pages, invcount); + ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount); BUG_ON(ret); put_free_pages(blkif, unmap_pages, invcount); } @@ -737,7 +740,7 @@ again: } if (segs_to_map) { - ret = gnttab_map_refs(map, pages_to_gnt, segs_to_map); + ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map); BUG_ON(ret); } diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 34a2704fbc88..073b4a19a8b0 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -284,10 +284,8 @@ static int map_grant_pages(struct grant_map *map) } pr_debug("map %d+%d\n", map->index, map->count); - err = gnttab_map_refs_userspace(map->map_ops, - use_ptemod ? map->kmap_ops : NULL, - map->pages, - map->count); + err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL, + map->pages, map->count); if (err) return err; @@ -317,10 +315,9 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) } } - err = gnttab_unmap_refs_userspace(map->unmap_ops + offset, - use_ptemod ? map->kmap_ops + offset : NULL, - map->pages + offset, - pages); + err = gnttab_unmap_refs(map->unmap_ops + offset, + use_ptemod ? 
map->kmap_ops + offset : NULL, map->pages + offset, + pages); if (err) return err; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 8ee13e2e45e2..b84e3ab839aa 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -928,17 +928,15 @@ void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count) } EXPORT_SYMBOL_GPL(gnttab_batch_copy); -int __gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, +int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count, - bool m2p_override) + struct page **pages, unsigned int count) { int i, ret; bool lazy = false; pte_t *pte; - unsigned long mfn, pfn; + unsigned long mfn; - BUG_ON(kmap_ops && !m2p_override); ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count); if (ret) return ret; @@ -957,12 +955,10 @@ int __gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, set_phys_to_machine(map_ops[i].host_addr >> PAGE_SHIFT, map_ops[i].dev_bus_addr >> PAGE_SHIFT); } - return 0; + return ret; } - if (m2p_override && - !in_interrupt() && - paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { + if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { arch_enter_lazy_mmu_mode(); lazy = true; } @@ -979,20 +975,8 @@ int __gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, } else { mfn = PFN_DOWN(map_ops[i].dev_bus_addr); } - pfn = page_to_pfn(pages[i]); - - WARN_ON(PagePrivate(pages[i])); - SetPagePrivate(pages[i]); - set_page_private(pages[i], mfn); - - pages[i]->index = pfn_to_mfn(pfn); - if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { - ret = -ENOMEM; - goto out; - } - if (m2p_override) - ret = m2p_add_override(mfn, pages[i], kmap_ops ? - &kmap_ops[i] : NULL); + ret = m2p_add_override(mfn, pages[i], kmap_ops ? 
+ &kmap_ops[i] : NULL); if (ret) goto out; } @@ -1003,32 +987,15 @@ int __gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, return ret; } - -int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, - struct page **pages, unsigned int count) -{ - return __gnttab_map_refs(map_ops, NULL, pages, count, false); -} EXPORT_SYMBOL_GPL(gnttab_map_refs); -int gnttab_map_refs_userspace(struct gnttab_map_grant_ref *map_ops, - struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count) -{ - return __gnttab_map_refs(map_ops, kmap_ops, pages, count, true); -} -EXPORT_SYMBOL_GPL(gnttab_map_refs_userspace); - -int __gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, +int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count, - bool m2p_override) + struct page **pages, unsigned int count) { int i, ret; bool lazy = false; - unsigned long pfn, mfn; - BUG_ON(kmap_ops && !m2p_override); ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count); if (ret) return ret; @@ -1039,33 +1006,17 @@ int __gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, set_phys_to_machine(unmap_ops[i].host_addr >> PAGE_SHIFT, INVALID_P2M_ENTRY); } - return 0; + return ret; } - if (m2p_override && - !in_interrupt() && - paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { + if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { arch_enter_lazy_mmu_mode(); lazy = true; } for (i = 0; i < count; i++) { - pfn = page_to_pfn(pages[i]); - mfn = get_phys_to_machine(pfn); - if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { - ret = -EINVAL; - goto out; - } - - set_page_private(pages[i], INVALID_P2M_ENTRY); - WARN_ON(!PagePrivate(pages[i])); - ClearPagePrivate(pages[i]); - set_phys_to_machine(pfn, pages[i]->index); - if (m2p_override) - ret = m2p_remove_override(pages[i], - kmap_ops ? - &kmap_ops[i] : NULL, - mfn); + ret = m2p_remove_override(pages[i], kmap_ops ? 
+ &kmap_ops[i] : NULL); if (ret) goto out; } @@ -1076,22 +1027,8 @@ int __gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, return ret; } - -int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *map_ops, - struct page **pages, unsigned int count) -{ - return __gnttab_unmap_refs(map_ops, NULL, pages, count, false); -} EXPORT_SYMBOL_GPL(gnttab_unmap_refs); -int gnttab_unmap_refs_userspace(struct gnttab_unmap_grant_ref *map_ops, - struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count) -{ - return __gnttab_unmap_refs(map_ops, kmap_ops, pages, count, true); -} -EXPORT_SYMBOL_GPL(gnttab_unmap_refs_userspace); - static unsigned nr_status_frames(unsigned nr_grant_frames) { BUG_ON(grefs_per_grant_frame == 0); diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 7ad033dbc845..a5af2a26d94f 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -191,15 +191,11 @@ void gnttab_free_auto_xlat_frames(void); #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, + struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count); -int gnttab_map_refs_userspace(struct gnttab_map_grant_ref *map_ops, - struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count); int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, + struct gnttab_map_grant_ref *kunmap_ops, struct page **pages, unsigned int count); -int gnttab_unmap_refs_userspace(struct gnttab_unmap_grant_ref *unmap_ops, - struct gnttab_map_grant_ref *kunmap_ops, - struct page **pages, unsigned int count); /* Perform a batch of grant map/copy operations. Retry every batch slot * for which the hypervisor returns GNTST_eagain. This is typically due From 8101c8dbf6243ba517aab58d69bf1bc37d8b7b9c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 29 Jan 2014 16:05:30 -0500 Subject: [PATCH 065/171] Btrfs: disable snapshot aware defrag for now It's just broken and it's taking a lot of effort to fix it, so for now just disable it so people can defrag in peace. Thanks, Cc: stable@vger.kernel.org Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fb74a536add3..1af34d0c744b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2629,7 +2629,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) EXTENT_DEFRAG, 1, cached_state); if (ret) { u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item); - if (last_snapshot >= BTRFS_I(inode)->generation) + if (0 && last_snapshot >= BTRFS_I(inode)->generation) /* the inode is shared */ new = record_old_file_extents(inode, ordered_extent); From 0b947aff1599afbbd2ec07ada87b05af0f94cf10 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Wed, 29 Jan 2014 21:06:04 +0000 Subject: [PATCH 066/171] Btrfs: use btrfs_crc32c everywhere instead of libcrc32c After the commit titled "Btrfs: fix btrfs boot when compiled as built-in", LIBCRC32C requirement was removed from btrfs' Kconfig. This made it not possible to build a kernel with btrfs enabled (either as module or built-in) if libcrc32c is not enabled as well. So just replace all uses of libcrc32c with the equivalent function in btrfs hash.h - btrfs_crc32c. 
Signed-off-by: Filipe David Borba Manana Signed-off-by: Chris Mason --- fs/btrfs/check-integrity.c | 4 ++-- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/send.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 160fb509d720..39bfd56a1f26 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -92,11 +92,11 @@ #include #include #include -#include #include #include #include "ctree.h" #include "disk-io.h" +#include "hash.h" #include "transaction.h" #include "extent_io.h" #include "volumes.h" @@ -1823,7 +1823,7 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state, size_t sublen = i ? PAGE_CACHE_SIZE : (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE); - crc = crc32c(crc, data, sublen); + crc = btrfs_crc32c(crc, data, sublen); } btrfs_csum_final(crc, csum); if (memcmp(csum, h->csum, state->csum_size)) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7619147da382..3903bd3f8d2b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -35,6 +34,7 @@ #include #include "ctree.h" #include "disk-io.h" +#include "hash.h" #include "transaction.h" #include "btrfs_inode.h" #include "volumes.h" @@ -244,7 +244,7 @@ out: u32 btrfs_csum_data(char *data, u32 seed, size_t len) { - return crc32c(seed, data, len); + return btrfs_crc32c(seed, data, len); } void btrfs_csum_final(u32 crc, char *result) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 730dce395858..cf9107a64204 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -24,12 +24,12 @@ #include #include #include -#include #include #include #include "send.h" #include "backref.h" +#include "hash.h" #include "locking.h" #include "disk-io.h" #include "btrfs_inode.h" @@ -620,7 +620,7 @@ static int send_cmd(struct send_ctx *sctx) hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr)); hdr->crc = 0; - crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); + crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); hdr->crc = cpu_to_le32(crc); ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size, From 60efa5eb2e886852a0d5f9e1ffa7c896a1099da8 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Sat, 1 Feb 2014 21:27:56 +0000 Subject: [PATCH 067/171] Btrfs: use late_initcall instead of module_init It seems that when init_btrfs_fs() is called, crc32c/crc32c-intel might not always be already initialized, which results in the call to crypto_alloc_shash() returning -ENOENT, as experienced by Ahmet who reported this. Therefore make sure init_btrfs_fs() is called after crc32c is initialized (which is at initialization level 6, module_init), by using late_initcall (which is at initialization level 7) instead of module_init for btrfs. 
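The ordering argument can be made concrete with the built-in initcall levels: when btrfs is built into the kernel, module_init() becomes a level-6 (device) initcall, the same level at which the crc32c provider registers, so their relative order is not guaranteed; late_initcall() runs at level 7, strictly after every level-6 initcall has finished. A hedged sketch of the pattern (filesystem name and functions are hypothetical, the macros are the real ones from include/linux/init.h):

    #include <linux/init.h>
    #include <linux/module.h>

    static int __init myfs_init(void)
    {
            /* By level 7, all level-6 initcalls -- including crypto
             * providers registered with module_init() -- have run, so a
             * crypto_alloc_shash("crc32c", ...) done here can be expected
             * to find its algorithm instead of returning -ENOENT. */
            return 0;
    }

    static void __exit myfs_exit(void)
    {
    }

    /* module_init(myfs_init) would expand to device_initcall (level 6)
     * in a built-in configuration; late_initcall is level 7. */
    late_initcall(myfs_init);
    module_exit(myfs_exit);
    MODULE_LICENSE("GPL");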
Reported-and-Tested-by: Ahmet Inan Signed-off-by: Filipe David Borba Manana Signed-off-by: Chris Mason --- fs/btrfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c02f63356895..97cc24198554 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1996,7 +1996,7 @@ static void __exit exit_btrfs_fs(void) btrfs_hash_exit(); } -module_init(init_btrfs_fs) +late_initcall(init_btrfs_fs); module_exit(exit_btrfs_fs) MODULE_LICENSE("GPL"); From c172ec5c8dc8c09dd5958f4ae542fa321bb15a92 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 31 Jan 2014 17:49:22 +0200 Subject: [PATCH 068/171] libceph: fix error handling in ceph_osdc_init() msgpool_op_reply message pool isn't destroyed if workqueue construction fails. Fix it. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/osd_client.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 010ff3bd58ad..166d4c7ba065 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2504,9 +2504,12 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) err = -ENOMEM; osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify"); if (!osdc->notify_wq) - goto out_msgpool; + goto out_msgpool_reply; + return 0; +out_msgpool_reply: + ceph_msgpool_destroy(&osdc->msgpool_op_reply); out_msgpool: ceph_msgpool_destroy(&osdc->msgpool_op); out_mempool: From d4c42fb493e018e9240810bb6dc5334ae0505145 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Feb 2014 14:41:42 -0500 Subject: [PATCH 069/171] NFSv3: Remove unused function nfs3_proc_set_default_acl Cc: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/nfs3acl.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 0851f852568d..9271a6bb9a41 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -233,25 +233,6 @@ fail: return PTR_ERR(alloc); } -int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, - umode_t mode) -{ - struct posix_acl *default_acl, *acl; - int error; - - error = posix_acl_create(dir, &mode, &default_acl, &acl); - if (error) - return (error == -EOPNOTSUPP) ? 0 : error; - - error = nfs3_proc_setacls(inode, acl, default_acl); - - if (acl) - posix_acl_release(acl); - if (default_acl) - posix_acl_release(default_acl); - return error; -} - const struct xattr_handler *nfs3_xattr_handlers[] = { &posix_acl_access_xattr_handler, &posix_acl_default_xattr_handler, From 8f493b9cfcd8941c6b27d6ce8e3b4a78c094b3c1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Feb 2014 14:36:42 -0500 Subject: [PATCH 070/171] NFSv3: Fix return value of nfs3_proc_setacls nfs3_proc_setacls is used internally by the NFSv3 create operations to set the acl after the file has been created. If the operation fails because the server doesn't support acls, then it must return '0', not -EOPNOTSUPP. 
Reported-by: Russell King Link: http://lkml.kernel.org/r/20140201010328.GI15937@n2100.arm.linux.org.uk Cc: Christoph Hellwig Tested-by: Takashi Iwai Signed-off-by: Trond Myklebust --- fs/nfs/nfs3acl.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 9271a6bb9a41..871d6eda8dba 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -113,7 +113,7 @@ getout: return ERR_PTR(status); } -int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, +static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, struct posix_acl *dfacl) { struct nfs_server *server = NFS_SERVER(inode); @@ -198,6 +198,15 @@ out: return status; } +int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + int ret; + ret = __nfs3_proc_setacls(inode, acl, dfacl); + return (ret == -EOPNOTSUPP) ? 0 : ret; + +} + int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type) { struct posix_acl *alloc = NULL, *dfacl = NULL; @@ -225,7 +234,7 @@ int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type) if (IS_ERR(alloc)) goto fail; } - status = nfs3_proc_setacls(inode, acl, dfacl); + status = __nfs3_proc_setacls(inode, acl, dfacl); posix_acl_release(alloc); return status; From afca50132cfa7bfc5646676d8c67dc18454f38f8 Mon Sep 17 00:00:00 2001 From: Mukesh Rathor Date: Wed, 29 Jan 2014 16:15:18 -0800 Subject: [PATCH 071/171] xen/pvh: set CR4 flags for APs During bootup in the 'probe_page_size_mask' these CR4 flags are set in there. But for AP processors they are not set as we do not use 'secondary_startup_64' which the baremetal kernels uses. Instead do it in this function which we use in Xen PVH during our startup for AP processors. As such fix it up to make sure we have that flag set. Signed-off-by: Mukesh Rathor Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a4d7b647867f..201d09a7c46b 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1473,6 +1473,18 @@ static void xen_pvh_set_cr_flags(int cpu) * X86_CR0_TS, X86_CR0_PE, X86_CR0_ET are set by Xen for HVM guests * (which PVH shared codepaths), while X86_CR0_PG is for PVH. */ write_cr0(read_cr0() | X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM); + + if (!cpu) + return; + /* + * For BSP, PSE PGE are set in probe_page_size_mask(), for APs + * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu_init. + */ + if (cpu_has_pse) + set_in_cr4(X86_CR4_PSE); + + if (cpu_has_pge) + set_in_cr4(X86_CR4_PGE); } /* From 2d7c1b77dd59387070aab355532dd157f888325c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 3 Feb 2014 02:22:07 +0100 Subject: [PATCH 072/171] ACPI / hotplug / PCI: Remove entries from bus->devices in reverse order According to the changelog of commit 29ed1f29b68a (PCI: pciehp: Fix null pointer deref when hot-removing SR-IOV device) it is unsafe to walk the bus->devices list of a PCI bus and remove devices from it in direct order, because that may lead to NULL pointer dereferences related to virtual functions. For this reason, change all of the bus->devices list walks in acpiphp_glue.c during which devices may be removed to be carried out in reverse order. Signed-off-by: Rafael J. 
Wysocki Tested-by: Mika Westerberg --- drivers/pci/hotplug/acpiphp_glue.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index cd929aed3613..6a4b4b734fbd 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -742,7 +742,7 @@ static void trim_stale_devices(struct pci_dev *dev) /* The device is a bridge. so check the bus below it. */ pm_runtime_get_sync(&dev->dev); - list_for_each_entry_safe(child, tmp, &bus->devices, bus_list) + list_for_each_entry_safe_reverse(child, tmp, &bus->devices, bus_list) trim_stale_devices(child); pm_runtime_put(&dev->dev); @@ -773,8 +773,8 @@ static void acpiphp_check_bridge(struct acpiphp_bridge *bridge) ; /* do nothing */ } else if (get_slot_status(slot) == ACPI_STA_ALL) { /* remove stale devices if any */ - list_for_each_entry_safe(dev, tmp, &bus->devices, - bus_list) + list_for_each_entry_safe_reverse(dev, tmp, + &bus->devices, bus_list) if (PCI_SLOT(dev->devfn) == slot->device) trim_stale_devices(dev); @@ -805,7 +805,7 @@ static void acpiphp_sanitize_bus(struct pci_bus *bus) int i; unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM; - list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { + list_for_each_entry_safe_reverse(dev, tmp, &bus->devices, bus_list) { for (i=0; iresource[i]; if ((res->flags & type_mask) && !res->start && From f41b32613138ae05329a0f0e7170223b775d6b24 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 3 Feb 2014 02:22:17 +0100 Subject: [PATCH 073/171] ACPI / hotplug / PCI: Move PCI rescan-remove locking to hotplug_event() Commit 9217a984671e (ACPI / hotplug / PCI: Use global PCI rescan-remove locking) modified ACPIPHP to protect its PCI device removal and addition code paths from races against sysfs-driven rescan and remove operations with the help of PCI rescan-remove locking. However, it overlooked the fact that hotplug_event_work() is not the only caller of hotplug_event() which may also be called by dock_hotplug_event() and that code path is missing the PCI rescan-remove locking. This means that, although the PCI rescan-remove lock is held as appropriate during the handling of events originating from handle_hotplug_event(), the ACPIPHP's operations resulting from dock events may still suffer the race conditions that commit 9217a984671e was supposed to eliminate. To address that problem, move the PCI rescan-remove locking from hotplug_event_work() to hotplug_event() so that it is used regardless of the way that function is invoked. Revamps: 9217a984671e (ACPI / hotplug / PCI: Use global PCI rescan-remove locking) Signed-off-by: Rafael J. 
Wysocki Tested-by: Mika Westerberg --- drivers/pci/hotplug/acpiphp_glue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 6a4b4b734fbd..6e5bd79af810 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -852,6 +852,7 @@ static void hotplug_event(acpi_handle handle, u32 type, void *data) mutex_unlock(&acpiphp_context_lock); + pci_lock_rescan_remove(); acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer); switch (type) { @@ -905,6 +906,7 @@ static void hotplug_event(acpi_handle handle, u32 type, void *data) break; } + pci_unlock_rescan_remove(); if (bridge) put_bridge(bridge); } @@ -915,11 +917,9 @@ static void hotplug_event_work(void *data, u32 type) acpi_handle handle = context->handle; acpi_scan_lock_acquire(); - pci_lock_rescan_remove(); hotplug_event(handle, type, context); - pci_unlock_rescan_remove(); acpi_scan_lock_release(); acpi_evaluate_hotplug_ost(handle, type, ACPI_OST_SC_SUCCESS, NULL); put_bridge(context->func.parent); From d42f5da2340083301dd2c48ff2d75f6ce4b30767 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 3 Feb 2014 02:22:27 +0100 Subject: [PATCH 074/171] ACPI / hotplug / PCI: Scan root bus under the PCI rescan-remove lock Since acpiphp_check_bridge() called by acpiphp_check_host_bridge() does things that require PCI rescan-remove locking around it, make acpiphp_check_host_bridge() use that locking. Signed-off-by: Rafael J. Wysocki Tested-by: Mika Westerberg --- drivers/pci/hotplug/acpiphp_glue.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 6e5bd79af810..931d0b44eace 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -829,7 +829,11 @@ void acpiphp_check_host_bridge(acpi_handle handle) bridge = acpiphp_handle_to_bridge(handle); if (bridge) { + pci_lock_rescan_remove(); + acpiphp_check_bridge(bridge); + + pci_unlock_rescan_remove(); put_bridge(bridge); } } From 1b360f44d009059e446532f29c1a889951e72667 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 3 Feb 2014 22:30:06 +0100 Subject: [PATCH 075/171] ACPI / hotplug / PCI: Fix bridge removal race in handle_hotplug_event() If a PCI bridge with an ACPIPHP context attached is removed via sysfs, the code path executed as a result is the following: pci_stop_and_remove_bus_device_locked pci_remove_bus pcibios_remove_bus acpi_pci_remove_bus acpiphp_remove_slots cleanup_bridge put_bridge free_bridge acpiphp_put_context (for each child, under context lock) kfree (child context) Now, if a hotplug notify is dispatched for one of the bridge's children and the timing is such that handle_hotplug_event() for that notify is executed while free_bridge() above is running, the get_bridge(context->func.parent) in handle_hotplug_event() will not really help, because it is too late to prevent the bridge from going away and the child's context may be freed before hotplug_event_work() scheduled from handle_hotplug_event() dereferences the pointer to it passed via the data argument. That will cause a kernel crash to happpen in hotplug_event_work(). To prevent that from happening, make handle_hotplug_event() check the is_going_away flag of the function's parent bridge (under acpiphp_context_lock) and bail out if it's set. 
Also, make cleanup_bridge() set the bridge's is_going_away flag under acpiphp_context_lock so that it cannot be changed between the check and the subsequent get_bridge(context->func.parent) in handle_hotplug_event(). Then, in the above scenario, handle_hotplug_event() will notice that context->func.parent->is_going_away is already set and it will exit immediately preventing the crash from happening. Signed-off-by: Rafael J. Wysocki --- drivers/pci/hotplug/acpiphp_glue.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 931d0b44eace..91eceaf3131b 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -441,7 +441,9 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge) list_del(&bridge->list); mutex_unlock(&bridge_mutex); + mutex_lock(&acpiphp_context_lock); bridge->is_going_away = true; + mutex_unlock(&acpiphp_context_lock); } /** @@ -941,6 +943,7 @@ static void handle_hotplug_event(acpi_handle handle, u32 type, void *data) { struct acpiphp_context *context; u32 ost_code = ACPI_OST_SC_SUCCESS; + acpi_status status; switch (type) { case ACPI_NOTIFY_BUS_CHECK: @@ -976,13 +979,20 @@ static void handle_hotplug_event(acpi_handle handle, u32 type, void *data) mutex_lock(&acpiphp_context_lock); context = acpiphp_get_context(handle); - if (context && !WARN_ON(context->handle != handle)) { - get_bridge(context->func.parent); - acpiphp_put_context(context); - acpi_hotplug_execute(hotplug_event_work, context, type); + if (!context || WARN_ON(context->handle != handle) + || context->func.parent->is_going_away) + goto err_out; + + get_bridge(context->func.parent); + acpiphp_put_context(context); + status = acpi_hotplug_execute(hotplug_event_work, context, type); + if (ACPI_SUCCESS(status)) { mutex_unlock(&acpiphp_context_lock); return; } + put_bridge(context->func.parent); + + err_out: mutex_unlock(&acpiphp_context_lock); ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; From af9d8adc6b832003bbe3d83fde665ae6b4f072eb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 3 Feb 2014 22:30:15 +0100 Subject: [PATCH 076/171] ACPI / hotplug / PCI: Fix bridge removal race vs dock events If a PCI bridge with an ACPIPHP context attached is removed via sysfs, the code path executed as a result is the following: pci_stop_and_remove_bus_device_locked pci_remove_bus pcibios_remove_bus acpi_pci_remove_bus acpiphp_remove_slots cleanup_bridge unregister_hotplug_dock_device (drops dock references to the bridge) put_bridge free_bridge acpiphp_put_context (for each child, under context lock) kfree (context) Now, if a dock event affecting one of the bridge's child devices occurs (roughly at the same time), it will lead to the following code path: acpi_dock_deferred_cb dock_notify handle_eject_request hot_remove_dock_devices dock_hotplug_event hotplug_event (dereferences context) That may lead to a kernel crash in hotplug_event() if it is executed after the last kfree() in the bridge removal code path. To prevent that from happening, add a wrapper around hotplug_event() called dock_event() and point the .handler pointer in acpiphp_dock_ops to it. Make that wrapper retrieve the device's ACPIPHP context using acpiphp_get_context() (instead of taking it from the data argument) under acpiphp_context_lock and check if the parent bridge's is_going_away flag is set. 
If that flag is set, it will return immediately and if it is not set it will grab a reference to the device's parent bridge before executing hotplug_event(). Then, in the above scenario, the reference to the parent bridge held by dock_event() will prevent free_bridge() from being executed for it until hotplug_event() returns. Signed-off-by: Rafael J. Wysocki --- drivers/pci/hotplug/acpiphp_glue.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 91eceaf3131b..e2a783fdb98f 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -210,10 +210,29 @@ static void post_dock_fixups(acpi_handle not_used, u32 event, void *data) } } +static void dock_event(acpi_handle handle, u32 type, void *data) +{ + struct acpiphp_context *context; + + mutex_lock(&acpiphp_context_lock); + context = acpiphp_get_context(handle); + if (!context || WARN_ON(context->handle != handle) + || context->func.parent->is_going_away) { + mutex_unlock(&acpiphp_context_lock); + return; + } + get_bridge(context->func.parent); + acpiphp_put_context(context); + mutex_unlock(&acpiphp_context_lock); + + hotplug_event(handle, type, data); + + put_bridge(context->func.parent); +} static const struct acpi_dock_ops acpiphp_dock_ops = { .fixup = post_dock_fixups, - .handler = hotplug_event, + .handler = dock_event, }; /* Check whether the PCI device is managed by native PCIe hotplug driver */ From 789b663ae3d427ea9c50505339a13276e7228c9d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 31 Jan 2014 14:25:19 -0500 Subject: [PATCH 077/171] fs: get_acl() must be allowed to return EOPNOTSUPP posix_acl_xattr_get requires get_acl() to return EOPNOTSUPP if the filesystem cannot support acls. This is needed for NFS, which can't know whether or not the server supports acls until it tries to get/set one. This patch converts posix_acl_chmod and posix_acl_create to deal with EOPNOTSUPP return values from get_acl(). 
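The fix leans on the kernel's ERR_PTR convention: get_acl() encodes an errno in the returned pointer, so a caller can tell apart "no ACL set" (NULL), a hard error (e.g. ERR_PTR(-EIO)) and "the filesystem cannot do ACLs at all" (ERR_PTR(-EOPNOTSUPP)), and only the last case should silently fall back to the umask path. A user-space sketch of the same encoding (a simplified re-implementation for illustration, not the kernel headers):

    #include <errno.h>
    #include <stdio.h>

    #define MAX_ERRNO 4095

    static inline void *ERR_PTR(long error)   { return (void *)error; }
    static inline long PTR_ERR(const void *p) { return (long)p; }
    static inline int IS_ERR(const void *p)
    {
            return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
    }

    /* Stand-in for get_acl(): either "not supported" or "supported,
     * but no ACL present". */
    static void *get_acl_like(int fs_supports_acls)
    {
            if (!fs_supports_acls)
                    return ERR_PTR(-EOPNOTSUPP);
            return NULL;
    }

    int main(void)
    {
            void *acl = get_acl_like(0);

            if (IS_ERR(acl) && PTR_ERR(acl) == -EOPNOTSUPP)
                    puts("no ACL support: apply umask and succeed");
            else if (IS_ERR(acl))
                    puts("real error: propagate it");
            else
                    puts("ACL present, or none set");
            return 0;
    }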
Reported-by: Russell King Link: http://lkml.kernel.org/r/20140130140834.GW15937@n2100.arm.linux.org.uk Cc: Al Viro viro@zeniv.linux.org.uk> Reviewed-by: Christoph Hellwig Tested-by: Takashi Iwai Signed-off-by: Trond Myklebust --- fs/posix_acl.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 38bae5a0ea25..11c54fd51e16 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -521,8 +521,11 @@ posix_acl_chmod(struct inode *inode, umode_t mode) return -EOPNOTSUPP; acl = get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR_OR_NULL(acl)) + if (IS_ERR_OR_NULL(acl)) { + if (acl == ERR_PTR(-EOPNOTSUPP)) + return 0; return PTR_ERR(acl); + } ret = __posix_acl_chmod(&acl, GFP_KERNEL, mode); if (ret) @@ -544,14 +547,15 @@ posix_acl_create(struct inode *dir, umode_t *mode, goto no_acl; p = get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(p)) + if (IS_ERR(p)) { + if (p == ERR_PTR(-EOPNOTSUPP)) + goto apply_umask; return PTR_ERR(p); - - if (!p) { - *mode &= ~current_umask(); - goto no_acl; } + if (!p) + goto apply_umask; + *acl = posix_acl_clone(p, GFP_NOFS); if (!*acl) return -ENOMEM; @@ -575,6 +579,8 @@ posix_acl_create(struct inode *dir, umode_t *mode, } return 0; +apply_umask: + *mode &= ~current_umask(); no_acl: *default_acl = NULL; *acl = NULL; From 4528eb19b00d9ccd65ded6f8201eec704267edd8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 4 Feb 2014 07:39:06 +0100 Subject: [PATCH 078/171] ALSA: hda - Fix silent output on Toshiba Satellite L40 Toshiba Satellite L40 with AD1986A codec requires the EAPD of NID 0x1b to be constantly on, otherwise the output doesn't work. Unlike most of other AD1986A machines, EAPD is correctly implemented in HD-audio manner (that is, bit set = amp on), so we need to clear the inv_eapd flag in the fixup, too. 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=67481 Cc: [v3.11+] Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_analog.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 7a426ed491f2..50b2427f19ca 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -244,6 +244,19 @@ static void ad_fixup_inv_jack_detect(struct hda_codec *codec, } } +/* Toshiba Satellite L40 implements EAPD in a standard way unlike others */ +static void ad1986a_fixup_eapd(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct ad198x_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + codec->inv_eapd = 0; + spec->gen.keep_eapd_on = 1; + spec->eapd_nid = 0x1b; + } +} + enum { AD1986A_FIXUP_INV_JACK_DETECT, AD1986A_FIXUP_ULTRA, @@ -251,6 +264,7 @@ enum { AD1986A_FIXUP_3STACK, AD1986A_FIXUP_LAPTOP, AD1986A_FIXUP_LAPTOP_IMIC, + AD1986A_FIXUP_EAPD, }; static const struct hda_fixup ad1986a_fixups[] = { @@ -311,6 +325,10 @@ static const struct hda_fixup ad1986a_fixups[] = { .chained_before = 1, .chain_id = AD1986A_FIXUP_LAPTOP, }, + [AD1986A_FIXUP_EAPD] = { + .type = HDA_FIXUP_FUNC, + .v.func = ad1986a_fixup_eapd, + }, }; static const struct snd_pci_quirk ad1986a_fixup_tbl[] = { @@ -318,6 +336,7 @@ static const struct snd_pci_quirk ad1986a_fixup_tbl[] = { SND_PCI_QUIRK_MASK(0x1043, 0xff00, 0x8100, "ASUS P5", AD1986A_FIXUP_3STACK), SND_PCI_QUIRK_MASK(0x1043, 0xff00, 0x8200, "ASUS M2", AD1986A_FIXUP_3STACK), SND_PCI_QUIRK(0x10de, 0xcb84, "ASUS A8N-VM", AD1986A_FIXUP_3STACK), + SND_PCI_QUIRK(0x1179, 0xff40, "Toshiba Satellite L40", AD1986A_FIXUP_EAPD), SND_PCI_QUIRK(0x144d, 0xc01e, "FSC V2060", AD1986A_FIXUP_LAPTOP), SND_PCI_QUIRK_MASK(0x144d, 0xff00, 0xc000, "Samsung", AD1986A_FIXUP_SAMSUNG), SND_PCI_QUIRK(0x144d, 0xc027, "Samsung Q1", AD1986A_FIXUP_ULTRA), From 7e8f15c5aa9b8021ee933336f3e055f279482051 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Thu, 9 Jan 2014 09:55:43 -0300 Subject: [PATCH 079/171] [media] s5k5baf: allow to handle arbitrary long i2c sequences Using variable length array in s5k5baf_write_arr_seq caused an implicit assumption that i2c sequences should be short. The patch rewrites the function so it can handle sequences of any length and does not use variable length array. 
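The rewrite is the generic bounded-scratch-buffer pattern: rather than sizing a stack array by the caller-supplied count (the variable length array this patch removes), the sequence is sent in chunks that fit a fixed buffer. A user-space sketch of just the chunking arithmetic (chunk size and names are arbitrary, not the driver's exact limits):

    #include <stdio.h>
    #include <stddef.h>

    #define CHUNK 64   /* fixed scratch size instead of a VLA of "count" */

    static void send_chunk(const unsigned short *seq, size_t n)
    {
            printf("sending %zu words, first word 0x%04x\n", n, seq[0]);
    }

    static void write_seq(const unsigned short *seq, size_t count)
    {
            while (count > 0) {
                    size_t n = count < CHUNK ? count : CHUNK;

                    send_chunk(seq, n);
                    seq   += n;
                    count -= n;
            }
    }

    int main(void)
    {
            unsigned short seq[150] = { 0x1234 };

            write_seq(seq, sizeof(seq) / sizeof(seq[0]));  /* 64 + 64 + 22 */
            return 0;
    }

The same loop shape handles a one-word sequence and an arbitrarily long one without growing the stack.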
Reported-by: Dan Carpenter Signed-off-by: Andrzej Hajda Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/s5k5baf.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/media/i2c/s5k5baf.c b/drivers/media/i2c/s5k5baf.c index 4b8381111cbd..77e10e0fd8d6 100644 --- a/drivers/media/i2c/s5k5baf.c +++ b/drivers/media/i2c/s5k5baf.c @@ -478,25 +478,33 @@ static void s5k5baf_write_arr_seq(struct s5k5baf *state, u16 addr, u16 count, const u16 *seq) { struct i2c_client *c = v4l2_get_subdevdata(&state->sd); - __be16 buf[count + 1]; - int ret, n; + __be16 buf[65]; s5k5baf_i2c_write(state, REG_CMDWR_ADDR, addr); if (state->error) return; - buf[0] = __constant_cpu_to_be16(REG_CMD_BUF); - for (n = 1; n <= count; ++n) - buf[n] = cpu_to_be16(*seq++); - - n *= 2; - ret = i2c_master_send(c, (char *)buf, n); v4l2_dbg(3, debug, c, "i2c_write_seq(count=%d): %*ph\n", count, - min(2 * count, 64), seq - count); + min(2 * count, 64), seq); - if (ret != n) { - v4l2_err(c, "i2c_write_seq: error during transfer (%d)\n", ret); - state->error = ret; + buf[0] = __constant_cpu_to_be16(REG_CMD_BUF); + + while (count > 0) { + int n = min_t(int, count, ARRAY_SIZE(buf) - 1); + int ret, i; + + for (i = 1; i <= n; ++i) + buf[i] = cpu_to_be16(*seq++); + + i *= 2; + ret = i2c_master_send(c, (char *)buf, i); + if (ret != i) { + v4l2_err(c, "i2c_write_seq: error during transfer (%d)\n", ret); + state->error = ret; + break; + } + + count -= n; } } From a62cffefc9a327a5bc45ba9318ef601b525f4bd1 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Thu, 16 Jan 2014 08:26:33 -0300 Subject: [PATCH 080/171] [media] s5p-jpeg: Fix wrong NV12 format parameters NV12 format entries in the sjpeg_formats array had wrong colplanes, depth and v_align values. Signed-off-by: Jacek Anaszewski Signed-off-by: Kyungmin Park Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/s5p-jpeg/jpeg-core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/media/platform/s5p-jpeg/jpeg-core.c b/drivers/media/platform/s5p-jpeg/jpeg-core.c index a1c78c870b68..7d68d0b9966a 100644 --- a/drivers/media/platform/s5p-jpeg/jpeg-core.c +++ b/drivers/media/platform/s5p-jpeg/jpeg-core.c @@ -175,7 +175,7 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = { { .name = "YUV 4:2:0 planar, Y/CbCr", .fourcc = V4L2_PIX_FMT_NV12, - .depth = 16, + .depth = 12, .colplanes = 2, .h_align = 1, .v_align = 1, @@ -188,10 +188,10 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = { { .name = "YUV 4:2:0 planar, Y/CbCr", .fourcc = V4L2_PIX_FMT_NV12, - .depth = 16, - .colplanes = 4, + .depth = 12, + .colplanes = 2, .h_align = 4, - .v_align = 1, + .v_align = 4, .flags = SJPEG_FMT_FLAG_ENC_OUTPUT | SJPEG_FMT_FLAG_DEC_CAPTURE | SJPEG_FMT_FLAG_S5P | From a27a19d61535ce61a1e2f9274f99316c7532d5ef Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Tue, 7 Jan 2014 19:07:52 -0300 Subject: [PATCH 081/171] [media] exynos4-is: Fix error paths in probe() for !pm_runtime_enabled() Ensure clk_disable() is called on error paths only when clk_enable() was previously called. 
This fixes following build warning: .../media-git/drivers/media/platform/exynos4-is/fimc-lite.c: In function 'fimc_lite_probe': .../media-git/drivers/media/platform/exynos4-is/fimc-lite.c:1583:1: warning: label 'err_sd' defined but not used [-Wunused-label] Reported-by: Hans Verkuil Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/exynos4-is/fimc-core.c | 3 ++- drivers/media/platform/exynos4-is/fimc-lite.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/exynos4-is/fimc-core.c b/drivers/media/platform/exynos4-is/fimc-core.c index a7dfd07e8389..dcbea59c8c69 100644 --- a/drivers/media/platform/exynos4-is/fimc-core.c +++ b/drivers/media/platform/exynos4-is/fimc-core.c @@ -1027,7 +1027,8 @@ static int fimc_probe(struct platform_device *pdev) return 0; err_gclk: - clk_disable(fimc->clock[CLK_GATE]); + if (!pm_runtime_enabled(dev)) + clk_disable(fimc->clock[CLK_GATE]); err_sd: fimc_unregister_capture_subdev(fimc); err_sclk: diff --git a/drivers/media/platform/exynos4-is/fimc-lite.c b/drivers/media/platform/exynos4-is/fimc-lite.c index 1234734bccf4..5213ff03d28a 100644 --- a/drivers/media/platform/exynos4-is/fimc-lite.c +++ b/drivers/media/platform/exynos4-is/fimc-lite.c @@ -1563,7 +1563,7 @@ static int fimc_lite_probe(struct platform_device *pdev) if (!pm_runtime_enabled(dev)) { ret = clk_enable(fimc->clock); if (ret < 0) - goto err_clk_put; + goto err_sd; } fimc->alloc_ctx = vb2_dma_contig_init_ctx(dev); @@ -1579,7 +1579,8 @@ static int fimc_lite_probe(struct platform_device *pdev) return 0; err_clk_dis: - clk_disable(fimc->clock); + if (!pm_runtime_enabled(dev)) + clk_disable(fimc->clock); err_sd: fimc_lite_unregister_capture_subdev(fimc); err_clk_put: From d003a30dd7401f38c876bb97e3e51f4fd9fed15e Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Tue, 7 Jan 2014 19:08:48 -0300 Subject: [PATCH 082/171] [media] exynos4-is: Compile in fimc runtime PM callbacks conditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enclose the runtime PM helpers in #ifdef CONFIG_PM_RUNTIME/#endif to avoid following compile warning when CONFIG_PM_RUNTIME is disabled: CC drivers/media/platform/exynos4-is/fimc-core.o drivers/media/platform/exynos4-is/fimc-core.c:1040:12: warning: ‘fimc_runtime_resume’ defined but not used drivers/media/platform/exynos4-is/fimc-core.c:1057:12: warning: ‘fimc_runtime_suspend’ defined but not used Signed-off-by: Sylwester Nawrocki Reviewed-by: Jingoo Han Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/exynos4-is/fimc-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/platform/exynos4-is/fimc-core.c b/drivers/media/platform/exynos4-is/fimc-core.c index dcbea59c8c69..da2fc86cc524 100644 --- a/drivers/media/platform/exynos4-is/fimc-core.c +++ b/drivers/media/platform/exynos4-is/fimc-core.c @@ -1037,6 +1037,7 @@ err_sclk: return ret; } +#ifdef CONFIG_PM_RUNTIME static int fimc_runtime_resume(struct device *dev) { struct fimc_dev *fimc = dev_get_drvdata(dev); @@ -1069,6 +1070,7 @@ static int fimc_runtime_suspend(struct device *dev) dbg("fimc%d: state: 0x%lx", fimc->id, fimc->state); return ret; } +#endif #ifdef CONFIG_PM_SLEEP static int fimc_resume(struct device *dev) From 656e62dc84a38fa847d9501636acb098a32d6f89 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Thu, 16 Jan 2014 11:49:28 -0300 Subject: [PATCH 083/171] [media] exynos4-is: Compile in fimc-lite runtime PM callbacks conditionally 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enclose the runtime PM helpers in #ifdef CONFIG_PM_RUNTIME/#endif to avoid following compile warning when CONFIG_PM_RUNTIME is disabled: CC drivers/media/platform/exynos4-is/fimc-lite.o drivers/media/platform/exynos4-is/fimc-lite.c:1591:12: warning: ‘fimc_lite_runtime_resume’ defined but not used [-Wunused-function] drivers/media/platform/exynos4-is/fimc-lite.c:1599:12: warning: ‘fimc_lite_runtime_suspend’ defined but not used [-Wunused-function] Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/exynos4-is/fimc-lite.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/platform/exynos4-is/fimc-lite.c b/drivers/media/platform/exynos4-is/fimc-lite.c index 5213ff03d28a..779ec3cd259d 100644 --- a/drivers/media/platform/exynos4-is/fimc-lite.c +++ b/drivers/media/platform/exynos4-is/fimc-lite.c @@ -1588,6 +1588,7 @@ err_clk_put: return ret; } +#ifdef CONFIG_PM_RUNTIME static int fimc_lite_runtime_resume(struct device *dev) { struct fimc_lite *fimc = dev_get_drvdata(dev); @@ -1603,6 +1604,7 @@ static int fimc_lite_runtime_suspend(struct device *dev) clk_disable(fimc->clock); return 0; } +#endif #ifdef CONFIG_PM_SLEEP static int fimc_lite_resume(struct device *dev) From 38121b6ef314e556d3b59acc0c41a4d6f7e597fa Mon Sep 17 00:00:00 2001 From: Levente Kurusa Date: Thu, 19 Dec 2013 12:06:49 -0300 Subject: [PATCH 084/171] [media] media: bt8xx: add missing put_device call This is required so that we give up the last reference to the device. Remove the kfree() because the put_device() call will actually call release_sub_device which in turn kfrees the device. Signed-off-by: Levente Kurusa Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/pci/bt8xx/bttv-gpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/pci/bt8xx/bttv-gpio.c b/drivers/media/pci/bt8xx/bttv-gpio.c index 922e8233fd0b..3f364b7062b9 100644 --- a/drivers/media/pci/bt8xx/bttv-gpio.c +++ b/drivers/media/pci/bt8xx/bttv-gpio.c @@ -98,7 +98,7 @@ int bttv_sub_add_device(struct bttv_core *core, char *name) err = device_register(&sub->dev); if (0 != err) { - kfree(sub); + put_device(&sub->dev); return err; } pr_info("%d: add subdevice \"%s\"\n", core->nr, dev_name(&sub->dev)); From 50c88544d225baadd6de1c4365d4ed16cc942a37 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Fri, 20 Dec 2013 16:17:32 -0300 Subject: [PATCH 085/171] [media] go7007-loader: fix usb_dev leak There is usb_get_dev() in go7007_loader_probe(), but there is no usb_put_dev() anywhere. Found by Linux Driver Verification project (linuxtesting.org). 
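The underlying rule is reference-count symmetry: every usb_get_dev() taken in probe() needs a matching usb_put_dev() on every exit path, the early-error ones included, and once more in disconnect(). A skeleton of that shape (the probe/disconnect names and the -ENODEV policy are illustrative, not the go7007 code; the USB helpers are the real ones):

    #include <linux/usb.h>

    static int my_probe(struct usb_interface *intf,
                        const struct usb_device_id *id)
    {
            struct usb_device *usbdev = usb_get_dev(interface_to_usbdev(intf));

            if (usbdev->descriptor.bNumConfigurations != 1)
                    goto err;               /* early exit still drops the ref */

            usb_set_intfdata(intf, usbdev);
            return 0;
    err:
            usb_put_dev(usbdev);            /* balances usb_get_dev() above */
            return -ENODEV;
    }

    static void my_disconnect(struct usb_interface *intf)
    {
            usb_put_dev(interface_to_usbdev(intf)); /* balances probe() */
            usb_set_intfdata(intf, NULL);
    }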
Signed-off-by: Alexey Khoroshilov Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/go7007/go7007-loader.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/media/go7007/go7007-loader.c b/drivers/staging/media/go7007/go7007-loader.c index 10bb41c2fb6d..eecb1f2a5574 100644 --- a/drivers/staging/media/go7007/go7007-loader.c +++ b/drivers/staging/media/go7007/go7007-loader.c @@ -59,7 +59,7 @@ static int go7007_loader_probe(struct usb_interface *interface, if (usbdev->descriptor.bNumConfigurations != 1) { dev_err(&interface->dev, "can't handle multiple config\n"); - return -ENODEV; + goto failed2; } vendor = le16_to_cpu(usbdev->descriptor.idVendor); @@ -108,6 +108,7 @@ static int go7007_loader_probe(struct usb_interface *interface, return 0; failed2: + usb_put_dev(usbdev); dev_err(&interface->dev, "probe failed\n"); return -ENODEV; } @@ -115,6 +116,7 @@ failed2: static void go7007_loader_disconnect(struct usb_interface *interface) { dev_info(&interface->dev, "disconnect\n"); + usb_put_dev(interface_to_usbdev(interface)); usb_set_intfdata(interface, NULL); } From cca36e2eecec2b8fc869a50ffd3bd0adeed92b8b Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 3 Jan 2014 08:10:49 -0300 Subject: [PATCH 086/171] [media] Revert "[media] videobuf_vm_{open,close} race fixes" This reverts commit a242f426108c284049a69710f871cc9f11b13e61. That commit actually caused deadlocks, rather then fixing them. If ext_lock is set to NULL (otherwise videobuf_queue_lock doesn't do anything), then you get this deadlock: The driver's mmap function calls videobuf_mmap_mapper which calls videobuf_queue_lock on q. videobuf_mmap_mapper calls __videobuf_mmap_mapper, __videobuf_mmap_mapper calls videobuf_vm_open and videobuf_vm_open calls videobuf_queue_lock on q (introduced by above patch): deadlocked. This affects drivers using dma-contig and dma-vmalloc. Only dma-sg is not affected since it doesn't call videobuf_vm_open from __videobuf_mmap_mapper. Most drivers these days have a non-NULL ext_lock. Those that still use NULL there are all fairly obscure drivers, which is why this hasn't been seen earlier. Since everything worked perfectly fine for many years I prefer to just revert this patch rather than trying to fix it. videobuf is quite fragile and I rather not touch it too much. Work is (slowly) progressing to move everything over to vb2 or at the very least use non-NULL ext_lock in videobuf. 
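The deadlock being reverted is plain lock recursion: the mmap path takes the queue lock and then, through videobuf_vm_open(), tries to take the same non-recursive lock again in the same thread. A minimal user-space illustration of that failure mode with a default (non-recursive) pthread mutex -- not the videobuf code itself, just the shape of the bug:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    static void vm_open_like(void)
    {
            /* Second acquisition by the thread that already holds the
             * lock: with a default mutex this never returns. */
            pthread_mutex_lock(&lock);
            puts("never reached");
            pthread_mutex_unlock(&lock);
    }

    static void mmap_like(void)
    {
            pthread_mutex_lock(&lock);      /* outer "queue lock" */
            vm_open_like();                 /* nested path retakes it */
            pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
            mmap_like();                    /* hangs by design */
            return 0;
    }

dma-sg escaped only because its __videobuf_mmap_mapper() never calls videobuf_vm_open() on that path, exactly as the message above notes.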
Signed-off-by: Hans Verkuil Cc: # for v3.11 and up Cc: Al Viro Reported-by: Pete Eberlein Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/videobuf-dma-contig.c | 12 +++++------- drivers/media/v4l2-core/videobuf-dma-sg.c | 10 ++++------ drivers/media/v4l2-core/videobuf-vmalloc.c | 10 ++++------ 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/drivers/media/v4l2-core/videobuf-dma-contig.c b/drivers/media/v4l2-core/videobuf-dma-contig.c index 65411adcd0ea..7e6b209b7002 100644 --- a/drivers/media/v4l2-core/videobuf-dma-contig.c +++ b/drivers/media/v4l2-core/videobuf-dma-contig.c @@ -66,14 +66,11 @@ static void __videobuf_dc_free(struct device *dev, static void videobuf_vm_open(struct vm_area_struct *vma) { struct videobuf_mapping *map = vma->vm_private_data; - struct videobuf_queue *q = map->q; - dev_dbg(q->dev, "vm_open %p [count=%u,vma=%08lx-%08lx]\n", + dev_dbg(map->q->dev, "vm_open %p [count=%u,vma=%08lx-%08lx]\n", map, map->count, vma->vm_start, vma->vm_end); - videobuf_queue_lock(q); map->count++; - videobuf_queue_unlock(q); } static void videobuf_vm_close(struct vm_area_struct *vma) @@ -85,11 +82,12 @@ static void videobuf_vm_close(struct vm_area_struct *vma) dev_dbg(q->dev, "vm_close %p [count=%u,vma=%08lx-%08lx]\n", map, map->count, vma->vm_start, vma->vm_end); - videobuf_queue_lock(q); - if (!--map->count) { + map->count--; + if (0 == map->count) { struct videobuf_dma_contig_memory *mem; dev_dbg(q->dev, "munmap %p q=%p\n", map, q); + videobuf_queue_lock(q); /* We need first to cancel streams, before unmapping */ if (q->streaming) @@ -128,8 +126,8 @@ static void videobuf_vm_close(struct vm_area_struct *vma) kfree(map); + videobuf_queue_unlock(q); } - videobuf_queue_unlock(q); } static const struct vm_operations_struct videobuf_vm_ops = { diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index 9db674ccdc68..828e7c10bd70 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -338,14 +338,11 @@ EXPORT_SYMBOL_GPL(videobuf_dma_free); static void videobuf_vm_open(struct vm_area_struct *vma) { struct videobuf_mapping *map = vma->vm_private_data; - struct videobuf_queue *q = map->q; dprintk(2, "vm_open %p [count=%d,vma=%08lx-%08lx]\n", map, map->count, vma->vm_start, vma->vm_end); - videobuf_queue_lock(q); map->count++; - videobuf_queue_unlock(q); } static void videobuf_vm_close(struct vm_area_struct *vma) @@ -358,9 +355,10 @@ static void videobuf_vm_close(struct vm_area_struct *vma) dprintk(2, "vm_close %p [count=%d,vma=%08lx-%08lx]\n", map, map->count, vma->vm_start, vma->vm_end); - videobuf_queue_lock(q); - if (!--map->count) { + map->count--; + if (0 == map->count) { dprintk(1, "munmap %p q=%p\n", map, q); + videobuf_queue_lock(q); for (i = 0; i < VIDEO_MAX_FRAME; i++) { if (NULL == q->bufs[i]) continue; @@ -376,9 +374,9 @@ static void videobuf_vm_close(struct vm_area_struct *vma) q->bufs[i]->baddr = 0; q->ops->buf_release(q, q->bufs[i]); } + videobuf_queue_unlock(q); kfree(map); } - videobuf_queue_unlock(q); return; } diff --git a/drivers/media/v4l2-core/videobuf-vmalloc.c b/drivers/media/v4l2-core/videobuf-vmalloc.c index 1365c651c177..2ff7fcc77b11 100644 --- a/drivers/media/v4l2-core/videobuf-vmalloc.c +++ b/drivers/media/v4l2-core/videobuf-vmalloc.c @@ -54,14 +54,11 @@ MODULE_LICENSE("GPL"); static void videobuf_vm_open(struct vm_area_struct *vma) { struct videobuf_mapping *map = vma->vm_private_data; - struct videobuf_queue *q = map->q; dprintk(2, 
"vm_open %p [count=%u,vma=%08lx-%08lx]\n", map, map->count, vma->vm_start, vma->vm_end); - videobuf_queue_lock(q); map->count++; - videobuf_queue_unlock(q); } static void videobuf_vm_close(struct vm_area_struct *vma) @@ -73,11 +70,12 @@ static void videobuf_vm_close(struct vm_area_struct *vma) dprintk(2, "vm_close %p [count=%u,vma=%08lx-%08lx]\n", map, map->count, vma->vm_start, vma->vm_end); - videobuf_queue_lock(q); - if (!--map->count) { + map->count--; + if (0 == map->count) { struct videobuf_vmalloc_memory *mem; dprintk(1, "munmap %p q=%p\n", map, q); + videobuf_queue_lock(q); /* We need first to cancel streams, before unmapping */ if (q->streaming) @@ -116,8 +114,8 @@ static void videobuf_vm_close(struct vm_area_struct *vma) kfree(map); + videobuf_queue_unlock(q); } - videobuf_queue_unlock(q); return; } From 548df7831adc34eca3ccefa29f768cef84315d6e Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Wed, 8 Jan 2014 05:01:33 -0300 Subject: [PATCH 087/171] [media] vb2: Check if there are buffers before streamon This patch adds a test preventing streamon() if there is no buffer ready. Without this patch, a user could call streamon() before preparing any buffer. This leads to a situation where if he calls close() before calling streamoff() the device is kept streaming. Signed-off-by: Ricardo Ribalda Delgado Reviewed-by: Marek Szyprowski Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/videobuf2-core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index 5a5fb7f09b7b..a127925c9d61 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -1776,6 +1776,11 @@ static int vb2_internal_streamon(struct vb2_queue *q, enum v4l2_buf_type type) return 0; } + if (!q->num_buffers) { + dprintk(1, "streamon: no buffers have been allocated\n"); + return -EINVAL; + } + /* * If any buffers were queued before streamon, * we can now pass them to driver for processing. From 08e10972661db78d0e0a2d4a4c28fc3bf93617ba Mon Sep 17 00:00:00 2001 From: Michael Krufky Date: Wed, 29 Jan 2014 23:10:11 -0300 Subject: [PATCH 088/171] [media] update Michael Krufky's email address I am no longer available at the kernellabs.com or m1k.net email addresses. Update each instance of my email to my linuxtv.org account. 
Signed-off-by: Michael Krufky Signed-off-by: Mauro Carvalho Chehab --- Documentation/dvb/contributors.txt | 2 +- drivers/media/dvb-frontends/nxt200x.c | 2 +- drivers/media/pci/bt8xx/bttv-cards.c | 2 +- drivers/media/pci/saa7134/saa7134-cards.c | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c | 4 ++-- drivers/media/usb/dvb-usb-v2/mxl111sf-demod.h | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.c | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.h | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.c | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.h | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf-phy.c | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf-phy.h | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf-reg.h | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.c | 4 ++-- drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf.c | 4 ++-- drivers/media/usb/dvb-usb-v2/mxl111sf.h | 2 +- 17 files changed, 20 insertions(+), 20 deletions(-) diff --git a/Documentation/dvb/contributors.txt b/Documentation/dvb/contributors.txt index 47c30098dab6..731a009723c7 100644 --- a/Documentation/dvb/contributors.txt +++ b/Documentation/dvb/contributors.txt @@ -78,7 +78,7 @@ Peter Beutner Wilson Michaels for the lgdt330x frontend driver, and various bugfixes -Michael Krufky +Michael Krufky for maintaining v4l/dvb inter-tree dependencies Taylor Jacob diff --git a/drivers/media/dvb-frontends/nxt200x.c b/drivers/media/dvb-frontends/nxt200x.c index 4bf057544607..8a8e1ecb762d 100644 --- a/drivers/media/dvb-frontends/nxt200x.c +++ b/drivers/media/dvb-frontends/nxt200x.c @@ -2,7 +2,7 @@ * Support for NXT2002 and NXT2004 - VSB/QAM * * Copyright (C) 2005 Kirk Lapray - * Copyright (C) 2006 Michael Krufky + * Copyright (C) 2006-2014 Michael Krufky * based on nxt2002 by Taylor Jacob * and nxt2004 by Jean-Francois Thibert * diff --git a/drivers/media/pci/bt8xx/bttv-cards.c b/drivers/media/pci/bt8xx/bttv-cards.c index d85cb0ace4dc..6662b495b22c 100644 --- a/drivers/media/pci/bt8xx/bttv-cards.c +++ b/drivers/media/pci/bt8xx/bttv-cards.c @@ -2426,7 +2426,7 @@ struct tvcard bttv_tvcards[] = { }, /* ---- card 0x87---------------------------------- */ [BTTV_BOARD_DVICO_FUSIONHDTV_5_LITE] = { - /* Michael Krufky */ + /* Michael Krufky */ .name = "DViCO FusionHDTV 5 Lite", .tuner_type = TUNER_LG_TDVS_H06XF, /* TDVS-H064F */ .tuner_addr = ADDR_UNSET, diff --git a/drivers/media/pci/saa7134/saa7134-cards.c b/drivers/media/pci/saa7134/saa7134-cards.c index d45e7f6ff332..c9b2350e92c8 100644 --- a/drivers/media/pci/saa7134/saa7134-cards.c +++ b/drivers/media/pci/saa7134/saa7134-cards.c @@ -2590,7 +2590,7 @@ struct saa7134_board saa7134_boards[] = { }}, }, [SAA7134_BOARD_AVERMEDIA_AVERTVHD_A180] = { - /* Michael Krufky + /* Michael Krufky * Uses Alps Electric TDHU2, containing NXT2004 ATSC Decoder * AFAIK, there is no analog demod, thus, * no support for analog television. 
diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c b/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c index d83df4bb72d3..0a98d04c53e4 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c @@ -1,7 +1,7 @@ /* * mxl111sf-demod.c - driver for the MaxLinear MXL111SF DVB-T demodulator * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -601,7 +601,7 @@ struct dvb_frontend *mxl111sf_demod_attach(struct mxl111sf_state *mxl_state, EXPORT_SYMBOL_GPL(mxl111sf_demod_attach); MODULE_DESCRIPTION("MaxLinear MxL111SF DVB-T demodulator driver"); -MODULE_AUTHOR("Michael Krufky "); +MODULE_AUTHOR("Michael Krufky "); MODULE_LICENSE("GPL"); MODULE_VERSION("0.1"); diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.h b/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.h index 3f3f8bfd190b..2d4530f5be54 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.h +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.h @@ -1,7 +1,7 @@ /* * mxl111sf-demod.h - driver for the MaxLinear MXL111SF DVB-T demodulator * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.c b/drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.c index e4121cb8f5ef..a619410adde4 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.c +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.c @@ -1,7 +1,7 @@ /* * mxl111sf-gpio.c - driver for the MaxLinear MXL111SF * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.h b/drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.h index 0220f54299a5..b85a5772d771 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.h +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.h @@ -1,7 +1,7 @@ /* * mxl111sf-gpio.h - driver for the MaxLinear MXL111SF * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.c b/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.c index 34434557ef65..a101d06eb143 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.c +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.c @@ -1,7 +1,7 @@ /* * mxl111sf-i2c.c - driver for the MaxLinear MXL111SF * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.h b/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.h index a57a45ffb9e4..465762145ad2 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.h +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.h @@ -1,7 +1,7 @@ /* * mxl111sf-i2c.h - driver for the MaxLinear MXL111SF * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it 
and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-phy.c b/drivers/media/usb/dvb-usb-v2/mxl111sf-phy.c index b741b3a7a325..f6b348024bec 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-phy.c +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-phy.c @@ -1,7 +1,7 @@ /* * mxl111sf-phy.c - driver for the MaxLinear MXL111SF * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-phy.h b/drivers/media/usb/dvb-usb-v2/mxl111sf-phy.h index f0756071d347..0643738de7de 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-phy.h +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-phy.h @@ -1,7 +1,7 @@ /* * mxl111sf-phy.h - driver for the MaxLinear MXL111SF * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-reg.h b/drivers/media/usb/dvb-usb-v2/mxl111sf-reg.h index 17831b0fb9db..89bf115e927e 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-reg.h +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-reg.h @@ -1,7 +1,7 @@ /* * mxl111sf-reg.h - driver for the MaxLinear MXL111SF * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.c b/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.c index 879c529640f7..a8d2c7053674 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.c +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.c @@ -1,7 +1,7 @@ /* * mxl111sf-tuner.c - driver for the MaxLinear MXL111SF CMOS tuner * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -512,7 +512,7 @@ struct dvb_frontend *mxl111sf_tuner_attach(struct dvb_frontend *fe, EXPORT_SYMBOL_GPL(mxl111sf_tuner_attach); MODULE_DESCRIPTION("MaxLinear MxL111SF CMOS tuner driver"); -MODULE_AUTHOR("Michael Krufky "); +MODULE_AUTHOR("Michael Krufky "); MODULE_LICENSE("GPL"); MODULE_VERSION("0.1"); diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h b/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h index 90f583e5d6a6..5fc0121736ff 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h @@ -1,7 +1,7 @@ /* * mxl111sf-tuner.h - driver for the MaxLinear MXL111SF CMOS tuner * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf.c b/drivers/media/usb/dvb-usb-v2/mxl111sf.c index 08240e498451..8ceff4290bf1 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf.c +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 Michael Krufky (mkrufky@kernellabs.com) + * Copyright (C) 2010-2014 Michael Krufky (mkrufky@linuxtv.org) * * This program is free software; 
you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -1421,7 +1421,7 @@ static struct usb_driver mxl111sf_usb_driver = { module_usb_driver(mxl111sf_usb_driver); -MODULE_AUTHOR("Michael Krufky "); +MODULE_AUTHOR("Michael Krufky "); MODULE_DESCRIPTION("Driver for MaxLinear MxL111SF"); MODULE_VERSION("1.0"); MODULE_LICENSE("GPL"); diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf.h b/drivers/media/usb/dvb-usb-v2/mxl111sf.h index 9816de86e48c..8516c011b7cc 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf.h +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 Michael Krufky (mkrufky@kernellabs.com) + * Copyright (C) 2010-2014 Michael Krufky (mkrufky@linuxtv.org) * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free From 62fd0d30e1fbd12d5a07ff722b2726bb8fa630c7 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Thu, 30 Jan 2014 00:05:01 -0300 Subject: [PATCH 089/171] [media] cx24117: remove dead code in always 'false' if statement At this point of the execution in the function cx24117_attach() demod cannot be '0'. In that case the function returns earlier with an error value ('NULL'). Remove the if statement. This error has been reported by scan.coverity.com Signed-off-by: Andi Shyti Signed-off-by: Michael Krufky Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-frontends/cx24117.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/media/dvb-frontends/cx24117.c b/drivers/media/dvb-frontends/cx24117.c index 68f768a5422d..782dcfe6ae69 100644 --- a/drivers/media/dvb-frontends/cx24117.c +++ b/drivers/media/dvb-frontends/cx24117.c @@ -1200,12 +1200,6 @@ struct dvb_frontend *cx24117_attach(const struct cx24117_config *config, state->demod = demod - 1; state->priv = priv; - /* test i2c bus for ack */ - if (demod == 0) { - if (cx24117_readreg(state, 0x00) < 0) - goto error3; - } - dev_info(&state->priv->i2c->dev, "%s: Attaching frontend %d\n", KBUILD_MODNAME, state->demod); @@ -1216,8 +1210,6 @@ struct dvb_frontend *cx24117_attach(const struct cx24117_config *config, state->frontend.demodulator_priv = state; return &state->frontend; -error3: - kfree(state); error2: cx24117_release_priv(priv); error1: From a33dd5171d141c378df498aba3fa3c9a573cacb6 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Thu, 30 Jan 2014 00:06:41 -0300 Subject: [PATCH 090/171] [media] cx24117: use a valid dev pointer for dev_err printout Don't use '&state->priv->i2c->dev' reference to device because state is still 'NULL'. Use '&i2c->dev' instead. 
This bug has been reported by scan.coverity.com Signed-off-by: Andi Shyti Signed-off-by: Michael Krufky Cc: vger@stable.kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-frontends/cx24117.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/cx24117.c b/drivers/media/dvb-frontends/cx24117.c index 782dcfe6ae69..a6c3c9e2e897 100644 --- a/drivers/media/dvb-frontends/cx24117.c +++ b/drivers/media/dvb-frontends/cx24117.c @@ -1176,7 +1176,7 @@ struct dvb_frontend *cx24117_attach(const struct cx24117_config *config, switch (demod) { case 0: - dev_err(&state->priv->i2c->dev, + dev_err(&i2c->dev, "%s: Error attaching frontend %d\n", KBUILD_MODNAME, demod); goto error1; From 866e8d8a9dc1ebb4f9e67197e264ac2df81f7d4b Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 30 Jan 2014 00:11:33 -0300 Subject: [PATCH 091/171] [media] mxl111sf: Fix unintentional garbage stack read mxl111sf_read_reg takes an address of a variable to write to as an argument. drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.c:mxl111sf_config_pin_mux_modes passes several uninitialized stack variables to this routine, expecting them to be filled in. In the event that something unexpected happens when reading from the chip, we end up doing a pr_debug of the value passed in, revealing whatever garbage happened to be on the stack. Change the pr_debug to match what happens in the 'success' case, where we assign buf[1] to *data. Spotted with Coverity (Bugs 731910 through 731917) Signed-off-by: Dave Jones Signed-off-by: Michael Krufky Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb-v2/mxl111sf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf.c b/drivers/media/usb/dvb-usb-v2/mxl111sf.c index 8ceff4290bf1..c7304fa8ab73 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf.c +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf.c @@ -105,7 +105,7 @@ int mxl111sf_read_reg(struct mxl111sf_state *state, u8 addr, u8 *data) ret = -EINVAL; } - pr_debug("R: (0x%02x, 0x%02x)\n", addr, *data); + pr_debug("R: (0x%02x, 0x%02x)\n", addr, buf[1]); fail: return ret; } From 13e1b87c986100169b0695aeb26970943665eda9 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 30 Jan 2014 00:17:09 -0300 Subject: [PATCH 092/171] [media] mxl111sf: Fix compile when CONFIG_DVB_USB_MXL111SF is unset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following build error: drivers/media/usb/dvb-usb-v2/ mxl111sf-tuner.h:72:9: error: expected ‘;’, ‘,’ or ‘)’ before ‘struct’ struct mxl111sf_tuner_config *cfg) Signed-off-by: Dave Jones Signed-off-by: Michael Krufky Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h b/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h index 5fc0121736ff..2046db22519e 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h @@ -68,7 +68,7 @@ struct dvb_frontend *mxl111sf_tuner_attach(struct dvb_frontend *fe, #else static inline struct dvb_frontend *mxl111sf_tuner_attach(struct dvb_frontend *fe, - struct mxl111sf_state *mxl_state + struct mxl111sf_state *mxl_state, struct mxl111sf_tuner_config *cfg) { printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__); From f2e4c5e004691dfe37d0e4b363296f28abdb9bc7 Mon Sep 17 00:00:00 
2001 From: Antti Palosaari Date: Thu, 16 Jan 2014 08:59:30 -0300 Subject: [PATCH 093/171] [media] af9035: add ID [2040:f900] Hauppauge WinTV-MiniStick 2 Add USB ID [2040:f900] for Hauppauge WinTV-MiniStick 2. Device is built upon the IT9135 chipset. Tested-by: Stefan Becker Signed-off-by: Antti Palosaari Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb-v2/af9035.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/usb/dvb-usb-v2/af9035.c b/drivers/media/usb/dvb-usb-v2/af9035.c index 8f9b2cea88f0..8ede8ea762e6 100644 --- a/drivers/media/usb/dvb-usb-v2/af9035.c +++ b/drivers/media/usb/dvb-usb-v2/af9035.c @@ -1539,6 +1539,8 @@ static const struct usb_device_id af9035_id_table[] = { &af9035_props, "TerraTec Cinergy T Stick Dual RC (rev. 2)", NULL) }, { DVB_USB_DEVICE(USB_VID_LEADTEK, 0x6a05, &af9035_props, "Leadtek WinFast DTV Dongle Dual", NULL) }, + { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xf900, + &af9035_props, "Hauppauge WinTV-MiniStick 2", NULL) }, { } }; MODULE_DEVICE_TABLE(usb, af9035_id_table); From 1ba6c90161ac058f580e6ceb5ccc14dcd86365d1 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Fri, 17 Jan 2014 13:38:00 -0300 Subject: [PATCH 094/171] [media] hdpvr: Fix memory leak in debug cppcheck reported a memory leak in device_authorization() within hdpvr-core.c. When the debug option is specified and the code jumps to the "unlock:" label, print_buf was not freed. Confirmed that the module successfully compiles without error. Signed-off-by: Masanari Iida Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/hdpvr/hdpvr-core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/hdpvr/hdpvr-core.c b/drivers/media/usb/hdpvr/hdpvr-core.c index 2f0c89cbac76..c5638964c3f2 100644 --- a/drivers/media/usb/hdpvr/hdpvr-core.c +++ b/drivers/media/usb/hdpvr/hdpvr-core.c @@ -198,7 +198,6 @@ static int device_authorization(struct hdpvr_device *dev) hex_dump_to_buffer(response, 8, 16, 1, print_buf, 5*buf_size+1, 0); v4l2_dbg(MSG_INFO, hdpvr_debug, &dev->v4l2_dev, " response: %s\n", print_buf); - kfree(print_buf); #endif msleep(100); @@ -214,6 +213,9 @@ static int device_authorization(struct hdpvr_device *dev) retval = ret != 8; unlock: mutex_unlock(&dev->usbc_mutex); +#ifdef HDPVR_DEBUG + kfree(print_buf); +#endif return retval; } From 257cc4b5c57060a65e9c805fb1c225688ebbca80 Mon Sep 17 00:00:00 2001 From: Martin Bugge Date: Thu, 23 Jan 2014 06:40:00 -0300 Subject: [PATCH 095/171] [media] v4l2-dv-timings: fix GTF calculation Round off image width to nearest 8 (GTF_CELL_GRAN) A source sending a GTF (Generalized Timing Formula) format has no means of signalling image width. The assumed aspect ratio may result in an odd image width, but according to the standard the image width should be a multiple of 8. 
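A minimal sketch of the rounding this patch applies, assuming GTF_CELL_GRAN is 8 as stated above; the standalone helper name is hypothetical and only for illustration:

	#define GTF_CELL_GRAN	8	/* image width granularity required by GTF */

	/* Round a computed width to the nearest multiple of GTF_CELL_GRAN;
	 * the mask trick works because GTF_CELL_GRAN is a power of two. */
	static unsigned int gtf_round_width(unsigned int image_width)
	{
		return (image_width + GTF_CELL_GRAN / 2) & ~(GTF_CELL_GRAN - 1);
	}

	/* e.g. gtf_round_width(1363) == 1360, gtf_round_width(1364) == 1368 */
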
Cc: Mats Randgaard Signed-off-by: Martin Bugge Reviewed-by: Hans Verkuil Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-dv-timings.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/v4l2-core/v4l2-dv-timings.c b/drivers/media/v4l2-core/v4l2-dv-timings.c index ee52b9f4a944..f7902fe8a526 100644 --- a/drivers/media/v4l2-core/v4l2-dv-timings.c +++ b/drivers/media/v4l2-core/v4l2-dv-timings.c @@ -515,6 +515,7 @@ bool v4l2_detect_gtf(unsigned frame_height, aspect.denominator = 9; } image_width = ((image_height * aspect.numerator) / aspect.denominator); + image_width = (image_width + GTF_CELL_GRAN/2) & ~(GTF_CELL_GRAN - 1); /* Horizontal */ if (default_gtf) From 57f0547fbc1e925f5e58c76f311a6632c3f37740 Mon Sep 17 00:00:00 2001 From: Martin Bugge Date: Wed, 29 Jan 2014 06:50:20 -0300 Subject: [PATCH 096/171] [media] adv7842: Composite free-run platform-data fix Incorrect setting of free-run for Composite. Copy/paste regression fix. Signed-off-by: Martin Bugge Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/adv7842.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/i2c/adv7842.c b/drivers/media/i2c/adv7842.c index 1effc21e1cdd..9bbd6656fb8f 100644 --- a/drivers/media/i2c/adv7842.c +++ b/drivers/media/i2c/adv7842.c @@ -2554,7 +2554,7 @@ static int adv7842_core_init(struct v4l2_subdev *sd) sdp_write_and_or(sd, 0xdd, 0xf0, pdata->sdp_free_run_force | (pdata->sdp_free_run_cbar_en << 1) | (pdata->sdp_free_run_man_col_en << 2) | - (pdata->sdp_free_run_force << 3)); + (pdata->sdp_free_run_auto << 3)); /* TODO from platform data */ cp_write(sd, 0x69, 0x14); /* Enable CP CSC */ From 4294ad1c52caad3239404cea9e0cbfeb435e3a25 Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Wed, 29 Jan 2014 19:11:46 +0100 Subject: [PATCH 097/171] MIPS: Alchemy: Fix DB1100 GPIO registration With CONFIG_GPIOLIB=y gpios need to be requested before they can be modified. Request the SD card-detect pins, and drop the SPI direction setup, as the driver does that for us anyway. This gets rid of a lot of WARN_ON()s triggered by GPIO core, and restores functionality of the touchscreen controller. 
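As a brief aside, a hedged sketch of the gpiolib rule this fix relies on: with CONFIG_GPIOLIB=y a pin should be requested before its direction is changed, otherwise the core emits WARN_ON()s. Pin numbers follow the DB1100 board code in the diff; the init helper name is hypothetical:

	#include <linux/gpio.h>

	static int __init db1100_sd_cd_setup(void)
	{
		int ret;

		ret = gpio_request(19, "sd0_cd");	/* claim the line before configuring it */
		if (ret)
			return ret;
		ret = gpio_request(20, "sd1_cd");
		if (ret) {
			gpio_free(19);
			return ret;
		}
		gpio_direction_input(19);		/* sd0 card-detect */
		gpio_direction_input(20);		/* sd1 card-detect */
		return 0;
	}
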
Signed-off-by: Manuel Lauss Cc: Linux-MIPS Patchwork: https://patchwork.linux-mips.org/patch/6497/ Signed-off-by: Ralf Baechle --- arch/mips/alchemy/devboards/db1000.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/mips/alchemy/devboards/db1000.c b/arch/mips/alchemy/devboards/db1000.c index 11f3ad20321c..5483906e0f86 100644 --- a/arch/mips/alchemy/devboards/db1000.c +++ b/arch/mips/alchemy/devboards/db1000.c @@ -534,13 +534,10 @@ static int __init db1000_dev_init(void) s0 = AU1100_GPIO1_INT; s1 = AU1100_GPIO4_INT; + gpio_request(19, "sd0_cd"); + gpio_request(20, "sd1_cd"); gpio_direction_input(19); /* sd0 cd# */ gpio_direction_input(20); /* sd1 cd# */ - gpio_direction_input(21); /* touch pendown# */ - gpio_direction_input(207); /* SPI MISO */ - gpio_direction_output(208, 0); /* SPI MOSI */ - gpio_direction_output(209, 1); /* SPI SCK */ - gpio_direction_output(210, 1); /* SPI CS# */ /* spi_gpio on SSI0 pins */ pfc = __raw_readl((void __iomem *)SYS_PINFUNC); From 6776254b1c28fece9535d42baf2db88756121fe7 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 4 Feb 2014 12:29:01 +0000 Subject: [PATCH 098/171] MIPS: Wire up sched_setattr/sched_getattr syscalls Wire up for MIPS the new sched_setattr and sched_getattr system calls added in commit d50dde5a10f3 (sched: Add new scheduler syscalls to support an extended scheduling parameters ABI) merged in v3.14-rc1. Signed-off-by: James Hogan Reviewed-by: Markos Chandras Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/6502/ Signed-off-by: Ralf Baechle --- arch/mips/include/uapi/asm/unistd.h | 18 ++++++++++++------ arch/mips/kernel/scall32-o32.S | 2 ++ arch/mips/kernel/scall64-64.S | 2 ++ arch/mips/kernel/scall64-n32.S | 2 ++ arch/mips/kernel/scall64-o32.S | 2 ++ 5 files changed, 20 insertions(+), 6 deletions(-) diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h index 1dee279f9665..d6e154a9e6a5 100644 --- a/arch/mips/include/uapi/asm/unistd.h +++ b/arch/mips/include/uapi/asm/unistd.h @@ -369,16 +369,18 @@ #define __NR_process_vm_writev (__NR_Linux + 346) #define __NR_kcmp (__NR_Linux + 347) #define __NR_finit_module (__NR_Linux + 348) +#define __NR_sched_setattr (__NR_Linux + 349) +#define __NR_sched_getattr (__NR_Linux + 350) /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 348 +#define __NR_Linux_syscalls 350 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #define __NR_O32_Linux 4000 -#define __NR_O32_Linux_syscalls 348 +#define __NR_O32_Linux_syscalls 350 #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -695,16 +697,18 @@ #define __NR_kcmp (__NR_Linux + 306) #define __NR_finit_module (__NR_Linux + 307) #define __NR_getdents64 (__NR_Linux + 308) +#define __NR_sched_setattr (__NR_Linux + 309) +#define __NR_sched_getattr (__NR_Linux + 310) /* * Offset of the last Linux 64-bit flavoured syscall */ -#define __NR_Linux_syscalls 308 +#define __NR_Linux_syscalls 310 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #define __NR_64_Linux 5000 -#define __NR_64_Linux_syscalls 308 +#define __NR_64_Linux_syscalls 310 #if _MIPS_SIM == _MIPS_SIM_NABI32 @@ -1025,15 +1029,17 @@ #define __NR_process_vm_writev (__NR_Linux + 310) #define __NR_kcmp (__NR_Linux + 311) #define __NR_finit_module (__NR_Linux + 312) +#define __NR_sched_setattr (__NR_Linux + 313) +#define __NR_sched_getattr (__NR_Linux + 314) /* * Offset of the last N32 flavoured syscall */ -#define __NR_Linux_syscalls 312 +#define __NR_Linux_syscalls 314 #endif /* _MIPS_SIM == 
_MIPS_SIM_NABI32 */ #define __NR_N32_Linux 6000 -#define __NR_N32_Linux_syscalls 312 +#define __NR_N32_Linux_syscalls 314 #endif /* _UAPI_ASM_UNISTD_H */ diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index e8e541b40d86..a5b14f48e1af 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -563,3 +563,5 @@ EXPORT(sys_call_table) PTR sys_process_vm_writev PTR sys_kcmp PTR sys_finit_module + PTR sys_sched_setattr + PTR sys_sched_getattr /* 4350 */ diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 57e3742fec59..b56e254beb15 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -425,4 +425,6 @@ EXPORT(sys_call_table) PTR sys_kcmp PTR sys_finit_module PTR sys_getdents64 + PTR sys_sched_setattr + PTR sys_sched_getattr /* 5310 */ .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 2f48f5934399..f7e5b72cf481 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -418,4 +418,6 @@ EXPORT(sysn32_call_table) PTR compat_sys_process_vm_writev /* 6310 */ PTR sys_kcmp PTR sys_finit_module + PTR sys_sched_setattr + PTR sys_sched_getattr .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index f1acdb429f4f..6788727d91af 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -541,4 +541,6 @@ EXPORT(sys32_call_table) PTR compat_sys_process_vm_writev PTR sys_kcmp PTR sys_finit_module + PTR sys_sched_setattr + PTR sys_sched_getattr /* 4350 */ .size sys32_call_table,.-sys32_call_table From 40507403485fcb56b83d6ddfc954e9b08305054c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 4 Feb 2014 14:41:26 +0000 Subject: [PATCH 099/171] arm64: vdso: prevent ld from aligning PT_LOAD segments to 64k Whilst the text segment for our VDSO is marked as PT_LOAD in the ELF headers, it is mapped by the kernel and not actually subject to demand-paging. ld doesn't realise this, and emits a p_align field of 64k (the maximum supported page size), which conflicts with the load address picked by the kernel on 4k systems, which will be 4k aligned. This causes GDB to fail with "Failed to read a valid object file image from memory" when attempting to load the VDSO. This patch passes the -n option to ld, which prevents it from aligning PT_LOAD segments to the maximum page size. 
Cc: Reported-by: Kyle McMartin Acked-by: Kyle McMartin Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index d8064af42e62..6d20b7d162d8 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -48,7 +48,7 @@ $(obj-vdso): %.o: %.S # Actual build commands quiet_cmd_vdsold = VDSOL $@ - cmd_vdsold = $(CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@ quiet_cmd_vdsoas = VDSOA $@ cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< From 12b13835a0a8bfabea68741e1ab4d4a4cb77d037 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 4 Feb 2014 12:20:01 -0800 Subject: [PATCH 100/171] kbuild: don't enable DEBUG_INFO when building for COMPILE_TEST It really isn't very interesting to have DEBUG_INFO when doing compile coverage stuff (you wouldn't want to run the result anyway, that's kind of the whole point of COMPILE_TEST), and it currently makes the build take longer and use much more disk space for "all{yes,mod}config". There's somewhat active discussion about this still, and we might end up with some new config option for things like this (Andi points out that the silly X86_DECODER_SELFTEST option also slows down the normal coverage tests hugely), but I'm starting the ball rolling with this simple one-liner. DEBUG_INFO isn't that noticeable if you have tons of memory and a good IO subsystem, but it hurts you a lot if you don't - for very little upside for the common use. Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index dbf94a7d25a8..a48abeac753f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -119,7 +119,7 @@ menu "Compile-time checks and compiler options" config DEBUG_INFO bool "Compile the kernel with debug info" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && !COMPILE_TEST help If you say Y here the resulting kernel image will include debugging info resulting in a larger kernel image. From fcb6a15c2e7e76d493e6f91ea889ab40e1c643a4 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Mon, 3 Feb 2014 08:55:31 -0800 Subject: [PATCH 101/171] intel_pstate: Take core C0 time into account for core busy calculation Take non-idle time into account when calculating core busy time. This ensures that intel_pstate will notice a decrease in load. References: https://bugzilla.kernel.org/show_bug.cgi?id=66581 Cc: 3.10+ # 3.10+ Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/intel_pstate.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7e257b233602..79606f473f48 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -57,6 +57,7 @@ struct sample { int32_t core_pct_busy; u64 aperf; u64 mperf; + unsigned long long tsc; int freq; }; @@ -96,6 +97,7 @@ struct cpudata { u64 prev_aperf; u64 prev_mperf; + unsigned long long prev_tsc; int sample_ptr; struct sample samples[SAMPLE_COUNT]; }; @@ -548,30 +550,41 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu, struct sample *sample) { u64 core_pct; - core_pct = div64_u64(int_tofp(sample->aperf * 100), - sample->mperf); - sample->freq = fp_toint(cpu->pstate.max_pstate * core_pct * 1000); + u64 c0_pct; - sample->core_pct_busy = core_pct; + core_pct = div64_u64(sample->aperf * 100, sample->mperf); + + c0_pct = div64_u64(sample->mperf * 100, sample->tsc); + sample->freq = fp_toint( + mul_fp(int_tofp(cpu->pstate.max_pstate), + int_tofp(core_pct * 1000))); + + sample->core_pct_busy = mul_fp(int_tofp(core_pct), + div_fp(int_tofp(c0_pct + 1), int_tofp(100))); } static inline void intel_pstate_sample(struct cpudata *cpu) { u64 aperf, mperf; + unsigned long long tsc; rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); + tsc = native_read_tsc(); cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT; cpu->samples[cpu->sample_ptr].aperf = aperf; cpu->samples[cpu->sample_ptr].mperf = mperf; + cpu->samples[cpu->sample_ptr].tsc = tsc; cpu->samples[cpu->sample_ptr].aperf -= cpu->prev_aperf; cpu->samples[cpu->sample_ptr].mperf -= cpu->prev_mperf; + cpu->samples[cpu->sample_ptr].tsc -= cpu->prev_tsc; intel_pstate_calc_busy(cpu, &cpu->samples[cpu->sample_ptr]); cpu->prev_aperf = aperf; cpu->prev_mperf = mperf; + cpu->prev_tsc = tsc; } static inline void intel_pstate_set_sample_time(struct cpudata *cpu) From 7b320cb1ed2dbd2c5f2a778197baf76fd6bf545a Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 4 Feb 2014 09:07:09 +0100 Subject: [PATCH 102/171] pinctrl: protect pinctrl_list add We have few fedora bug reports about list corruption on pinctrl, for example: https://bugzilla.redhat.com/show_bug.cgi?id=1051918 Most likely corruption happen due lack of protection of pinctrl_list when adding new nodes to it. Patch corrects that. Fixes: 42fed7ba44e ("pinctrl: move subsystem mutex to pinctrl_dev struct") Cc: stable@vger.kernel.org Signed-off-by: Stanislaw Gruszka Acked-by: Stephen Warren Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 5ee61a470016..cab020a58bf5 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -851,7 +851,9 @@ static struct pinctrl *create_pinctrl(struct device *dev) kref_init(&p->users); /* Add the pinctrl handle to the global list */ + mutex_lock(&pinctrl_list_mutex); list_add_tail(&p->node, &pinctrl_list); + mutex_unlock(&pinctrl_list_mutex); return p; } From e18ac62fa4b3f16234bab0d5a6627c57dbae9e7e Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 3 Feb 2014 12:59:16 +0200 Subject: [PATCH 103/171] ACPI / video: Add HP EliteBook Revolve 810 to the blacklist On HP EliteBook Revolve 810 the ACPI backlight device doesn't work as expected. For example when resuming from system sleep, it seems to lose backlight settings. 
Forcing Intel driver fixes the problem so add this machine the ACPI video detect blacklist. Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video_detect.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index f0447d3daf2c..a697b77b8865 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -170,6 +170,14 @@ static struct dmi_system_id video_detect_dmi_table[] = { }, { .callback = video_detect_force_vendor, + .ident = "HP EliteBook Revolve 810", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook Revolve 810 G1"), + }, + }, + { + .callback = video_detect_force_vendor, .ident = "Lenovo Yoga 13", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), From eb5ed9a3221a30edc7b48b87af550dbcc0e82c80 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 30 Jan 2014 14:56:56 +0300 Subject: [PATCH 104/171] ACPI / utils: remove a pointless NULL check "element" can't be NULL because it is the address of a struct member. Signed-off-by: Dan Carpenter Signed-off-by: Rafael J. Wysocki --- drivers/acpi/utils.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 0347a37eb438..85e3b612bdc0 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -99,10 +99,6 @@ acpi_extract_package(union acpi_object *package, union acpi_object *element = &(package->package.elements[i]); - if (!element) { - return AE_BAD_DATA; - } - switch (element->type) { case ACPI_TYPE_INTEGER: From 085ca1175cdccc8e400f6d06cf64e209b46021a2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 30 Jan 2014 14:55:15 +0300 Subject: [PATCH 105/171] ACPI / proc: remove unneeded NULL check We already verified that "ldev" was non-NULL earlier and also we dereference again without checking a three lines later. Signed-off-by: Dan Carpenter Signed-off-by: Rafael J. Wysocki --- drivers/acpi/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/proc.c b/drivers/acpi/proc.c index 50fe34ffe932..75c28eae8860 100644 --- a/drivers/acpi/proc.c +++ b/drivers/acpi/proc.c @@ -60,7 +60,7 @@ acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset) seq_printf(seq, "%c%-8s %s:%s\n", dev->wakeup.flags.run_wake ? '*' : ' ', (device_may_wakeup(&dev->dev) || - (ldev && device_may_wakeup(ldev))) ? + device_may_wakeup(ldev)) ? "enabled" : "disabled", ldev->bus ? ldev->bus->name : "no-bus", dev_name(ldev)); From 47a08c85f70ebdbaaf42b444af57dd6f83e04485 Mon Sep 17 00:00:00 2001 From: "Luis G.F" Date: Tue, 21 Jan 2014 15:40:43 +0100 Subject: [PATCH 106/171] ACPI / battery: Fix incorrect sscanf() string in acpi_battery_init_alarm() Fix incorrect sscanf() string in acpi_battery_init_alarm(). Change from %ld to %lu, because 'x' is unsigned long. Signed-off-by: Luis G.F Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/battery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 470e7542bf31..018a42883706 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -549,7 +549,7 @@ static ssize_t acpi_battery_alarm_store(struct device *dev, { unsigned long x; struct acpi_battery *battery = to_acpi_battery(dev_get_drvdata(dev)); - if (sscanf(buf, "%ld\n", &x) == 1) + if (sscanf(buf, "%lu\n", &x) == 1) battery->alarm = x/1000; if (acpi_battery_present(battery)) acpi_battery_set_alarm(battery); From ec22b4aa993abbd18f5bbbcb20a1c56be3b1d38b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 5 Feb 2014 14:13:56 +1000 Subject: [PATCH 107/171] drm/mgag200: fix typo causing bw limits to be ignored on some chips mode->mdev otherwise the bw limits never kick in. Reported in RHEL testing. Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/mgag200/mgag200_mode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index b8583f275e80..968374776db9 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -1519,11 +1519,11 @@ static int mga_vga_mode_valid(struct drm_connector *connector, (mga_vga_calculate_mode_bandwidth(mode, bpp) > (32700 * 1024))) { return MODE_BANDWIDTH; - } else if (mode->type == G200_EH && + } else if (mdev->type == G200_EH && (mga_vga_calculate_mode_bandwidth(mode, bpp) > (37500 * 1024))) { return MODE_BANDWIDTH; - } else if (mode->type == G200_ER && + } else if (mdev->type == G200_ER && (mga_vga_calculate_mode_bandwidth(mode, bpp) > (55000 * 1024))) { return MODE_BANDWIDTH; From d3c56568f43807135f2c2a09582a69f809f0d8b7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Feb 2014 09:56:13 +0100 Subject: [PATCH 108/171] ALSA: hda/realtek - Avoid invalid COEFs for ALC271X We've seen often problems after suspend/resume on Acer Aspire One AO725 with ALC271X codec as reported in kernel bugzilla, and it turned out that some COEFs doesn't work and triggers the codec communication stall. Since these magic COEF setups are specific to ALC269VB for some PLL configurations, the machine works even without these manual adjustment. So, let's simply avoid applying them for ALC271X. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=52181 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 56a8f1876603..e16a71e586f0 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4427,6 +4427,9 @@ static void alc269_fill_coef(struct hda_codec *codec) if (spec->codec_variant != ALC269_TYPE_ALC269VB) return; + /* ALC271X doesn't seem to support these COEFs (bko#52181) */ + if (!strcmp(codec->chip_name, "ALC271X")) + return; if ((alc_get_coef0(codec) & 0x00ff) < 0x015) { alc_write_coef_idx(codec, 0xf, 0x960b); From c7579fed1f1b2567529aea64ef19871337403ab3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 5 Feb 2014 07:28:10 +0100 Subject: [PATCH 109/171] ALSA: hda - Add missing mixer widget for AD1983 The mixer widget on AD1983 at NID 0x0e was missing in the commit [f2f8be43c5c9: ALSA: hda - Add aamix NID to AD codecs]. 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=70011 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_analog.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 50b2427f19ca..195cd62cdce5 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -498,6 +498,7 @@ static int patch_ad1983(struct hda_codec *codec) return err; spec = codec->spec; + spec->gen.mixer_nid = 0x0e; spec->gen.beep_nid = 0x10; set_beep_amp(spec, 0x10, 0, HDA_OUTPUT); err = ad198x_parse_auto_config(codec, false); From c20f31ec421ea4fabea5e95a6afd46c5f41e5599 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Feb 2014 11:02:10 +0100 Subject: [PATCH 110/171] ALSA: hda - Fix missing VREF setup for Mac Pro 1,1 Mac Pro 1,1 with ALC889A codec needs the VREF setup on NID 0x18 to VREF50, in order to make the speaker working. The same fixup was already needed for MacBook Air 1,1, so we can reuse it. Reported-by: Nicolai Beuermann Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e16a71e586f0..d9693ca9546f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1821,6 +1821,7 @@ enum { ALC889_FIXUP_IMAC91_VREF, ALC889_FIXUP_MBA11_VREF, ALC889_FIXUP_MBA21_VREF, + ALC889_FIXUP_MP11_VREF, ALC882_FIXUP_INV_DMIC, ALC882_FIXUP_NO_PRIMARY_HP, ALC887_FIXUP_ASUS_BASS, @@ -2190,6 +2191,12 @@ static const struct hda_fixup alc882_fixups[] = { .chained = true, .chain_id = ALC889_FIXUP_MBP_VREF, }, + [ALC889_FIXUP_MP11_VREF] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc889_fixup_mba11_vref, + .chained = true, + .chain_id = ALC885_FIXUP_MACPRO_GPIO, + }, [ALC882_FIXUP_INV_DMIC] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_inv_dmic_0x12, @@ -2253,7 +2260,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x106b, 0x00a0, "MacBookPro 3,1", ALC889_FIXUP_MBP_VREF), SND_PCI_QUIRK(0x106b, 0x00a1, "Macbook", ALC889_FIXUP_MBP_VREF), SND_PCI_QUIRK(0x106b, 0x00a4, "MacbookPro 4,1", ALC889_FIXUP_MBP_VREF), - SND_PCI_QUIRK(0x106b, 0x0c00, "Mac Pro", ALC885_FIXUP_MACPRO_GPIO), + SND_PCI_QUIRK(0x106b, 0x0c00, "Mac Pro", ALC889_FIXUP_MP11_VREF), SND_PCI_QUIRK(0x106b, 0x1000, "iMac 24", ALC885_FIXUP_MACPRO_GPIO), SND_PCI_QUIRK(0x106b, 0x2800, "AppleTV", ALC885_FIXUP_MACPRO_GPIO), SND_PCI_QUIRK(0x106b, 0x2c00, "MacbookPro rev3", ALC889_FIXUP_MBP_VREF), From 76c7d18bcddc9794f898ebdee44a3160c636da9c Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 30 Jan 2014 10:46:12 +0100 Subject: [PATCH 111/171] drm/vmwgfx: Don't commit staged bindings if execbuf fails If execbuf fails and binding commands are never sent to the device, don't commit the staged context bindings to the tracker. Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 7a5f1eb55c5a..3f0b4d1450ff 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -114,8 +114,10 @@ static void vmw_resource_list_unreserve(struct list_head *list, * persistent context binding tracker. 
*/ if (unlikely(val->staged_bindings)) { - vmw_context_binding_state_transfer - (val->res, val->staged_bindings); + if (!backoff) { + vmw_context_binding_state_transfer + (val->res, val->staged_bindings); + } kfree(val->staged_bindings); val->staged_bindings = NULL; } From cf5e3413337309050c05e13dcebe85b7194a21e5 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 30 Jan 2014 10:58:19 +0100 Subject: [PATCH 112/171] drm/vmwgfx: Fix regression caused by "drm/ttm: make ttm reservation calls behave like reservation calls" The call to ttm_eu_backoff_reservation() as part of an error path would cause a lock imbalance if the reservation ticket was not initialized. This error is easily triggered from user-space by submitting a bogus command stream. Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Cc: Jerome Glisse Cc: Dave Airlie --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 3f0b4d1450ff..dafa139c0ca7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -2195,11 +2195,11 @@ int vmw_execbuf_process(struct drm_file *file_priv, ret = vmw_cmd_check_all(dev_priv, sw_context, kernel_commands, command_size); if (unlikely(ret != 0)) - goto out_err; + goto out_err_nores; ret = vmw_resources_reserve(sw_context); if (unlikely(ret != 0)) - goto out_err; + goto out_err_nores; ret = ttm_eu_reserve_buffers(&ticket, &sw_context->validate_nodes); if (unlikely(ret != 0)) @@ -2291,10 +2291,11 @@ int vmw_execbuf_process(struct drm_file *file_priv, out_unlock_binding: mutex_unlock(&dev_priv->binding_mutex); out_err: + ttm_eu_backoff_reservation(&ticket, &sw_context->validate_nodes); +out_err_nores: + vmw_resource_list_unreserve(&sw_context->resource_list, true); vmw_resource_relocations_free(&sw_context->res_relocations); vmw_free_relocations(sw_context); - ttm_eu_backoff_reservation(&ticket, &sw_context->validate_nodes); - vmw_resource_list_unreserve(&sw_context->resource_list, true); vmw_clear_validations(sw_context); if (unlikely(dev_priv->pinned_bo != NULL && !dev_priv->query_cid_valid)) From 0ccbbae43c2dfe45ded1d7ed59b8fc7ac8214fb0 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 30 Jan 2014 11:13:43 +0100 Subject: [PATCH 113/171] drm/vmwgfx: Fix SET_SHADER_CONST emulation on guest-backed devices Emulate the SET_SHADER_CONST legacy command on guest-backed devices by issuing a SET_GB_SHADERCONSTS_INLINE command. Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 37 +++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index dafa139c0ca7..9441825c7860 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -1528,6 +1528,39 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv, return 0; } +/** + * vmw_cmd_set_shader_const - Validate an SVGA_3D_CMD_SET_SHADER_CONST + * command + * + * @dev_priv: Pointer to a device private struct. + * @sw_context: The software context being used for this batch. + * @header: Pointer to the command header in the command stream. 
+ */ +static int vmw_cmd_set_shader_const(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + struct vmw_set_shader_const_cmd { + SVGA3dCmdHeader header; + SVGA3dCmdSetShaderConst body; + } *cmd; + int ret; + + cmd = container_of(header, struct vmw_set_shader_const_cmd, + header); + + ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_context, + user_context_converter, &cmd->body.cid, + NULL); + if (unlikely(ret != 0)) + return ret; + + if (dev_priv->has_mob) + header->id = SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE; + + return 0; +} + /** * vmw_cmd_bind_gb_shader - Validate an SVGA_3D_CMD_BIND_GB_SHADER * command @@ -1642,8 +1675,8 @@ static const struct vmw_cmd_entry const vmw_cmd_entries[SVGA_3D_CMD_MAX] = { true, true, false), VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER, &vmw_cmd_set_shader, true, false, false), - VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER_CONST, &vmw_cmd_cid_check, - true, true, false), + VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER_CONST, &vmw_cmd_set_shader_const, + true, false, false), VMW_CMD_DEF(SVGA_3D_CMD_DRAW_PRIMITIVES, &vmw_cmd_draw, true, false, false), VMW_CMD_DEF(SVGA_3D_CMD_SETSCISSORRECT, &vmw_cmd_cid_check, From c1a21373d2cb94a7808161a8c237b249cd799ce7 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 30 Jan 2014 11:18:38 +0100 Subject: [PATCH 114/171] drm/vmwgfx: Fix legacy surface reference size copyback Surfaces created using the guest-backed surface interface only keeps the base mip size, so only copy that if the legacy surface reference ioctl requests the size information. Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz --- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 979da1c246a5..ec2b99833fce 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -908,8 +908,8 @@ int vmw_surface_reference_ioctl(struct drm_device *dev, void *data, rep->size_addr; if (user_sizes) - ret = copy_to_user(user_sizes, srf->sizes, - srf->num_sizes * sizeof(*srf->sizes)); + ret = copy_to_user(user_sizes, &srf->base_size, + sizeof(srf->base_size)); if (unlikely(ret != 0)) { DRM_ERROR("copy_to_user failed %p %u\n", user_sizes, srf->num_sizes); From d5bde956630b86462ee22055f5816a04290aed57 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 31 Jan 2014 10:12:10 +0100 Subject: [PATCH 115/171] drm/vmwgfx: Emulate legacy shaders on guest-backed devices v2 Command stream legacy shader creation and destruction is replaced by NOPs in the command stream, and instead guest-backed shaders are created and destroyed as part of the command validation process. v2: Removed some stray debug messages. 
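A minimal sketch of the NOP replacement mechanism, mirroring the vmw_resource_relocations_apply() change in the diff below: a relocation recorded without a backing resource overwrites the command header id with SVGA_3D_CMD_NOP instead of patching in a resource id. Variable names follow the driver code:

	list_for_each_entry(rel, list, head) {
		if (likely(rel->res != NULL))
			cb[rel->offset] = rel->res->id;		/* normal case: patch in the device resource id */
		else
			cb[rel->offset] = SVGA_3D_CMD_NOP;	/* legacy shader define/destroy becomes a no-op */
	}
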
Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 7 + drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 29 +- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 197 ++++++++-- drivers/gpu/drm/vmwgfx/vmwgfx_shader.c | 489 +++++++++++++++++++++--- 4 files changed, 632 insertions(+), 90 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 9893328f8fdc..3bdc0adc656d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -941,6 +941,7 @@ static void vmw_postclose(struct drm_device *dev, drm_master_put(&vmw_fp->locked_master); } + vmw_compat_shader_man_destroy(vmw_fp->shman); ttm_object_file_release(&vmw_fp->tfile); kfree(vmw_fp); } @@ -960,11 +961,17 @@ static int vmw_driver_open(struct drm_device *dev, struct drm_file *file_priv) if (unlikely(vmw_fp->tfile == NULL)) goto out_no_tfile; + vmw_fp->shman = vmw_compat_shader_man_create(dev_priv); + if (IS_ERR(vmw_fp->shman)) + goto out_no_shman; + file_priv->driver_priv = vmw_fp; dev_priv->bdev.dev_mapping = dev->dev_mapping; return 0; +out_no_shman: + ttm_object_file_release(&vmw_fp->tfile); out_no_tfile: kfree(vmw_fp); return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 554e7fa33082..cef0ff7ac738 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -75,10 +75,14 @@ #define VMW_RES_FENCE ttm_driver_type3 #define VMW_RES_SHADER ttm_driver_type4 +struct vmw_compat_shader_manager; + struct vmw_fpriv { struct drm_master *locked_master; struct ttm_object_file *tfile; struct list_head fence_events; + bool gb_aware; + struct vmw_compat_shader_manager *shman; }; struct vmw_dma_buffer { @@ -318,7 +322,7 @@ struct vmw_sw_context{ struct drm_open_hash res_ht; bool res_ht_initialized; bool kernel; /**< is the called made from the kernel */ - struct ttm_object_file *tfile; + struct vmw_fpriv *fp; struct list_head validate_nodes; struct vmw_relocation relocs[VMWGFX_MAX_RELOCATIONS]; uint32_t cur_reloc; @@ -336,6 +340,7 @@ struct vmw_sw_context{ bool needs_post_query_barrier; struct vmw_resource *error_resource; struct vmw_ctx_binding_state staged_bindings; + struct list_head staged_shaders; }; struct vmw_legacy_display; @@ -991,6 +996,28 @@ extern int vmw_shader_define_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int vmw_shader_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern int vmw_compat_shader_lookup(struct vmw_compat_shader_manager *man, + SVGA3dShaderType shader_type, + u32 *user_key); +extern void vmw_compat_shaders_commit(struct vmw_compat_shader_manager *man, + struct list_head *list); +extern void vmw_compat_shaders_revert(struct vmw_compat_shader_manager *man, + struct list_head *list); +extern int vmw_compat_shader_remove(struct vmw_compat_shader_manager *man, + u32 user_key, + SVGA3dShaderType shader_type, + struct list_head *list); +extern int vmw_compat_shader_add(struct vmw_compat_shader_manager *man, + u32 user_key, const void *bytecode, + SVGA3dShaderType shader_type, + size_t size, + struct ttm_object_file *tfile, + struct list_head *list); +extern struct vmw_compat_shader_manager * +vmw_compat_shader_man_create(struct vmw_private *dev_priv); +extern void +vmw_compat_shader_man_destroy(struct vmw_compat_shader_manager *man); + /** * Inline helper functions diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 
9441825c7860..352224b9d667 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -235,8 +235,12 @@ static void vmw_resource_relocations_apply(uint32_t *cb, { struct vmw_resource_relocation *rel; - list_for_each_entry(rel, list, head) - cb[rel->offset] = rel->res->id; + list_for_each_entry(rel, list, head) { + if (likely(rel->res != NULL)) + cb[rel->offset] = rel->res->id; + else + cb[rel->offset] = SVGA_3D_CMD_NOP; + } } static int vmw_cmd_invalid(struct vmw_private *dev_priv, @@ -381,22 +385,27 @@ static int vmw_resources_validate(struct vmw_sw_context *sw_context) } /** - * vmw_cmd_res_check - Check that a resource is present and if so, put it + * vmw_cmd_compat_res_check - Check that a resource is present and if so, put it * on the resource validate list unless it's already there. * * @dev_priv: Pointer to a device private structure. * @sw_context: Pointer to the software context. * @res_type: Resource type. * @converter: User-space visisble type specific information. - * @id: Pointer to the location in the command buffer currently being + * @id: user-space resource id handle. + * @id_loc: Pointer to the location in the command buffer currently being * parsed from where the user-space resource id handle is located. + * @p_val: Pointer to pointer to resource validalidation node. Populated + * on exit. */ -static int vmw_cmd_res_check(struct vmw_private *dev_priv, - struct vmw_sw_context *sw_context, - enum vmw_res_type res_type, - const struct vmw_user_resource_conv *converter, - uint32_t *id, - struct vmw_resource_val_node **p_val) +static int +vmw_cmd_compat_res_check(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + enum vmw_res_type res_type, + const struct vmw_user_resource_conv *converter, + uint32_t id, + uint32_t *id_loc, + struct vmw_resource_val_node **p_val) { struct vmw_res_cache_entry *rcache = &sw_context->res_cache[res_type]; @@ -404,7 +413,7 @@ static int vmw_cmd_res_check(struct vmw_private *dev_priv, struct vmw_resource_val_node *node; int ret; - if (*id == SVGA3D_INVALID_ID) { + if (id == SVGA3D_INVALID_ID) { if (p_val) *p_val = NULL; if (res_type == vmw_res_context) { @@ -419,7 +428,7 @@ static int vmw_cmd_res_check(struct vmw_private *dev_priv, * resource */ - if (likely(rcache->valid && *id == rcache->handle)) { + if (likely(rcache->valid && id == rcache->handle)) { const struct vmw_resource *res = rcache->res; rcache->node->first_usage = false; @@ -428,28 +437,28 @@ static int vmw_cmd_res_check(struct vmw_private *dev_priv, return vmw_resource_relocation_add (&sw_context->res_relocations, res, - id - sw_context->buf_start); + id_loc - sw_context->buf_start); } ret = vmw_user_resource_lookup_handle(dev_priv, - sw_context->tfile, - *id, + sw_context->fp->tfile, + id, converter, &res); if (unlikely(ret != 0)) { DRM_ERROR("Could not find or use resource 0x%08x.\n", - (unsigned) *id); + (unsigned) id); dump_stack(); return ret; } rcache->valid = true; rcache->res = res; - rcache->handle = *id; + rcache->handle = id; ret = vmw_resource_relocation_add(&sw_context->res_relocations, res, - id - sw_context->buf_start); + id_loc - sw_context->buf_start); if (unlikely(ret != 0)) goto out_no_reloc; @@ -482,6 +491,31 @@ out_no_reloc: return ret; } +/** + * vmw_cmd_res_check - Check that a resource is present and if so, put it + * on the resource validate list unless it's already there. + * + * @dev_priv: Pointer to a device private structure. + * @sw_context: Pointer to the software context. 
+ * @res_type: Resource type. + * @converter: User-space visisble type specific information. + * @id_loc: Pointer to the location in the command buffer currently being + * parsed from where the user-space resource id handle is located. + * @p_val: Pointer to pointer to resource validalidation node. Populated + * on exit. + */ +static int +vmw_cmd_res_check(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + enum vmw_res_type res_type, + const struct vmw_user_resource_conv *converter, + uint32_t *id_loc, + struct vmw_resource_val_node **p_val) +{ + return vmw_cmd_compat_res_check(dev_priv, sw_context, res_type, + converter, *id_loc, id_loc, p_val); +} + /** * vmw_cmd_cid_check - Check a command header for valid context information. * @@ -769,7 +803,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, struct vmw_relocation *reloc; int ret; - ret = vmw_user_dmabuf_lookup(sw_context->tfile, handle, &vmw_bo); + ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo); if (unlikely(ret != 0)) { DRM_ERROR("Could not find or use MOB buffer.\n"); return -EINVAL; @@ -830,7 +864,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, struct vmw_relocation *reloc; int ret; - ret = vmw_user_dmabuf_lookup(sw_context->tfile, handle, &vmw_bo); + ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo); if (unlikely(ret != 0)) { DRM_ERROR("Could not find or use GMR region.\n"); return -EINVAL; @@ -1129,7 +1163,8 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv, srf = vmw_res_to_srf(sw_context->res_cache[vmw_res_surface].res); - vmw_kms_cursor_snoop(srf, sw_context->tfile, &vmw_bo->base, header); + vmw_kms_cursor_snoop(srf, sw_context->fp->tfile, &vmw_bo->base, + header); out_no_surface: vmw_dmabuf_unreference(&vmw_bo); @@ -1480,6 +1515,98 @@ static int vmw_cmd_invalidate_gb_surface(struct vmw_private *dev_priv, &cmd->body.sid, NULL); } + +/** + * vmw_cmd_shader_define - Validate an SVGA_3D_CMD_SHADER_DEFINE + * command + * + * @dev_priv: Pointer to a device private struct. + * @sw_context: The software context being used for this batch. + * @header: Pointer to the command header in the command stream. + */ +static int vmw_cmd_shader_define(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + struct vmw_shader_define_cmd { + SVGA3dCmdHeader header; + SVGA3dCmdDefineShader body; + } *cmd; + int ret; + size_t size; + + cmd = container_of(header, struct vmw_shader_define_cmd, + header); + + ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_context, + user_context_converter, &cmd->body.cid, + NULL); + if (unlikely(ret != 0)) + return ret; + + if (unlikely(!dev_priv->has_mob)) + return 0; + + size = cmd->header.size - sizeof(cmd->body); + ret = vmw_compat_shader_add(sw_context->fp->shman, + cmd->body.shid, cmd + 1, + cmd->body.type, size, + sw_context->fp->tfile, + &sw_context->staged_shaders); + if (unlikely(ret != 0)) + return ret; + + return vmw_resource_relocation_add(&sw_context->res_relocations, + NULL, &cmd->header.id - + sw_context->buf_start); + + return 0; +} + +/** + * vmw_cmd_shader_destroy - Validate an SVGA_3D_CMD_SHADER_DESTROY + * command + * + * @dev_priv: Pointer to a device private struct. + * @sw_context: The software context being used for this batch. + * @header: Pointer to the command header in the command stream. 
+ */ +static int vmw_cmd_shader_destroy(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + struct vmw_shader_destroy_cmd { + SVGA3dCmdHeader header; + SVGA3dCmdDestroyShader body; + } *cmd; + int ret; + + cmd = container_of(header, struct vmw_shader_destroy_cmd, + header); + + ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_context, + user_context_converter, &cmd->body.cid, + NULL); + if (unlikely(ret != 0)) + return ret; + + if (unlikely(!dev_priv->has_mob)) + return 0; + + ret = vmw_compat_shader_remove(sw_context->fp->shman, + cmd->body.shid, + cmd->body.type, + &sw_context->staged_shaders); + if (unlikely(ret != 0)) + return ret; + + return vmw_resource_relocation_add(&sw_context->res_relocations, + NULL, &cmd->header.id - + sw_context->buf_start); + + return 0; +} + /** * vmw_cmd_set_shader - Validate an SVGA_3D_CMD_SET_SHADER * command @@ -1511,10 +1638,17 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv, if (dev_priv->has_mob) { struct vmw_ctx_bindinfo bi; struct vmw_resource_val_node *res_node; + u32 shid = cmd->body.shid; - ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_shader, - user_shader_converter, - &cmd->body.shid, &res_node); + (void) vmw_compat_shader_lookup(sw_context->fp->shman, + cmd->body.type, + &shid); + + ret = vmw_cmd_compat_res_check(dev_priv, sw_context, + vmw_res_shader, + user_shader_converter, + shid, + &cmd->body.shid, &res_node); if (unlikely(ret != 0)) return ret; @@ -1669,10 +1803,10 @@ static const struct vmw_cmd_entry const vmw_cmd_entries[SVGA_3D_CMD_MAX] = { true, false, false), VMW_CMD_DEF(SVGA_3D_CMD_PRESENT, &vmw_cmd_present_check, false, false, false), - VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DEFINE, &vmw_cmd_cid_check, - true, true, false), - VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DESTROY, &vmw_cmd_cid_check, - true, true, false), + VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DEFINE, &vmw_cmd_shader_define, + true, false, false), + VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DESTROY, &vmw_cmd_shader_destroy, + true, false, false), VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER, &vmw_cmd_set_shader, true, false, false), VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER_CONST, &vmw_cmd_set_shader_const, @@ -2206,7 +2340,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, } else sw_context->kernel = true; - sw_context->tfile = vmw_fpriv(file_priv)->tfile; + sw_context->fp = vmw_fpriv(file_priv); sw_context->cur_reloc = 0; sw_context->cur_val_buf = 0; sw_context->fence_flags = 0; @@ -2223,6 +2357,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, goto out_unlock; sw_context->res_ht_initialized = true; } + INIT_LIST_HEAD(&sw_context->staged_shaders); INIT_LIST_HEAD(&resource_list); ret = vmw_cmd_check_all(dev_priv, sw_context, kernel_commands, @@ -2311,6 +2446,8 @@ int vmw_execbuf_process(struct drm_file *file_priv, } list_splice_init(&sw_context->resource_list, &resource_list); + vmw_compat_shaders_commit(sw_context->fp->shman, + &sw_context->staged_shaders); mutex_unlock(&dev_priv->cmdbuf_mutex); /* @@ -2337,6 +2474,8 @@ out_unlock: list_splice_init(&sw_context->resource_list, &resource_list); error_resource = sw_context->error_resource; sw_context->error_resource = NULL; + vmw_compat_shaders_revert(sw_context->fp->shman, + &sw_context->staged_shaders); mutex_unlock(&dev_priv->cmdbuf_mutex); /* diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index 1457ec4b7125..be85d7fdfb5b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -29,6 +29,8 @@ 
#include "vmwgfx_resource_priv.h" #include "ttm/ttm_placement.h" +#define VMW_COMPAT_SHADER_HT_ORDER 12 + struct vmw_shader { struct vmw_resource res; SVGA3dShaderType type; @@ -40,6 +42,50 @@ struct vmw_user_shader { struct vmw_shader shader; }; +/** + * enum vmw_compat_shader_state - Staging state for compat shaders + */ +enum vmw_compat_shader_state { + VMW_COMPAT_COMMITED, + VMW_COMPAT_ADD, + VMW_COMPAT_DEL +}; + +/** + * struct vmw_compat_shader - Metadata for compat shaders. + * + * @handle: The TTM handle of the guest backed shader. + * @tfile: The struct ttm_object_file the guest backed shader is registered + * with. + * @hash: Hash item for lookup. + * @head: List head for staging lists or the compat shader manager list. + * @state: Staging state. + * + * The structure is protected by the cmdbuf lock. + */ +struct vmw_compat_shader { + u32 handle; + struct ttm_object_file *tfile; + struct drm_hash_item hash; + struct list_head head; + enum vmw_compat_shader_state state; +}; + +/** + * struct vmw_compat_shader_manager - Compat shader manager. + * + * @shaders: Hash table containing staged and commited compat shaders + * @list: List of commited shaders. + * @dev_priv: Pointer to a device private structure. + * + * @shaders and @list are protected by the cmdbuf mutex for now. + */ +struct vmw_compat_shader_manager { + struct drm_open_hash shaders; + struct list_head list; + struct vmw_private *dev_priv; +}; + static void vmw_user_shader_free(struct vmw_resource *res); static struct vmw_resource * vmw_user_shader_base_to_res(struct ttm_base_object *base); @@ -325,13 +371,81 @@ int vmw_shader_destroy_ioctl(struct drm_device *dev, void *data, TTM_REF_USAGE); } +int vmw_shader_alloc(struct vmw_private *dev_priv, + struct vmw_dma_buffer *buffer, + size_t shader_size, + size_t offset, + SVGA3dShaderType shader_type, + struct ttm_object_file *tfile, + u32 *handle) +{ + struct vmw_user_shader *ushader; + struct vmw_resource *res, *tmp; + int ret; + + /* + * Approximate idr memory usage with 128 bytes. It will be limited + * by maximum number_of shaders anyway. + */ + if (unlikely(vmw_user_shader_size == 0)) + vmw_user_shader_size = + ttm_round_pot(sizeof(struct vmw_user_shader)) + 128; + + ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), + vmw_user_shader_size, + false, true); + if (unlikely(ret != 0)) { + if (ret != -ERESTARTSYS) + DRM_ERROR("Out of graphics memory for shader " + "creation.\n"); + goto out; + } + + ushader = kzalloc(sizeof(*ushader), GFP_KERNEL); + if (unlikely(ushader == NULL)) { + ttm_mem_global_free(vmw_mem_glob(dev_priv), + vmw_user_shader_size); + ret = -ENOMEM; + goto out; + } + + res = &ushader->shader.res; + ushader->base.shareable = false; + ushader->base.tfile = NULL; + + /* + * From here on, the destructor takes over resource freeing. 
+ */ + + ret = vmw_gb_shader_init(dev_priv, res, shader_size, + offset, shader_type, buffer, + vmw_user_shader_free); + if (unlikely(ret != 0)) + goto out; + + tmp = vmw_resource_reference(res); + ret = ttm_base_object_init(tfile, &ushader->base, false, + VMW_RES_SHADER, + &vmw_user_shader_base_release, NULL); + + if (unlikely(ret != 0)) { + vmw_resource_unreference(&tmp); + goto out_err; + } + + if (handle) + *handle = ushader->base.hash.key; +out_err: + vmw_resource_unreference(&res); +out: + return ret; +} + + int vmw_shader_define_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct vmw_private *dev_priv = vmw_priv(dev); - struct vmw_user_shader *ushader; - struct vmw_resource *res; - struct vmw_resource *tmp; struct drm_vmw_shader_create_arg *arg = (struct drm_vmw_shader_create_arg *)data; struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; @@ -373,69 +487,324 @@ int vmw_shader_define_ioctl(struct drm_device *dev, void *data, goto out_bad_arg; } - /* - * Approximate idr memory usage with 128 bytes. It will be limited - * by maximum number_of shaders anyway. - */ - - if (unlikely(vmw_user_shader_size == 0)) - vmw_user_shader_size = ttm_round_pot(sizeof(*ushader)) - + 128; - ret = ttm_read_lock(&vmaster->lock, true); if (unlikely(ret != 0)) - return ret; + goto out_bad_arg; - ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), - vmw_user_shader_size, - false, true); - if (unlikely(ret != 0)) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Out of graphics memory for shader" - " creation.\n"); - goto out_unlock; - } + ret = vmw_shader_alloc(dev_priv, buffer, arg->size, arg->offset, + shader_type, tfile, &arg->shader_handle); - ushader = kzalloc(sizeof(*ushader), GFP_KERNEL); - if (unlikely(ushader == NULL)) { - ttm_mem_global_free(vmw_mem_glob(dev_priv), - vmw_user_shader_size); - ret = -ENOMEM; - goto out_unlock; - } - - res = &ushader->shader.res; - ushader->base.shareable = false; - ushader->base.tfile = NULL; - - /* - * From here on, the destructor takes over resource freeing. - */ - - ret = vmw_gb_shader_init(dev_priv, res, arg->size, - arg->offset, shader_type, buffer, - vmw_user_shader_free); - if (unlikely(ret != 0)) - goto out_unlock; - - tmp = vmw_resource_reference(res); - ret = ttm_base_object_init(tfile, &ushader->base, false, - VMW_RES_SHADER, - &vmw_user_shader_base_release, NULL); - - if (unlikely(ret != 0)) { - vmw_resource_unreference(&tmp); - goto out_err; - } - - arg->shader_handle = ushader->base.hash.key; -out_err: - vmw_resource_unreference(&res); -out_unlock: ttm_read_unlock(&vmaster->lock); out_bad_arg: vmw_dmabuf_unreference(&buffer); - return ret; - +} + +/** + * vmw_compat_shader_lookup - Look up a compat shader + * + * @man: Pointer to the compat shader manager. + * @shader_type: The shader type, that combined with the user_key identifies + * the shader. + * @user_key: On entry, this should be a pointer to the user_key. + * On successful exit, it will contain the guest-backed shader's TTM handle. + * + * Returns 0 on success. Non-zero on failure, in which case the value pointed + * to by @user_key is unmodified. 
+ */ +int vmw_compat_shader_lookup(struct vmw_compat_shader_manager *man, + SVGA3dShaderType shader_type, + u32 *user_key) +{ + struct drm_hash_item *hash; + int ret; + unsigned long key = *user_key | (shader_type << 24); + + ret = drm_ht_find_item(&man->shaders, key, &hash); + if (unlikely(ret != 0)) + return ret; + + *user_key = drm_hash_entry(hash, struct vmw_compat_shader, + hash)->handle; + + return 0; +} + +/** + * vmw_compat_shader_free - Free a compat shader. + * + * @man: Pointer to the compat shader manager. + * @entry: Pointer to a struct vmw_compat_shader. + * + * Frees a struct vmw_compat_shder entry and drops its reference to the + * guest backed shader. + */ +static void vmw_compat_shader_free(struct vmw_compat_shader_manager *man, + struct vmw_compat_shader *entry) +{ + list_del(&entry->head); + WARN_ON(drm_ht_remove_item(&man->shaders, &entry->hash)); + WARN_ON(ttm_ref_object_base_unref(entry->tfile, entry->handle, + TTM_REF_USAGE)); + kfree(entry); +} + +/** + * vmw_compat_shaders_commit - Commit a list of compat shader actions. + * + * @man: Pointer to the compat shader manager. + * @list: Caller's list of compat shader actions. + * + * This function commits a list of compat shader additions or removals. + * It is typically called when the execbuf ioctl call triggering these + * actions has commited the fifo contents to the device. + */ +void vmw_compat_shaders_commit(struct vmw_compat_shader_manager *man, + struct list_head *list) +{ + struct vmw_compat_shader *entry, *next; + + list_for_each_entry_safe(entry, next, list, head) { + list_del(&entry->head); + switch (entry->state) { + case VMW_COMPAT_ADD: + entry->state = VMW_COMPAT_COMMITED; + list_add_tail(&entry->head, &man->list); + break; + case VMW_COMPAT_DEL: + ttm_ref_object_base_unref(entry->tfile, entry->handle, + TTM_REF_USAGE); + kfree(entry); + break; + default: + BUG(); + break; + } + } +} + +/** + * vmw_compat_shaders_revert - Revert a list of compat shader actions + * + * @man: Pointer to the compat shader manager. + * @list: Caller's list of compat shader actions. + * + * This function reverts a list of compat shader additions or removals. + * It is typically called when the execbuf ioctl call triggering these + * actions failed for some reason, and the command stream was never + * submitted. + */ +void vmw_compat_shaders_revert(struct vmw_compat_shader_manager *man, + struct list_head *list) +{ + struct vmw_compat_shader *entry, *next; + int ret; + + list_for_each_entry_safe(entry, next, list, head) { + switch (entry->state) { + case VMW_COMPAT_ADD: + vmw_compat_shader_free(man, entry); + break; + case VMW_COMPAT_DEL: + ret = drm_ht_insert_item(&man->shaders, &entry->hash); + list_del(&entry->head); + list_add_tail(&entry->head, &man->list); + entry->state = VMW_COMPAT_COMMITED; + break; + default: + BUG(); + break; + } + } +} + +/** + * vmw_compat_shader_remove - Stage a compat shader for removal. + * + * @man: Pointer to the compat shader manager + * @user_key: The key that is used to identify the shader. The key is + * unique to the shader type. + * @shader_type: Shader type. + * @list: Caller's list of staged shader actions. + * + * This function stages a compat shader for removal and removes the key from + * the shader manager's hash table. If the shader was previously only staged + * for addition it is completely removed (But the execbuf code may keep a + * reference if it was bound to a context between addition and removal). 
If + * it was previously commited to the manager, it is staged for removal. + */ +int vmw_compat_shader_remove(struct vmw_compat_shader_manager *man, + u32 user_key, SVGA3dShaderType shader_type, + struct list_head *list) +{ + struct vmw_compat_shader *entry; + struct drm_hash_item *hash; + int ret; + + ret = drm_ht_find_item(&man->shaders, user_key | (shader_type << 24), + &hash); + if (likely(ret != 0)) + return -EINVAL; + + entry = drm_hash_entry(hash, struct vmw_compat_shader, hash); + + switch (entry->state) { + case VMW_COMPAT_ADD: + vmw_compat_shader_free(man, entry); + break; + case VMW_COMPAT_COMMITED: + (void) drm_ht_remove_item(&man->shaders, &entry->hash); + list_del(&entry->head); + entry->state = VMW_COMPAT_DEL; + list_add_tail(&entry->head, list); + break; + default: + BUG(); + break; + } + + return 0; +} + +/** + * vmw_compat_shader_add - Create a compat shader and add the + * key to the manager + * + * @man: Pointer to the compat shader manager + * @user_key: The key that is used to identify the shader. The key is + * unique to the shader type. + * @bytecode: Pointer to the bytecode of the shader. + * @shader_type: Shader type. + * @tfile: Pointer to a struct ttm_object_file that the guest-backed shader is + * to be created with. + * @list: Caller's list of staged shader actions. + * + * Note that only the key is added to the shader manager's hash table. + * The shader is not yet added to the shader manager's list of shaders. + */ +int vmw_compat_shader_add(struct vmw_compat_shader_manager *man, + u32 user_key, const void *bytecode, + SVGA3dShaderType shader_type, + size_t size, + struct ttm_object_file *tfile, + struct list_head *list) +{ + struct vmw_dma_buffer *buf; + struct ttm_bo_kmap_obj map; + bool is_iomem; + struct vmw_compat_shader *compat; + u32 handle; + int ret; + + if (user_key > ((1 << 24) - 1) || (unsigned) shader_type > 16) + return -EINVAL; + + /* Allocate and pin a DMA buffer */ + buf = kzalloc(sizeof(*buf), GFP_KERNEL); + if (unlikely(buf == NULL)) + return -ENOMEM; + + ret = vmw_dmabuf_init(man->dev_priv, buf, size, &vmw_sys_ne_placement, + true, vmw_dmabuf_bo_free); + if (unlikely(ret != 0)) + goto out; + + ret = ttm_bo_reserve(&buf->base, false, true, false, NULL); + if (unlikely(ret != 0)) + goto no_reserve; + + /* Map and copy shader bytecode. 
*/ + ret = ttm_bo_kmap(&buf->base, 0, PAGE_ALIGN(size) >> PAGE_SHIFT, + &map); + if (unlikely(ret != 0)) { + ttm_bo_unreserve(&buf->base); + goto no_reserve; + } + + memcpy(ttm_kmap_obj_virtual(&map, &is_iomem), bytecode, size); + WARN_ON(is_iomem); + + ttm_bo_kunmap(&map); + ret = ttm_bo_validate(&buf->base, &vmw_sys_placement, false, true); + WARN_ON(ret != 0); + ttm_bo_unreserve(&buf->base); + + /* Create a guest-backed shader container backed by the dma buffer */ + ret = vmw_shader_alloc(man->dev_priv, buf, size, 0, shader_type, + tfile, &handle); + vmw_dmabuf_unreference(&buf); + if (unlikely(ret != 0)) + goto no_reserve; + /* + * Create a compat shader structure and stage it for insertion + * in the manager + */ + compat = kzalloc(sizeof(*compat), GFP_KERNEL); + if (compat == NULL) + goto no_compat; + + compat->hash.key = user_key | (shader_type << 24); + ret = drm_ht_insert_item(&man->shaders, &compat->hash); + if (unlikely(ret != 0)) + goto out_invalid_key; + + compat->state = VMW_COMPAT_ADD; + compat->handle = handle; + compat->tfile = tfile; + list_add_tail(&compat->head, list); + + return 0; + +out_invalid_key: + kfree(compat); +no_compat: + ttm_ref_object_base_unref(tfile, handle, TTM_REF_USAGE); +no_reserve: +out: + return ret; +} + +/** + * vmw_compat_shader_man_create - Create a compat shader manager + * + * @dev_priv: Pointer to a device private structure. + * + * Typically done at file open time. If successful returns a pointer to a + * compat shader manager. Otherwise returns an error pointer. + */ +struct vmw_compat_shader_manager * +vmw_compat_shader_man_create(struct vmw_private *dev_priv) +{ + struct vmw_compat_shader_manager *man; + int ret; + + man = kzalloc(sizeof(*man), GFP_KERNEL); + + man->dev_priv = dev_priv; + INIT_LIST_HEAD(&man->list); + ret = drm_ht_create(&man->shaders, VMW_COMPAT_SHADER_HT_ORDER); + if (ret == 0) + return man; + + kfree(man); + return ERR_PTR(ret); +} + +/** + * vmw_compat_shader_man_destroy - Destroy a compat shader manager + * + * @man: Pointer to the shader manager to destroy. + * + * Typically done at file close time. + */ +void vmw_compat_shader_man_destroy(struct vmw_compat_shader_manager *man) +{ + struct vmw_compat_shader *entry, *next; + + mutex_lock(&man->dev_priv->cmdbuf_mutex); + list_for_each_entry_safe(entry, next, &man->list, head) + vmw_compat_shader_free(man, entry); + + mutex_unlock(&man->dev_priv->cmdbuf_mutex); + kfree(man); } From a6fc955ff9fc60dcffc80003410a85d874e4f1e3 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 31 Jan 2014 10:21:10 +0100 Subject: [PATCH 116/171] drm/vmwgfx: Detect old user-space drivers and set up legacy emulation v2 GB aware mesa userspace drivers are detected by the fact that they are calling the vmw getparam ioctl querying DRM_VMW_PARAM_HW_CAPS to detect whether the device is Guest-backed object capable. For other drivers, lie about hardware version and send the 3D capabilities in a format they expect. v2: Use DRM_VMW_PARAM_MAX_MOB_MEMORY to detect gb awareness, Make sure we don't ovwerwrite bounce buffer or write past user-space buffer indicated size. 
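For reference, the probe a GB-aware user-space driver is expected to issue looks
roughly like the sketch below. This is purely illustrative (it is not mesa code):
it assumes libdrm's drmCommandWriteRead() wrapper and the vmwgfx_drm.h UAPI
header, and error handling is trimmed.

  #include <stdint.h>
  #include <xf86drm.h>
  #include "vmwgfx_drm.h"

  /*
   * Probe for guest-backed object support (illustrative only). Asking for
   * DRM_VMW_PARAM_MAX_MOB_MEMORY is also what marks this file descriptor
   * as gb_aware in the kernel, so a driver should only issue this query
   * if it really understands guest-backed objects.
   */
  static int vmw_probe_gb(int fd, uint64_t *mob_bytes)
  {
          struct drm_vmw_getparam_arg arg = {
                  .param = DRM_VMW_PARAM_MAX_MOB_MEMORY,
          };
          int ret = drmCommandWriteRead(fd, DRM_VMW_GET_PARAM,
                                        &arg, sizeof(arg));

          if (ret)
                  return ret;     /* old kernel: stay on the legacy path */

          *mob_bytes = arg.value;
          return 0;
  }

Drivers that never issue this query keep seeing the pre-GB hardware version and
the 3D capability layout they expect.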
Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz --- drivers/gpu/drm/vmwgfx/svga3d_reg.h | 24 +++++++ drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 93 ++++++++++++++++++++++----- 2 files changed, 101 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/svga3d_reg.h b/drivers/gpu/drm/vmwgfx/svga3d_reg.h index d95335cb90bd..b645647b7776 100644 --- a/drivers/gpu/drm/vmwgfx/svga3d_reg.h +++ b/drivers/gpu/drm/vmwgfx/svga3d_reg.h @@ -2583,4 +2583,28 @@ typedef union { float f; } SVGA3dDevCapResult; +typedef enum { + SVGA3DCAPS_RECORD_UNKNOWN = 0, + SVGA3DCAPS_RECORD_DEVCAPS_MIN = 0x100, + SVGA3DCAPS_RECORD_DEVCAPS = 0x100, + SVGA3DCAPS_RECORD_DEVCAPS_MAX = 0x1ff, +} SVGA3dCapsRecordType; + +typedef +struct SVGA3dCapsRecordHeader { + uint32 length; + SVGA3dCapsRecordType type; +} +SVGA3dCapsRecordHeader; + +typedef +struct SVGA3dCapsRecord { + SVGA3dCapsRecordHeader header; + uint32 data[1]; +} +SVGA3dCapsRecord; + + +typedef uint32 SVGA3dCapPair[2]; + #endif /* _SVGA3D_REG_H_ */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c index 116c49736763..f9881f9e62bd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c @@ -29,12 +29,18 @@ #include #include "vmwgfx_kms.h" +struct svga_3d_compat_cap { + SVGA3dCapsRecordHeader header; + SVGA3dCapPair pairs[SVGA3D_DEVCAP_MAX]; +}; + int vmw_getparam_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct vmw_private *dev_priv = vmw_priv(dev); struct drm_vmw_getparam_arg *param = (struct drm_vmw_getparam_arg *)data; + struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); switch (param->param) { case DRM_VMW_PARAM_NUM_STREAMS: @@ -60,6 +66,11 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, __le32 __iomem *fifo_mem = dev_priv->mmio_virt; const struct vmw_fifo_state *fifo = &dev_priv->fifo; + if ((dev_priv->capabilities & SVGA_CAP_GBOBJECTS)) { + param->value = SVGA3D_HWVERSION_WS8_B1; + break; + } + param->value = ioread32(fifo_mem + ((fifo->capabilities & @@ -69,17 +80,26 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, break; } case DRM_VMW_PARAM_MAX_SURF_MEMORY: - param->value = dev_priv->memory_size; + if ((dev_priv->capabilities & SVGA_CAP_GBOBJECTS) && + !vmw_fp->gb_aware) + param->value = dev_priv->max_mob_pages * PAGE_SIZE / 2; + else + param->value = dev_priv->memory_size; break; case DRM_VMW_PARAM_3D_CAPS_SIZE: - if (dev_priv->capabilities & SVGA_CAP_GBOBJECTS) - param->value = SVGA3D_DEVCAP_MAX; + if ((dev_priv->capabilities & SVGA_CAP_GBOBJECTS) && + vmw_fp->gb_aware) + param->value = SVGA3D_DEVCAP_MAX * sizeof(uint32_t); + else if (dev_priv->capabilities & SVGA_CAP_GBOBJECTS) + param->value = sizeof(struct svga_3d_compat_cap) + + sizeof(uint32_t); else param->value = (SVGA_FIFO_3D_CAPS_LAST - - SVGA_FIFO_3D_CAPS + 1); - param->value *= sizeof(uint32_t); + SVGA_FIFO_3D_CAPS + 1) * + sizeof(uint32_t); break; case DRM_VMW_PARAM_MAX_MOB_MEMORY: + vmw_fp->gb_aware = true; param->value = dev_priv->max_mob_pages * PAGE_SIZE; break; default: @@ -91,6 +111,38 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, return 0; } +static int vmw_fill_compat_cap(struct vmw_private *dev_priv, void *bounce, + size_t size) +{ + struct svga_3d_compat_cap *compat_cap = + (struct svga_3d_compat_cap *) bounce; + unsigned int i; + size_t pair_offset = offsetof(struct svga_3d_compat_cap, pairs); + unsigned int max_size; + + if (size < pair_offset) + return -EINVAL; + + max_size = (size - pair_offset) / 
sizeof(SVGA3dCapPair); + + if (max_size > SVGA3D_DEVCAP_MAX) + max_size = SVGA3D_DEVCAP_MAX; + + compat_cap->header.length = + (pair_offset + max_size * sizeof(SVGA3dCapPair)) / sizeof(u32); + compat_cap->header.type = SVGA3DCAPS_RECORD_DEVCAPS; + + mutex_lock(&dev_priv->hw_mutex); + for (i = 0; i < max_size; ++i) { + vmw_write(dev_priv, SVGA_REG_DEV_CAP, i); + compat_cap->pairs[i][0] = i; + compat_cap->pairs[i][1] = vmw_read(dev_priv, SVGA_REG_DEV_CAP); + } + mutex_unlock(&dev_priv->hw_mutex); + + return 0; +} + int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -104,41 +156,49 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, void *bounce; int ret; bool gb_objects = !!(dev_priv->capabilities & SVGA_CAP_GBOBJECTS); + struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); if (unlikely(arg->pad64 != 0)) { DRM_ERROR("Illegal GET_3D_CAP argument.\n"); return -EINVAL; } - if (gb_objects) - size = SVGA3D_DEVCAP_MAX; + if (gb_objects && vmw_fp->gb_aware) + size = SVGA3D_DEVCAP_MAX * sizeof(uint32_t); + else if (gb_objects) + size = sizeof(struct svga_3d_compat_cap) + sizeof(uint32_t); else - size = (SVGA_FIFO_3D_CAPS_LAST - SVGA_FIFO_3D_CAPS + 1); - - size *= sizeof(uint32_t); + size = (SVGA_FIFO_3D_CAPS_LAST - SVGA_FIFO_3D_CAPS + 1) * + sizeof(uint32_t); if (arg->max_size < size) size = arg->max_size; - bounce = vmalloc(size); + bounce = vzalloc(size); if (unlikely(bounce == NULL)) { DRM_ERROR("Failed to allocate bounce buffer for 3D caps.\n"); return -ENOMEM; } - if (gb_objects) { - int i; + if (gb_objects && vmw_fp->gb_aware) { + int i, num; uint32_t *bounce32 = (uint32_t *) bounce; + num = size / sizeof(uint32_t); + if (num > SVGA3D_DEVCAP_MAX) + num = SVGA3D_DEVCAP_MAX; + mutex_lock(&dev_priv->hw_mutex); - for (i = 0; i < SVGA3D_DEVCAP_MAX; ++i) { + for (i = 0; i < num; ++i) { vmw_write(dev_priv, SVGA_REG_DEV_CAP, i); *bounce32++ = vmw_read(dev_priv, SVGA_REG_DEV_CAP); } mutex_unlock(&dev_priv->hw_mutex); - + } else if (gb_objects) { + ret = vmw_fill_compat_cap(dev_priv, bounce, size); + if (unlikely(ret != 0)) + goto out_err; } else { - fifo_mem = dev_priv->mmio_virt; memcpy_fromio(bounce, &fifo_mem[SVGA_FIFO_3D_CAPS], size); } @@ -146,6 +206,7 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, ret = copy_to_user(buffer, bounce, size); if (ret) ret = -EFAULT; +out_err: vfree(bounce); if (unlikely(ret != 0)) From 30f82d816d2dccfdc2063ac8cca994904c9b612c Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 5 Feb 2014 08:13:56 +0100 Subject: [PATCH 117/171] drm/vmwgfx: Reemit context bindings when necessary v2 When a context is first referenced in the command stream, make sure that all scrubbed (as a result of eviction) bindings are re-emitted. Also make sure that all bound resources are put on the resource validate list. This is needed for legacy emulation, since legacy user-space drivers will typically not re-emit shader bindings. It also removes the requirement for user-space drivers to re-emit render-target- and texture bindings. Makes suspend and hibernate now also work with legacy user-space drivers on guest-backed devices. v2: Don't rebind on legacy devices. 
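In condensed form, the flow added below is (an illustrative sketch using the
helpers introduced by this patch, not additional code to apply):

  /*
   * In vmw_cmd_compat_res_check(): the first time a context is referenced
   * by the command stream, put every resource currently bound to it on
   * the resource validation list.
   */
  if (dev_priv->has_mob && node->first_usage && res_type == vmw_res_context)
          vmw_resource_context_res_add(dev_priv, sw_context, res);

  /*
   * In vmw_execbuf_process(): once validation has given evicted resources
   * new ids, re-emit every binding that was scrubbed on eviction.
   */
  if (dev_priv->has_mob)
          ret = vmw_rebind_contexts(sw_context);

The rebind itself walks the context binding list and calls the per-binding
scrub function with rebind == true, clearing the scrubbed flag on success.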
Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz --- drivers/gpu/drm/vmwgfx/vmwgfx_context.c | 144 +++++++++++++++++++---- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 6 + drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 85 ++++++++++++- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 11 +- drivers/gpu/drm/vmwgfx/vmwgfx_shader.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 2 +- 6 files changed, 222 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c index 82c41daebc0e..9426c53fb483 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c @@ -37,7 +37,7 @@ struct vmw_user_context { -typedef int (*vmw_scrub_func)(struct vmw_ctx_bindinfo *); +typedef int (*vmw_scrub_func)(struct vmw_ctx_bindinfo *, bool); static void vmw_user_context_free(struct vmw_resource *res); static struct vmw_resource * @@ -50,9 +50,11 @@ static int vmw_gb_context_unbind(struct vmw_resource *res, bool readback, struct ttm_validate_buffer *val_buf); static int vmw_gb_context_destroy(struct vmw_resource *res); -static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi); -static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi); -static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi); +static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi, bool rebind); +static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi, + bool rebind); +static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi, bool rebind); +static void vmw_context_binding_state_scrub(struct vmw_ctx_binding_state *cbs); static void vmw_context_binding_state_kill(struct vmw_ctx_binding_state *cbs); static uint64_t vmw_user_context_size; @@ -111,10 +113,14 @@ static void vmw_hw_context_destroy(struct vmw_resource *res) if (res->func->destroy == vmw_gb_context_destroy) { mutex_lock(&dev_priv->cmdbuf_mutex); + mutex_lock(&dev_priv->binding_mutex); + (void) vmw_context_binding_state_kill + (&container_of(res, struct vmw_user_context, res)->cbs); (void) vmw_gb_context_destroy(res); if (dev_priv->pinned_bo != NULL && !dev_priv->query_cid_valid) __vmw_execbuf_release_pinned_bo(dev_priv, NULL); + mutex_unlock(&dev_priv->binding_mutex); mutex_unlock(&dev_priv->cmdbuf_mutex); return; } @@ -328,7 +334,7 @@ static int vmw_gb_context_unbind(struct vmw_resource *res, BUG_ON(bo->mem.mem_type != VMW_PL_MOB); mutex_lock(&dev_priv->binding_mutex); - vmw_context_binding_state_kill(&uctx->cbs); + vmw_context_binding_state_scrub(&uctx->cbs); submit_size = sizeof(*cmd2) + (readback ? sizeof(*cmd1) : 0); @@ -378,10 +384,6 @@ static int vmw_gb_context_destroy(struct vmw_resource *res) SVGA3dCmdHeader header; SVGA3dCmdDestroyGBContext body; } *cmd; - struct vmw_user_context *uctx = - container_of(res, struct vmw_user_context, res); - - BUG_ON(!list_empty(&uctx->cbs.list)); if (likely(res->id == -1)) return 0; @@ -528,8 +530,9 @@ out_unlock: * vmw_context_scrub_shader - scrub a shader binding from a context. * * @bi: single binding information. + * @rebind: Whether to issue a bind instead of scrub command. 
*/ -static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi) +static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi, bool rebind) { struct vmw_private *dev_priv = bi->ctx->dev_priv; struct { @@ -548,7 +551,8 @@ static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi) cmd->header.size = sizeof(cmd->body); cmd->body.cid = bi->ctx->id; cmd->body.type = bi->i1.shader_type; - cmd->body.shid = SVGA3D_INVALID_ID; + cmd->body.shid = + cpu_to_le32((rebind) ? bi->res->id : SVGA3D_INVALID_ID); vmw_fifo_commit(dev_priv, sizeof(*cmd)); return 0; @@ -559,8 +563,10 @@ static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi) * from a context. * * @bi: single binding information. + * @rebind: Whether to issue a bind instead of scrub command. */ -static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi) +static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi, + bool rebind) { struct vmw_private *dev_priv = bi->ctx->dev_priv; struct { @@ -579,7 +585,8 @@ static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi) cmd->header.size = sizeof(cmd->body); cmd->body.cid = bi->ctx->id; cmd->body.type = bi->i1.rt_type; - cmd->body.target.sid = SVGA3D_INVALID_ID; + cmd->body.target.sid = + cpu_to_le32((rebind) ? bi->res->id : SVGA3D_INVALID_ID); cmd->body.target.face = 0; cmd->body.target.mipmap = 0; vmw_fifo_commit(dev_priv, sizeof(*cmd)); @@ -591,11 +598,13 @@ static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi) * vmw_context_scrub_texture - scrub a texture binding from a context. * * @bi: single binding information. + * @rebind: Whether to issue a bind instead of scrub command. * * TODO: Possibly complement this function with a function that takes * a list of texture bindings and combines them to a single command. */ -static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi) +static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi, + bool rebind) { struct vmw_private *dev_priv = bi->ctx->dev_priv; struct { @@ -619,7 +628,8 @@ static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi) cmd->body.c.cid = bi->ctx->id; cmd->body.s1.stage = bi->i1.texture_stage; cmd->body.s1.name = SVGA3D_TS_BIND_TEXTURE; - cmd->body.s1.value = (uint32) SVGA3D_INVALID_ID; + cmd->body.s1.value = + cpu_to_le32((rebind) ? 
bi->res->id : SVGA3D_INVALID_ID); vmw_fifo_commit(dev_priv, sizeof(*cmd)); return 0; @@ -692,6 +702,7 @@ int vmw_context_binding_add(struct vmw_ctx_binding_state *cbs, vmw_context_binding_drop(loc); loc->bi = *bi; + loc->bi.scrubbed = false; list_add_tail(&loc->ctx_list, &cbs->list); INIT_LIST_HEAD(&loc->res_list); @@ -727,12 +738,11 @@ static void vmw_context_binding_transfer(struct vmw_ctx_binding_state *cbs, if (loc->bi.ctx != NULL) vmw_context_binding_drop(loc); - loc->bi = *bi; - list_add_tail(&loc->ctx_list, &cbs->list); - if (bi->res != NULL) + if (bi->res != NULL) { + loc->bi = *bi; + list_add_tail(&loc->ctx_list, &cbs->list); list_add_tail(&loc->res_list, &bi->res->binding_head); - else - INIT_LIST_HEAD(&loc->res_list); + } } /** @@ -746,7 +756,10 @@ static void vmw_context_binding_transfer(struct vmw_ctx_binding_state *cbs, */ static void vmw_context_binding_kill(struct vmw_ctx_binding *cb) { - (void) vmw_scrub_funcs[cb->bi.bt](&cb->bi); + if (!cb->bi.scrubbed) { + (void) vmw_scrub_funcs[cb->bi.bt](&cb->bi, false); + cb->bi.scrubbed = true; + } vmw_context_binding_drop(cb); } @@ -767,6 +780,27 @@ static void vmw_context_binding_state_kill(struct vmw_ctx_binding_state *cbs) vmw_context_binding_kill(entry); } +/** + * vmw_context_binding_state_scrub - Scrub all bindings associated with a + * struct vmw_ctx_binding state structure. + * + * @cbs: Pointer to the context binding state tracker. + * + * Emits commands to scrub all bindings associated with the + * context binding state tracker. + */ +static void vmw_context_binding_state_scrub(struct vmw_ctx_binding_state *cbs) +{ + struct vmw_ctx_binding *entry; + + list_for_each_entry(entry, &cbs->list, ctx_list) { + if (!entry->bi.scrubbed) { + (void) vmw_scrub_funcs[entry->bi.bt](&entry->bi, false); + entry->bi.scrubbed = true; + } + } +} + /** * vmw_context_binding_res_list_kill - Kill all bindings on a * resource binding list @@ -784,6 +818,27 @@ void vmw_context_binding_res_list_kill(struct list_head *head) vmw_context_binding_kill(entry); } +/** + * vmw_context_binding_res_list_scrub - Scrub all bindings on a + * resource binding list + * + * @head: list head of resource binding list + * + * Scrub all bindings associated with a specific resource. Typically + * called before the resource is evicted. + */ +void vmw_context_binding_res_list_scrub(struct list_head *head) +{ + struct vmw_ctx_binding *entry; + + list_for_each_entry(entry, head, res_list) { + if (!entry->bi.scrubbed) { + (void) vmw_scrub_funcs[entry->bi.bt](&entry->bi, false); + entry->bi.scrubbed = true; + } + } +} + /** * vmw_context_binding_state_transfer - Commit staged binding info * @@ -803,3 +858,50 @@ void vmw_context_binding_state_transfer(struct vmw_resource *ctx, list_for_each_entry_safe(entry, next, &from->list, ctx_list) vmw_context_binding_transfer(&uctx->cbs, &entry->bi); } + +/** + * vmw_context_rebind_all - Rebind all scrubbed bindings of a context + * + * @ctx: The context resource + * + * Walks through the context binding list and rebinds all scrubbed + * resources. 
+ */ +int vmw_context_rebind_all(struct vmw_resource *ctx) +{ + struct vmw_ctx_binding *entry; + struct vmw_user_context *uctx = + container_of(ctx, struct vmw_user_context, res); + struct vmw_ctx_binding_state *cbs = &uctx->cbs; + int ret; + + list_for_each_entry(entry, &cbs->list, ctx_list) { + if (likely(!entry->bi.scrubbed)) + continue; + + if (WARN_ON(entry->bi.res == NULL || entry->bi.res->id == + SVGA3D_INVALID_ID)) + continue; + + ret = vmw_scrub_funcs[entry->bi.bt](&entry->bi, true); + if (unlikely(ret != 0)) + return ret; + + entry->bi.scrubbed = false; + } + + return 0; +} + +/** + * vmw_context_binding_list - Return a list of context bindings + * + * @ctx: The context resource + * + * Returns the current list of bindings of the given context. Note that + * this list becomes stale as soon as the dev_priv::binding_mutex is unlocked. + */ +struct list_head *vmw_context_binding_list(struct vmw_resource *ctx) +{ + return &(container_of(ctx, struct vmw_user_context, res)->cbs.list); +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index cef0ff7ac738..ecaa302a6154 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -276,6 +276,7 @@ struct vmw_ctx_bindinfo { struct vmw_resource *ctx; struct vmw_resource *res; enum vmw_ctx_binding_type bt; + bool scrubbed; union { SVGA3dShaderType shader_type; SVGA3dRenderTargetType rt_type; @@ -574,6 +575,8 @@ struct vmw_user_resource_conv; extern void vmw_resource_unreference(struct vmw_resource **p_res); extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res); +extern struct vmw_resource * +vmw_resource_reference_unless_doomed(struct vmw_resource *res); extern int vmw_resource_validate(struct vmw_resource *res); extern int vmw_resource_reserve(struct vmw_resource *res, bool no_backup); extern bool vmw_resource_needs_backup(const struct vmw_resource *res); @@ -962,6 +965,9 @@ extern void vmw_context_binding_state_transfer(struct vmw_resource *res, struct vmw_ctx_binding_state *cbs); extern void vmw_context_binding_res_list_kill(struct list_head *head); +extern void vmw_context_binding_res_list_scrub(struct list_head *head); +extern int vmw_context_rebind_all(struct vmw_resource *ctx); +extern struct list_head *vmw_context_binding_list(struct vmw_resource *ctx); /* * Surface management - vmwgfx_surface.c diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 352224b9d667..269b85cc875a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -179,6 +179,44 @@ static int vmw_resource_val_add(struct vmw_sw_context *sw_context, return 0; } +/** + * vmw_resource_context_res_add - Put resources previously bound to a context on + * the validation list + * + * @dev_priv: Pointer to a device private structure + * @sw_context: Pointer to a software context used for this command submission + * @ctx: Pointer to the context resource + * + * This function puts all resources that were previously bound to @ctx on + * the resource validation list. 
This is part of the context state reemission + */ +static int vmw_resource_context_res_add(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + struct vmw_resource *ctx) +{ + struct list_head *binding_list; + struct vmw_ctx_binding *entry; + int ret = 0; + struct vmw_resource *res; + + mutex_lock(&dev_priv->binding_mutex); + binding_list = vmw_context_binding_list(ctx); + + list_for_each_entry(entry, binding_list, ctx_list) { + res = vmw_resource_reference_unless_doomed(entry->bi.res); + if (unlikely(res == NULL)) + continue; + + ret = vmw_resource_val_add(sw_context, entry->bi.res, NULL); + vmw_resource_unreference(&res); + if (unlikely(ret != 0)) + break; + } + + mutex_unlock(&dev_priv->binding_mutex); + return ret; +} + /** * vmw_resource_relocation_add - Add a relocation to the relocation list * @@ -470,7 +508,11 @@ vmw_cmd_compat_res_check(struct vmw_private *dev_priv, if (p_val) *p_val = node; - if (node->first_usage && res_type == vmw_res_context) { + if (dev_priv->has_mob && node->first_usage && + res_type == vmw_res_context) { + ret = vmw_resource_context_res_add(dev_priv, sw_context, res); + if (unlikely(ret != 0)) + goto out_no_reloc; node->staged_bindings = kzalloc(sizeof(*node->staged_bindings), GFP_KERNEL); if (node->staged_bindings == NULL) { @@ -516,6 +558,34 @@ vmw_cmd_res_check(struct vmw_private *dev_priv, converter, *id_loc, id_loc, p_val); } +/** + * vmw_rebind_contexts - Rebind all resources previously bound to + * referenced contexts. + * + * @sw_context: Pointer to the software context. + * + * Rebind context binding points that have been scrubbed because of eviction. + */ +static int vmw_rebind_contexts(struct vmw_sw_context *sw_context) +{ + struct vmw_resource_val_node *val; + int ret; + + list_for_each_entry(val, &sw_context->resource_list, head) { + if (likely(!val->staged_bindings)) + continue; + + ret = vmw_context_rebind_all(val->res); + if (unlikely(ret != 0)) { + if (ret != -ERESTARTSYS) + DRM_ERROR("Failed to rebind context.\n"); + return ret; + } + } + + return 0; +} + /** * vmw_cmd_cid_check - Check a command header for valid context information. * @@ -1640,9 +1710,10 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv, struct vmw_resource_val_node *res_node; u32 shid = cmd->body.shid; - (void) vmw_compat_shader_lookup(sw_context->fp->shman, - cmd->body.type, - &shid); + if (shid != SVGA3D_INVALID_ID) + (void) vmw_compat_shader_lookup(sw_context->fp->shman, + cmd->body.type, + &shid); ret = vmw_cmd_compat_res_check(dev_priv, sw_context, vmw_res_shader, @@ -2395,6 +2466,12 @@ int vmw_execbuf_process(struct drm_file *file_priv, goto out_err; } + if (dev_priv->has_mob) { + ret = vmw_rebind_contexts(sw_context); + if (unlikely(ret != 0)) + goto out_err; + } + cmd = vmw_fifo_reserve(dev_priv, command_size); if (unlikely(cmd == NULL)) { DRM_ERROR("Failed reserving fifo space for commands.\n"); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 6fdd82d42f65..2aa4bc6a4d60 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -88,6 +88,11 @@ struct vmw_resource *vmw_resource_reference(struct vmw_resource *res) return res; } +struct vmw_resource * +vmw_resource_reference_unless_doomed(struct vmw_resource *res) +{ + return kref_get_unless_zero(&res->kref) ? res : NULL; +} /** * vmw_resource_release_id - release a resource id to the id manager. 
@@ -136,8 +141,12 @@ static void vmw_resource_release(struct kref *kref) vmw_dmabuf_unreference(&res->backup); } - if (likely(res->hw_destroy != NULL)) + if (likely(res->hw_destroy != NULL)) { res->hw_destroy(res); + mutex_lock(&dev_priv->binding_mutex); + vmw_context_binding_res_list_kill(&res->binding_head); + mutex_unlock(&dev_priv->binding_mutex); + } id = res->id; if (res->res_free != NULL) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index be85d7fdfb5b..217d941b8176 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -304,7 +304,7 @@ static int vmw_gb_shader_destroy(struct vmw_resource *res) return 0; mutex_lock(&dev_priv->binding_mutex); - vmw_context_binding_res_list_kill(&res->binding_head); + vmw_context_binding_res_list_scrub(&res->binding_head); cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd)); if (unlikely(cmd == NULL)) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index ec2b99833fce..82468d902915 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -1111,7 +1111,7 @@ static int vmw_gb_surface_destroy(struct vmw_resource *res) return 0; mutex_lock(&dev_priv->binding_mutex); - vmw_context_binding_res_list_kill(&res->binding_head); + vmw_context_binding_res_list_scrub(&res->binding_head); cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd)); if (unlikely(cmd == NULL)) { From 276ab336b4c6e483d12fd46cbf24f97f71867710 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 5 Feb 2014 08:49:41 +0100 Subject: [PATCH 118/171] ALSA: hda - Improve loopback path lookups for AD1983 AD1983 has flexible loopback routes and the generic parser would take wrong path confusingly instead of taking individual paths via NID 0x0c and 0x0d. For avoiding it, limit the connections at these widgets so that the parser can think more straightforwardly. This fixes the regression of the missing line-in loopback on Dell machine. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=70011 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_analog.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 195cd62cdce5..df3652ad15ef 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -491,6 +491,8 @@ static int ad1983_add_spdif_mux_ctl(struct hda_codec *codec) static int patch_ad1983(struct hda_codec *codec) { struct ad198x_spec *spec; + static hda_nid_t conn_0c[] = { 0x08 }; + static hda_nid_t conn_0d[] = { 0x09 }; int err; err = alloc_ad_spec(codec); @@ -501,6 +503,11 @@ static int patch_ad1983(struct hda_codec *codec) spec->gen.mixer_nid = 0x0e; spec->gen.beep_nid = 0x10; set_beep_amp(spec, 0x10, 0, HDA_OUTPUT); + + /* limit the loopback routes not to confuse the parser */ + snd_hda_override_conn_list(codec, 0x0c, ARRAY_SIZE(conn_0c), conn_0c); + snd_hda_override_conn_list(codec, 0x0d, ARRAY_SIZE(conn_0d), conn_0d); + err = ad198x_parse_auto_config(codec, false); if (err < 0) goto error; From cd9a21a831af0af7539a0e37e4455da03df7cf82 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 30 Jan 2014 21:27:25 -0500 Subject: [PATCH 119/171] vmwgfx: Fix unitialized stack read in vmw_setup_otable_base One of the error paths in vmw_setup_otable_base causes us to return with 'ret' having never been set to anything causing us to return whatever was on the stack. 
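Reduced to a sketch (not the literal function body), the problem looks like
this:

  static int vmw_setup_otable_base(...)
  {
          int ret;                /* never assigned on this error path */
          ...
          cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
          if (unlikely(cmd == NULL)) {
                  DRM_ERROR("Failed reserving FIFO space for OTable setup.\n");
                  ret = -ENOMEM;  /* <-- the line this patch adds */
                  goto out_no_fifo;
          }
          ...
  out_no_fifo:
          ...
          return ret;             /* stack garbage without the fix */
  }

Without the added assignment the caller sees whatever happened to be in ret's
stack slot, which can even look like success.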
Found with Coverity Signed-off-by: Dave Jones Reviewed-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_mob.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c index 4910e7b81811..d4a5a19cb8c3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c @@ -134,6 +134,7 @@ static int vmw_setup_otable_base(struct vmw_private *dev_priv, cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd)); if (unlikely(cmd == NULL)) { DRM_ERROR("Failed reserving FIFO space for OTable setup.\n"); + ret = -ENOMEM; goto out_no_fifo; } From c66f854338253e603a4fb6817069ee23eacf0ae3 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 24 Jan 2014 08:49:45 +0100 Subject: [PATCH 120/171] drm/ttm: Fix TTM object open regression Commit drm/ttm: ttm object security fixes for render nodes introduced a regression where, if a TTM object was opened multiple times from the same open file, the caller would spin uninterruptibly in the kernel. Fix this. Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz --- drivers/gpu/drm/ttm/ttm_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c index 37079859afc8..53b51c4e671a 100644 --- a/drivers/gpu/drm/ttm/ttm_object.c +++ b/drivers/gpu/drm/ttm/ttm_object.c @@ -292,7 +292,7 @@ int ttm_ref_object_add(struct ttm_object_file *tfile, if (ret == 0) { ref = drm_hash_entry(hash, struct ttm_ref_object, hash); - if (!kref_get_unless_zero(&ref->kref)) { + if (kref_get_unless_zero(&ref->kref)) { rcu_read_unlock(); break; } From 923fa4ea382f592dee2ba3b205befb90cbddf3af Mon Sep 17 00:00:00 2001 From: Nitin A Kamble Date: Thu, 30 Jan 2014 16:50:10 -0800 Subject: [PATCH 121/171] genirq: Generic irq chip requires IRQ_DOMAIN The generic_chip.c uses interfaces from irq_domain.c which is controlled by the IRQ_DOMAIN config option, but there is no Kconfig dependency so the build can fail: linux/kernel/irq/generic-chip.c:400:11: error: 'irq_domain_xlate_onetwocell' undeclared here (not in a function) Select IRQ_DOMAIN when GENERIC_IRQ_CHIP is selected. Signed-off-by: Nitin A Kamble Link: http://lkml.kernel.org/r/1391129410-54548-2-git-send-email-nitin.a.kamble@intel.com Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org # 3.11+ --- kernel/irq/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 4a1fef09f658..07cbdfea9ae2 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -40,6 +40,7 @@ config IRQ_EDGE_EOI_HANDLER # Generic configurable interrupt chip implementation config GENERIC_IRQ_CHIP bool + select IRQ_DOMAIN # Generic irq_domain hw <--> linux irq number translation config IRQ_DOMAIN From 5044bad43ee573d0b6d90e3ccb7a40c2c7d25eb4 Mon Sep 17 00:00:00 2001 From: Vinayak Kale Date: Wed, 5 Feb 2014 09:34:36 +0000 Subject: [PATCH 122/171] arm64: add DSB after icache flush in __flush_icache_all() Add DSB after icache flush to complete the cache maintenance operation. The function __flush_icache_all() is used only for user space mappings and an ISB is not required because of an exception return before executing user instructions. An exception return would behave like an ISB. 
Signed-off-by: Vinayak Kale Acked-by: Will Deacon Cc: Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cacheflush.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index fea9ee327206..889324981aa4 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -116,6 +116,7 @@ extern void flush_dcache_page(struct page *); static inline void __flush_icache_all(void) { asm("ic ialluis"); + dsb(); } #define flush_dcache_mmap_lock(mapping) \ From ccc9e244eb1b9654915634827322932cbafd8244 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 4 Feb 2014 23:08:57 +0000 Subject: [PATCH 123/171] arm64: Align CMA sizes to PAGE_SIZE dma_alloc_from_contiguous takes number of pages for a size. Align up the dma size passed in to page size to avoid truncation and allocation failures on sizes less than PAGE_SIZE. Cc: Will Deacon Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas --- arch/arm64/mm/dma-mapping.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 45b5ab54c9ee..fbd76785c5db 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -45,6 +45,7 @@ static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size, if (IS_ENABLED(CONFIG_DMA_CMA)) { struct page *page; + size = PAGE_ALIGN(size); page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, get_order(size)); if (!page) From a55f9929a9b257f84b6cc7b2397379cabd744a22 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 4 Feb 2014 16:01:31 +0000 Subject: [PATCH 124/171] arm64: Invalidate the TLB when replacing pmd entries during boot With the 64K page size configuration, __create_page_tables in head.S maps enough memory to get started but using 64K pages rather than 512M sections with a single pgd/pud/pmd entry pointing to a pte table. create_mapping() may override the pgd/pud/pmd table entry with a block (section) one if the RAM size is more than 512MB and aligned correctly. For the end of this block to be accessible, the old TLB entry must be invalidated. Cc: Reported-by: Mark Salter Tested-by: Mark Salter Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index f557ebbe7013..f8dc7e8fce6f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -203,10 +203,18 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, do { next = pmd_addr_end(addr, end); /* try section mapping first */ - if (((addr | next | phys) & ~SECTION_MASK) == 0) + if (((addr | next | phys) & ~SECTION_MASK) == 0) { + pmd_t old_pmd =*pmd; set_pmd(pmd, __pmd(phys | prot_sect_kernel)); - else + /* + * Check for previous table entries created during + * boot (__create_page_tables) and flush them. + */ + if (!pmd_none(old_pmd)) + flush_tlb_all(); + } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys)); + } phys += next - addr; } while (pmd++, addr = next, addr != end); } From bfb67a5606376bb32cb6f93dc05cda2e8c2038a5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 5 Feb 2014 10:24:12 +0000 Subject: [PATCH 125/171] arm64: fix typo: s/SERRROR/SERROR/ Somehow SERROR has acquired an additional 'R' in a couple of headers. This patch removes them before they spread further. As neither instance is in use yet, no other sites need to be fixed up. 
Signed-off-by: Mark Rutland Acked-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 2 +- arch/arm64/include/asm/kvm_arm.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 78834123a32e..c4a7f940b387 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -42,7 +42,7 @@ #define ESR_EL1_EC_SP_ALIGN (0x26) #define ESR_EL1_EC_FP_EXC32 (0x28) #define ESR_EL1_EC_FP_EXC64 (0x2C) -#define ESR_EL1_EC_SERRROR (0x2F) +#define ESR_EL1_EC_SERROR (0x2F) #define ESR_EL1_EC_BREAKPT_EL0 (0x30) #define ESR_EL1_EC_BREAKPT_EL1 (0x31) #define ESR_EL1_EC_SOFTSTP_EL0 (0x32) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index c98ef4771c73..0eb398655378 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -231,7 +231,7 @@ #define ESR_EL2_EC_SP_ALIGN (0x26) #define ESR_EL2_EC_FP_EXC32 (0x28) #define ESR_EL2_EC_FP_EXC64 (0x2C) -#define ESR_EL2_EC_SERRROR (0x2F) +#define ESR_EL2_EC_SERROR (0x2F) #define ESR_EL2_EC_BREAKPT (0x30) #define ESR_EL2_EC_BREAKPT_HYP (0x31) #define ESR_EL2_EC_SOFTSTP (0x32) From 883d50a0ed403446437444a495356ce31e1197a3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 5 Feb 2014 10:24:13 +0000 Subject: [PATCH 126/171] arm64: simplify pgd_alloc Currently pgd_alloc has a redundant NULL check in its return path that can be removed with no ill effects. With that removed it's also possible to return early and eliminate the new_pgd temporary variable. This patch applies said modifications, making the logic of pgd_alloc correspond 1-1 with that of pgd_free. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/pgd.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 7083cdada657..62c6101df260 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -32,17 +32,10 @@ pgd_t *pgd_alloc(struct mm_struct *mm) { - pgd_t *new_pgd; - if (PGD_SIZE == PAGE_SIZE) - new_pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL); + return (pgd_t *)get_zeroed_page(GFP_KERNEL); else - new_pgd = kzalloc(PGD_SIZE, GFP_KERNEL); - - if (!new_pgd) - return NULL; - - return new_pgd; + return kzalloc(PGD_SIZE, GFP_KERNEL); } void pgd_free(struct mm_struct *mm, pgd_t *pgd) From 8fcfb99c8e29c73dd8945b6105ef54ca4eeb171e Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 4 Feb 2014 17:48:28 -0700 Subject: [PATCH 127/171] ACPI / hotplug: Fix panic on eject to ejected device When an eject request is sent to an ejected ACPI device, the following panic occurs: ACPI: \_SB_.SCK3.CPU3: ACPI_NOTIFY_EJECT_REQUEST event BUG: unable to handle kernel NULL pointer dereference at 0000000000000070 IP: [] acpi_device_hotplug+0x10b/0x33b : Call Trace: [] acpi_hotplug_work_fn+0x1c/0x27 [] process_one_work+0x175/0x430 [] worker_thread+0x11b/0x3a0 This is becase device->handler is NULL in acpi_device_hotplug(). This case was used to fail in acpi_hotplug_notify_cb() as the target had no acpi_deivce. However, acpi_device now exists after ejection. Added a check to verify if acpi_device->handler is valid for an eject request in acpi_hotplug_notify_cb(). Note that handler passed from an argument is still valid while acpi_device->handler is NULL. Fixes: 202317a573b2 (ACPI / scan: Add acpi_device objects for all device nodes in the namespace) Signed-off-by: Toshi Kani Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/scan.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 7384158c7f87..57b053f424d1 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -484,7 +484,6 @@ static void acpi_device_hotplug(void *data, u32 src) static void acpi_hotplug_notify_cb(acpi_handle handle, u32 type, void *data) { u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; - struct acpi_scan_handler *handler = data; struct acpi_device *adev; acpi_status status; @@ -500,7 +499,10 @@ static void acpi_hotplug_notify_cb(acpi_handle handle, u32 type, void *data) break; case ACPI_NOTIFY_EJECT_REQUEST: acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n"); - if (!handler->hotplug.enabled) { + if (!adev->handler) + goto err_out; + + if (!adev->handler->hotplug.enabled) { acpi_handle_err(handle, "Eject disabled\n"); ost_code = ACPI_OST_SC_EJECT_NOT_SUPPORTED; goto err_out; From 069b918623e1510e58dacf178905a72c3baa3ae4 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Wed, 5 Feb 2014 05:53:04 +0000 Subject: [PATCH 128/171] arm64: vdso: fix coarse clock handling When __kernel_clock_gettime is called with a CLOCK_MONOTONIC_COARSE or CLOCK_REALTIME_COARSE clock id, it returns incorrectly to whatever the caller has placed in x2 ("ret x2" to return from the fast path). Fix this by saving x30/LR to x2 only in code that will call __do_get_tspec, restoring x30 afterward, and using a plain "ret" to return from the routine. Also: while the resulting tv_nsec value for CLOCK_REALTIME and CLOCK_MONOTONIC must be computed using intermediate values that are left-shifted by cs_shift (x12, set by __do_get_tspec), the results for coarse clocks should be calculated using unshifted values (xtime_coarse_nsec is in units of actual nanoseconds). The current code shifts intermediate values by x12 unconditionally, but x12 is uninitialized when servicing a coarse clock. Fix this by setting x12 to 0 once we know we are dealing with a coarse clock id. Signed-off-by: Nathan Lynch Acked-by: Will Deacon Cc: Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso/gettimeofday.S | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index f0a6d10b5211..fe652ffd34c2 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -103,6 +103,8 @@ ENTRY(__kernel_clock_gettime) bl __do_get_tspec seqcnt_check w9, 1b + mov x30, x2 + cmp w0, #CLOCK_MONOTONIC b.ne 6f @@ -118,6 +120,9 @@ ENTRY(__kernel_clock_gettime) ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne b.ne 8f + /* xtime_coarse_nsec is already right-shifted */ + mov x12, #0 + /* Get coarse timespec. */ adr vdso_data, _vdso_data 3: seqcnt_acquire @@ -156,7 +161,7 @@ ENTRY(__kernel_clock_gettime) lsr x11, x11, x12 stp x10, x11, [x1, #TSPEC_TV_SEC] mov x0, xzr - ret x2 + ret 7: mov x30, x2 8: /* Syscall fallback. */ From d4022a335271a48cce49df35d825897914fbffe3 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 3 Feb 2014 19:48:52 +0000 Subject: [PATCH 129/171] arm64: vdso: update wtm fields for CLOCK_MONOTONIC_COARSE Update wall-to-monotonic fields in the VDSO data page unconditionally. These are used to service CLOCK_MONOTONIC_COARSE, which is not guarded by use_syscall. 
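In C terms, the coarse fast path effectively computes the following (a sketch
of the gettimeofday.S logic; the real code is assembly, and the field names
are the vdso_data ones used elsewhere in this series):

  /*
   * What the vDSO fast path computes for CLOCK_MONOTONIC_COARSE,
   * served entirely from the vDSO data page.
   */
  static void coarse_monotonic(const struct vdso_data *d, struct timespec *ts)
  {
          ts->tv_sec  = d->xtime_coarse_sec  + d->wtm_clock_sec;
          ts->tv_nsec = d->xtime_coarse_nsec + d->wtm_clock_nsec;
          if (ts->tv_nsec >= NSEC_PER_SEC) {      /* normalize */
                  ts->tv_nsec -= NSEC_PER_SEC;
                  ts->tv_sec++;
          }
          /*
           * This path is taken even when use_syscall is set, which is why
           * wtm_clock_sec/nsec must be updated unconditionally.
           */
  }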
Signed-off-by: Nathan Lynch Acked-by: Will Deacon Cc: Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 65d40cf6945a..a7149cae1615 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -238,6 +238,8 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->use_syscall = use_syscall; vdso_data->xtime_coarse_sec = xtime_coarse.tv_sec; vdso_data->xtime_coarse_nsec = xtime_coarse.tv_nsec; + vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; + vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { vdso_data->cs_cycle_last = tk->clock->cycle_last; @@ -245,8 +247,6 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->xtime_clock_nsec = tk->xtime_nsec; vdso_data->cs_mult = tk->mult; vdso_data->cs_shift = tk->shift; - vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; - vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; } smp_wmb(); From 6290b53de025dd08a79257ee84173ef7b6d926f7 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 5 Feb 2014 12:03:52 +0000 Subject: [PATCH 130/171] arm64: compat: Wire up new AArch32 syscalls This patch enables sys_compat, sys_finit_module, sys_sched_setattr and sys_sched_getattr for compat (AArch32) applications. Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/unistd32.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 58125bf008d3..bb8eb8a78e67 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -399,7 +399,10 @@ __SYSCALL(374, compat_sys_sendmmsg) __SYSCALL(375, sys_setns) __SYSCALL(376, compat_sys_process_vm_readv) __SYSCALL(377, compat_sys_process_vm_writev) -__SYSCALL(378, sys_ni_syscall) /* 378 for kcmp */ +__SYSCALL(378, sys_kcmp) +__SYSCALL(379, sys_finit_module) +__SYSCALL(380, sys_sched_setattr) +__SYSCALL(381, sys_sched_getattr) #define __NR_compat_syscalls 379 From 530b099dfe8499d639e7fbcad28c4199e2a720c7 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Tue, 4 Feb 2014 02:15:32 +0000 Subject: [PATCH 131/171] security: select correct default LSM_MMAP_MIN_ADDR on arm on arm64 Binaries compiled for arm may run on arm64 if CONFIG_COMPAT is selected. Set LSM_MMAP_MIN_ADDR to 32768 if ARM64 && COMPAT to prevent selinux failures launching 32-bit static executables that are mapped at 0x8000. Signed-off-by: Colin Cross Acked-by: Will Deacon Acked-by: Eric Paris Acked-by: James Morris Signed-off-by: Catalin Marinas --- security/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/Kconfig b/security/Kconfig index e9c6ac724fef..beb86b500adf 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -103,7 +103,7 @@ config INTEL_TXT config LSM_MMAP_MIN_ADDR int "Low address space for LSM to protect from user allocation" depends on SECURITY && SECURITY_SELINUX - default 32768 if ARM + default 32768 if ARM || (ARM64 && COMPAT) default 65536 help This is the portion of low virtual memory which should be protected From 1b76af5ce8deb1c775ad1674bd249d782b5b13d9 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 5 Feb 2014 09:18:26 +0100 Subject: [PATCH 132/171] drm/ttm: Don't clear page metadata of imported sg pages These page pointers shouldn't be visible to TTM in the first place, but until we fix that up, don't clear the page metadata because that will upset the exporter. 
Reported-and-tested-by: Cristoph Haag Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz --- drivers/gpu/drm/ttm/ttm_tt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 9af99084b344..75f319090043 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -380,6 +380,9 @@ static void ttm_tt_clear_mapping(struct ttm_tt *ttm) pgoff_t i; struct page **page = ttm->pages; + if (ttm->page_flags & TTM_PAGE_FLAG_SG) + return; + for (i = 0; i < ttm->num_pages; ++i) { (*page)->mapping = NULL; (*page++)->index = 0; From 6a96e15096da6e7491107321cfa660c7c2aa119d Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 28 Jan 2014 14:45:41 -0500 Subject: [PATCH 133/171] selinux: add SOCK_DIAG_BY_FAMILY to the list of netlink message types The SELinux AF_NETLINK/NETLINK_SOCK_DIAG socket class was missing the SOCK_DIAG_BY_FAMILY definition which caused SELINUX_ERR messages when the ss tool was run. # ss Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port u_str ESTAB 0 0 * 14189 * 14190 u_str ESTAB 0 0 * 14145 * 14144 u_str ESTAB 0 0 * 14151 * 14150 {...} # ausearch -m SELINUX_ERR ---- time->Thu Jan 23 11:11:16 2014 type=SYSCALL msg=audit(1390493476.445:374): arch=c000003e syscall=44 success=yes exit=40 a0=3 a1=7fff03aa11f0 a2=28 a3=0 items=0 ppid=1852 pid=1895 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=1 comm="ss" exe="/usr/sbin/ss" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null) type=SELINUX_ERR msg=audit(1390493476.445:374): SELinux: unrecognized netlink message type=20 for sclass=32 Signed-off-by: Paul Moore --- security/selinux/nlmsgtab.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index 332ac8a80cf5..2df7b900e259 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "flask.h" #include "av_permissions.h" @@ -78,6 +79,7 @@ static struct nlmsg_perm nlmsg_tcpdiag_perms[] = { { TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, { DCCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, + { SOCK_DIAG_BY_FAMILY, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, }; static struct nlmsg_perm nlmsg_xfrm_perms[] = From 2172fa709ab32ca60e86179dc67d0857be8e2c98 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Thu, 30 Jan 2014 11:26:59 -0500 Subject: [PATCH 134/171] SELinux: Fix kernel BUG on empty security contexts. Setting an empty security context (length=0) on a file will lead to incorrectly dereferencing the type and other fields of the security context structure, yielding a kernel BUG. As a zero-length security context is never valid, just reject all such security contexts whether coming from userspace via setxattr or coming from the filesystem upon a getxattr request by SELinux. Setting a security context value (empty or otherwise) unknown to SELinux in the first place is only possible for a root process (CAP_MAC_ADMIN), and, if running SELinux in enforcing mode, only if the corresponding SELinux mac_admin permission is also granted to the domain by policy. In Fedora policies, this is only allowed for specific domains such as livecd for setting down security contexts that are not defined in the build host policy. Reproducer: su setenforce 0 touch foo setfattr -n security.selinux foo Caveat: Relabeling or removing foo after doing the above may not be possible without booting with SELinux disabled. 
Any subsequent access to foo after doing the above will also trigger the BUG. BUG output from Matthew Thode: [ 473.893141] ------------[ cut here ]------------ [ 473.962110] kernel BUG at security/selinux/ss/services.c:654! [ 473.995314] invalid opcode: 0000 [#6] SMP [ 474.027196] Modules linked in: [ 474.058118] CPU: 0 PID: 8138 Comm: ls Tainted: G D I 3.13.0-grsec #1 [ 474.116637] Hardware name: Supermicro X8ST3/X8ST3, BIOS 2.0 07/29/10 [ 474.149768] task: ffff8805f50cd010 ti: ffff8805f50cd488 task.ti: ffff8805f50cd488 [ 474.183707] RIP: 0010:[] [] context_struct_compute_av+0xce/0x308 [ 474.219954] RSP: 0018:ffff8805c0ac3c38 EFLAGS: 00010246 [ 474.252253] RAX: 0000000000000000 RBX: ffff8805c0ac3d94 RCX: 0000000000000100 [ 474.287018] RDX: ffff8805e8aac000 RSI: 00000000ffffffff RDI: ffff8805e8aaa000 [ 474.321199] RBP: ffff8805c0ac3cb8 R08: 0000000000000010 R09: 0000000000000006 [ 474.357446] R10: 0000000000000000 R11: ffff8805c567a000 R12: 0000000000000006 [ 474.419191] R13: ffff8805c2b74e88 R14: 00000000000001da R15: 0000000000000000 [ 474.453816] FS: 00007f2e75220800(0000) GS:ffff88061fc00000(0000) knlGS:0000000000000000 [ 474.489254] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 474.522215] CR2: 00007f2e74716090 CR3: 00000005c085e000 CR4: 00000000000207f0 [ 474.556058] Stack: [ 474.584325] ffff8805c0ac3c98 ffffffff811b549b ffff8805c0ac3c98 ffff8805f1190a40 [ 474.618913] ffff8805a6202f08 ffff8805c2b74e88 00068800d0464990 ffff8805e8aac860 [ 474.653955] ffff8805c0ac3cb8 000700068113833a ffff880606c75060 ffff8805c0ac3d94 [ 474.690461] Call Trace: [ 474.723779] [] ? lookup_fast+0x1cd/0x22a [ 474.778049] [] security_compute_av+0xf4/0x20b [ 474.811398] [] avc_compute_av+0x2a/0x179 [ 474.843813] [] avc_has_perm+0x45/0xf4 [ 474.875694] [] inode_has_perm+0x2a/0x31 [ 474.907370] [] selinux_inode_getattr+0x3c/0x3e [ 474.938726] [] security_inode_getattr+0x1b/0x22 [ 474.970036] [] vfs_getattr+0x19/0x2d [ 475.000618] [] vfs_fstatat+0x54/0x91 [ 475.030402] [] vfs_lstat+0x19/0x1b [ 475.061097] [] SyS_newlstat+0x15/0x30 [ 475.094595] [] ? __audit_syscall_entry+0xa1/0xc3 [ 475.148405] [] system_call_fastpath+0x16/0x1b [ 475.179201] Code: 00 48 85 c0 48 89 45 b8 75 02 0f 0b 48 8b 45 a0 48 8b 3d 45 d0 b6 00 8b 40 08 89 c6 ff ce e8 d1 b0 06 00 48 85 c0 49 89 c7 75 02 <0f> 0b 48 8b 45 b8 4c 8b 28 eb 1e 49 8d 7d 08 be 80 01 00 00 e8 [ 475.255884] RIP [] context_struct_compute_av+0xce/0x308 [ 475.296120] RSP [ 475.328734] ---[ end trace f076482e9d754adc ]--- Reported-by: Matthew Thode Signed-off-by: Stephen Smalley Cc: stable@vger.kernel.org Signed-off-by: Paul Moore --- security/selinux/ss/services.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index fc5a63a05a1c..f1e46d776544 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1232,6 +1232,10 @@ static int security_context_to_sid_core(const char *scontext, u32 scontext_len, struct context context; int rc = 0; + /* An empty security context is never valid. 
*/ + if (!scontext_len) + return -EINVAL; + if (!ss_initialized) { int i; From da9846ae15186d491d6e21ebbb5051e1d3c7f652 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 29 Jan 2014 12:04:03 -0500 Subject: [PATCH 135/171] kernfs: make kernfs_deactivate() honor KERNFS_LOCKDEP flag kernfs_deactivate() forgot to check whether KERNFS_LOCKDEP is set before performing lockdep annotations and ends up feeding uninitialized lockdep_map to lockdep triggering warning like the following on USB stick hotunplug. usb 1-2: USB disconnect, device number 2 INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 1 PID: 62 Comm: khubd Not tainted 3.13.0-work+ #82 Hardware name: empty empty/S3992, BIOS 080011 10/26/2007 ffff880065ca7f60 ffff88013a4ffa08 ffffffff81cfb6bd 0000000000000002 ffff88013a4ffac8 ffffffff810f8530 ffff88013a4fc710 0000000000000002 ffff880100000000 ffffffff82a3db50 0000000000000001 ffff88013a4fc710 Call Trace: [] dump_stack+0x4e/0x7a [] __lock_acquire+0x1910/0x1e70 [] lock_acquire+0x9a/0x1d0 [] kernfs_deactivate+0xee/0x130 [] kernfs_addrm_finish+0x38/0x60 [] kernfs_remove_by_name_ns+0x51/0xa0 [] remove_files.isra.1+0x41/0x80 [] sysfs_remove_group+0x47/0xa0 [] sysfs_remove_groups+0x33/0x50 [] device_remove_attrs+0x4d/0x80 [] device_del+0x12e/0x1d0 [] usb_disconnect+0x122/0x1a0 [] hub_thread+0x3c5/0x1290 [] kthread+0xed/0x110 [] ret_from_fork+0x7c/0xb0 Fix it by making kernfs_deactivate() perform lockdep annotations only if KERNFS_LOCKDEP is set. Signed-off-by: Tejun Heo Reported-by: Fabio Estevam Reported-by: Alan Stern Reported-by: Jiri Kosina Reported-by: Dave Jones Tested-by: Fabio Estevam Tested-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 5104cf5d25c5..bd6e18be6e1a 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -187,19 +187,23 @@ static void kernfs_deactivate(struct kernfs_node *kn) kn->u.completion = (void *)&wait; - rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); + if (kn->flags & KERNFS_LOCKDEP) + rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); /* atomic_add_return() is a mb(), put_active() will always see * the updated kn->u.completion. */ v = atomic_add_return(KN_DEACTIVATED_BIAS, &kn->active); if (v != KN_DEACTIVATED_BIAS) { - lock_contended(&kn->dep_map, _RET_IP_); + if (kn->flags & KERNFS_LOCKDEP) + lock_contended(&kn->dep_map, _RET_IP_); wait_for_completion(&wait); } - lock_acquired(&kn->dep_map, _RET_IP_); - rwsem_release(&kn->dep_map, 1, _RET_IP_); + if (kn->flags & KERNFS_LOCKDEP) { + lock_acquired(&kn->dep_map, _RET_IP_); + rwsem_release(&kn->dep_map, 1, _RET_IP_); + } } /** From c4ad8f98bef77c7356aa6a9ad9188a6acc6b849d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 5 Feb 2014 12:54:53 -0800 Subject: [PATCH 136/171] execve: use 'struct filename *' for executable name passing This changes 'do_execve()' to get the executable name as a 'struct filename', and to free it when it is done. This is what the normal users want, and it simplifies and streamlines their error handling. 
The controlled lifetime of the executable name also fixes a use-after-free problem with the trace_sched_process_exec tracepoint: the lifetime of the passed-in string for kernel users was not at all obvious, and the user-mode helper code used UMH_WAIT_EXEC to serialize the pathname allocation lifetime with the execve() having finished, which in turn meant that the trace point that happened after mm_release() of the old process VM ended up using already free'd memory. To solve the kernel string lifetime issue, this simply introduces "getname_kernel()" that works like the normal user-space getname() function, except with the source coming from kernel memory. As Oleg points out, this also means that we could drop the tcomm[] array from 'struct linux_binprm', since the pathname lifetime now covers setup_new_exec(). That would be a separate cleanup. Reported-by: Igor Zhbanov Tested-by: Steven Rostedt Cc: Oleg Nesterov Cc: Al Viro Signed-off-by: Linus Torvalds --- arch/parisc/hpux/fs.c | 15 +------------- fs/exec.c | 45 +++++++++++++++++++---------------------- fs/namei.c | 30 +++++++++++++++++++++++++++ include/linux/binfmts.h | 1 - include/linux/fs.h | 1 + include/linux/sched.h | 3 ++- init/main.c | 2 +- kernel/auditsc.c | 2 +- kernel/kmod.c | 2 +- 9 files changed, 58 insertions(+), 43 deletions(-) diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index 88d0962de65a..2bedafea3d94 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -33,22 +33,9 @@ int hpux_execve(struct pt_regs *regs) { - int error; - struct filename *filename; - - filename = getname((const char __user *) regs->gr[26]); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - goto out; - - error = do_execve(filename->name, + return do_execve(getname((const char __user *) regs->gr[26]), (const char __user *const __user *) regs->gr[25], (const char __user *const __user *) regs->gr[24]); - - putname(filename); - -out: - return error; } struct hpux_dirent { diff --git a/fs/exec.c b/fs/exec.c index e1529b4c79b1..3d78fccdd723 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -748,11 +748,10 @@ EXPORT_SYMBOL(setup_arg_pages); #endif /* CONFIG_MMU */ -struct file *open_exec(const char *name) +static struct file *do_open_exec(struct filename *name) { struct file *file; int err; - struct filename tmp = { .name = name }; static const struct open_flags open_exec_flags = { .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, .acc_mode = MAY_EXEC | MAY_OPEN, @@ -760,7 +759,7 @@ struct file *open_exec(const char *name) .lookup_flags = LOOKUP_FOLLOW, }; - file = do_filp_open(AT_FDCWD, &tmp, &open_exec_flags); + file = do_filp_open(AT_FDCWD, name, &open_exec_flags); if (IS_ERR(file)) goto out; @@ -784,6 +783,12 @@ exit: fput(file); return ERR_PTR(err); } + +struct file *open_exec(const char *name) +{ + struct filename tmp = { .name = name }; + return do_open_exec(&tmp); +} EXPORT_SYMBOL(open_exec); int kernel_read(struct file *file, loff_t offset, @@ -1162,7 +1167,7 @@ int prepare_bprm_creds(struct linux_binprm *bprm) return -ENOMEM; } -void free_bprm(struct linux_binprm *bprm) +static void free_bprm(struct linux_binprm *bprm) { free_arg_pages(bprm); if (bprm->cred) { @@ -1432,7 +1437,7 @@ static int exec_binprm(struct linux_binprm *bprm) /* * sys_execve() executes a new program. 
*/ -static int do_execve_common(const char *filename, +static int do_execve_common(struct filename *filename, struct user_arg_ptr argv, struct user_arg_ptr envp) { @@ -1441,6 +1446,9 @@ static int do_execve_common(const char *filename, struct files_struct *displaced; int retval; + if (IS_ERR(filename)) + return PTR_ERR(filename); + /* * We move the actual failure in case of RLIMIT_NPROC excess from * set*uid() to execve() because too many poorly written programs @@ -1473,7 +1481,7 @@ static int do_execve_common(const char *filename, check_unsafe_exec(bprm); current->in_execve = 1; - file = open_exec(filename); + file = do_open_exec(filename); retval = PTR_ERR(file); if (IS_ERR(file)) goto out_unmark; @@ -1481,8 +1489,7 @@ static int do_execve_common(const char *filename, sched_exec(); bprm->file = file; - bprm->filename = filename; - bprm->interp = filename; + bprm->filename = bprm->interp = filename->name; retval = bprm_mm_init(bprm); if (retval) @@ -1523,6 +1530,7 @@ static int do_execve_common(const char *filename, acct_update_integrals(current); task_numa_free(current); free_bprm(bprm); + putname(filename); if (displaced) put_files_struct(displaced); return retval; @@ -1544,10 +1552,11 @@ out_files: if (displaced) reset_files_struct(displaced); out_ret: + putname(filename); return retval; } -int do_execve(const char *filename, +int do_execve(struct filename *filename, const char __user *const __user *__argv, const char __user *const __user *__envp) { @@ -1557,7 +1566,7 @@ int do_execve(const char *filename, } #ifdef CONFIG_COMPAT -static int compat_do_execve(const char *filename, +static int compat_do_execve(struct filename *filename, const compat_uptr_t __user *__argv, const compat_uptr_t __user *__envp) { @@ -1607,25 +1616,13 @@ SYSCALL_DEFINE3(execve, const char __user *const __user *, argv, const char __user *const __user *, envp) { - struct filename *path = getname(filename); - int error = PTR_ERR(path); - if (!IS_ERR(path)) { - error = do_execve(path->name, argv, envp); - putname(path); - } - return error; + return do_execve(getname(filename), argv, envp); } #ifdef CONFIG_COMPAT asmlinkage long compat_sys_execve(const char __user * filename, const compat_uptr_t __user * argv, const compat_uptr_t __user * envp) { - struct filename *path = getname(filename); - int error = PTR_ERR(path); - if (!IS_ERR(path)) { - error = compat_do_execve(path->name, argv, envp); - putname(path); - } - return error; + return compat_do_execve(getname(filename), argv, envp); } #endif diff --git a/fs/namei.c b/fs/namei.c index d580df2e6804..385f7817bfcc 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -196,6 +196,7 @@ recopy: goto error; result->uptr = filename; + result->aname = NULL; audit_getname(result); return result; @@ -210,6 +211,35 @@ getname(const char __user * filename) return getname_flags(filename, 0, NULL); } +/* + * The "getname_kernel()" interface doesn't do pathnames longer + * than EMBEDDED_NAME_MAX. Deal with it - you're a kernel user. 
+ */ +struct filename * +getname_kernel(const char * filename) +{ + struct filename *result; + char *kname; + int len; + + len = strlen(filename); + if (len >= EMBEDDED_NAME_MAX) + return ERR_PTR(-ENAMETOOLONG); + + result = __getname(); + if (unlikely(!result)) + return ERR_PTR(-ENOMEM); + + kname = (char *)result + sizeof(*result); + result->name = kname; + result->uptr = NULL; + result->aname = NULL; + result->separate = false; + + strlcpy(kname, filename, EMBEDDED_NAME_MAX); + return result; +} + #ifdef CONFIG_AUDITSYSCALL void putname(struct filename *name) { diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index fd8bf3219ef7..b4a745d7d9a9 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -115,7 +115,6 @@ extern int copy_strings_kernel(int argc, const char *const *argv, extern int prepare_bprm_creds(struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); extern void set_binfmt(struct linux_binfmt *new); -extern void free_bprm(struct linux_binprm *); extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t); #endif /* _LINUX_BINFMTS_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 09f553c59813..d79678c188ad 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2079,6 +2079,7 @@ extern struct file * dentry_open(const struct path *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); extern struct filename *getname(const char __user *); +extern struct filename *getname_kernel(const char *); enum { FILE_CREATED = 1, diff --git a/include/linux/sched.h b/include/linux/sched.h index 68a0e84463a0..a781dec1cd0b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -128,6 +128,7 @@ struct bio_list; struct fs_struct; struct perf_event_context; struct blk_plug; +struct filename; /* * List of flags we want to share for kernel threads, @@ -2311,7 +2312,7 @@ extern void do_group_exit(int); extern int allow_signal(int); extern int disallow_signal(int); -extern int do_execve(const char *, +extern int do_execve(struct filename *, const char __user * const __user *, const char __user * const __user *); extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); diff --git a/init/main.c b/init/main.c index 2fd9cef70ee8..eb03090cdced 100644 --- a/init/main.c +++ b/init/main.c @@ -812,7 +812,7 @@ void __init load_default_modules(void) static int run_init_process(const char *init_filename) { argv_init[0] = init_filename; - return do_execve(init_filename, + return do_execve(getname_kernel(init_filename), (const char __user *const __user *)argv_init, (const char __user *const __user *)envp_init); } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 10176cd5956a..7aef2f4b6c64 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1719,7 +1719,7 @@ void audit_putname(struct filename *name) struct audit_context *context = current->audit_context; BUG_ON(!context); - if (!context->in_syscall) { + if (!name->aname || !context->in_syscall) { #if AUDIT_DEBUG == 2 printk(KERN_ERR "%s:%d(:%d): final_putname(%p)\n", __FILE__, __LINE__, context->serial, name); diff --git a/kernel/kmod.c b/kernel/kmod.c index b086006c59e7..6b375af4958d 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -239,7 +239,7 @@ static int ____call_usermodehelper(void *data) commit_creds(new); - retval = do_execve(sub_info->path, + retval = do_execve(getname_kernel(sub_info->path), (const char __user *const __user *)sub_info->argv, (const char __user *const __user 
*)sub_info->envp); if (!retval) From f8f202348208fa8a2d817b42f250e145fa885620 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 5 Feb 2014 06:51:37 +0100 Subject: [PATCH 137/171] x86: Disable CONFIG_X86_DECODER_SELFTEST in allmod/allyesconfigs It can take some time to validate the image, make sure {allyes|allmod}config doesn't enable it. I'd say randconfig will cover it often enough, and the failure is also borderline build coverage related: you cannot really make the decoder test fail via source level changes, only with changes in the build environment, so I agree with Andi that we can disable this one too. Signed-off-by: Ingo Molnar Acked-by: Paul Gortmaker paul.gortmaker@windriver.com> Suggested-and-acked-by: Andi Kleen andi@firstfloor.org> Signed-off-by: Linus Torvalds --- arch/x86/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 0f3621ed1db6..321a52ccf63a 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -184,6 +184,7 @@ config HAVE_MMIOTRACE_SUPPORT config X86_DECODER_SELFTEST bool "x86 instruction decoder selftest" depends on DEBUG_KERNEL && KPROBES + depends on !COMPILE_TEST ---help--- Perform x86 instruction decoder selftests at build time. This option is useful for checking the sanity of x86 instruction From 081cd62a010f97b5bc1d2b0cd123c5abc692b68a Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 14 Jan 2014 12:40:09 +0000 Subject: [PATCH 138/171] x86/efi: Allow mapping BGRT on x86-32 CONFIG_X86_32 doesn't map the boot services regions into the EFI memory map (see commit 700870119f49 ("x86, efi: Don't map Boot Services on i386")), and so efi_lookup_mapped_addr() will fail to return a valid address. Executing the ioremap() path in efi_bgrt_init() causes the following warning on x86-32 because we're trying to ioremap() RAM, WARNING: CPU: 0 PID: 0 at arch/x86/mm/ioremap.c:102 __ioremap_caller+0x2ad/0x2c0() Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.13.0-0.rc5.git0.1.2.fc21.i686 #1 Hardware name: DellInc. Venue 8 Pro 5830/09RP78, BIOS A02 10/17/2013 00000000 00000000 c0c0df08 c09a5196 00000000 c0c0df38 c0448c1e c0b41310 00000000 00000000 c0b37bc1 00000066 c043bbfd c043bbfd 00e7dfe0 00073eff 00073eff c0c0df48 c0448ce2 00000009 00000000 c0c0df9c c043bbfd 00078d88 Call Trace: [] dump_stack+0x41/0x52 [] warn_slowpath_common+0x7e/0xa0 [] ? __ioremap_caller+0x2ad/0x2c0 [] ? __ioremap_caller+0x2ad/0x2c0 [] warn_slowpath_null+0x22/0x30 [] __ioremap_caller+0x2ad/0x2c0 [] ? acpi_tb_verify_table+0x1c/0x43 [] ? acpi_get_table_with_size+0x63/0xb5 [] ? efi_lookup_mapped_addr+0xe/0xf0 [] ioremap_nocache+0x1b/0x20 [] ? efi_bgrt_init+0x83/0x10c [] efi_bgrt_init+0x83/0x10c [] efi_late_init+0x8/0xa [] start_kernel+0x3ae/0x3c3 [] ? repair_env_string+0x51/0x51 [] i386_start_kernel+0x12e/0x131 Switch to using early_memremap(), which won't trigger this warning, and has the added benefit of more accurately conveying what we're trying to do - map a chunk of memory. This patch addresses the following bug report, https://bugzilla.kernel.org/show_bug.cgi?id=67911 Reported-by: Adam Williamson Cc: Josh Triplett Cc: Matthew Garrett Cc: Rafael J. 
Wysocki Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi-bgrt.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index 7145ec63c520..4df9591eadad 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -49,7 +49,8 @@ void __init efi_bgrt_init(void) image = efi_lookup_mapped_addr(bgrt_tab->image_address); if (!image) { - image = ioremap(bgrt_tab->image_address, sizeof(bmp_header)); + image = early_memremap(bgrt_tab->image_address, + sizeof(bmp_header)); ioremapped = true; if (!image) return; @@ -57,7 +58,7 @@ void __init efi_bgrt_init(void) memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); if (ioremapped) - iounmap(image); + early_iounmap(image, sizeof(bmp_header)); bgrt_image_size = bmp_header.size; bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL); @@ -65,7 +66,8 @@ void __init efi_bgrt_init(void) return; if (ioremapped) { - image = ioremap(bgrt_tab->image_address, bmp_header.size); + image = early_memremap(bgrt_tab->image_address, + bmp_header.size); if (!image) { kfree(bgrt_image); bgrt_image = NULL; @@ -75,5 +77,5 @@ void __init efi_bgrt_init(void) memcpy_fromio(bgrt_image, image, bgrt_image_size); if (ioremapped) - iounmap(image); + early_iounmap(image, bmp_header.size); } From 8b7ad1bb3d440da888f2a939dc870eba429b9192 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 5 Feb 2014 14:47:45 +1000 Subject: [PATCH 139/171] drm/mgag200,ast,cirrus: fix regression with drm_can_sleep conversion I totally sign inverted my way out of this one. Cc: stable@vger.kernel.org Reported-by: "Sabrina Dubroca" Signed-off-by: Dave Airlie --- drivers/gpu/drm/ast/ast_fb.c | 2 +- drivers/gpu/drm/cirrus/cirrus_fbdev.c | 2 +- drivers/gpu/drm/mgag200/mgag200_fb.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_fb.c b/drivers/gpu/drm/ast/ast_fb.c index 3f65dd6676b2..a28640f47c27 100644 --- a/drivers/gpu/drm/ast/ast_fb.c +++ b/drivers/gpu/drm/ast/ast_fb.c @@ -65,7 +65,7 @@ static void ast_dirty_update(struct ast_fbdev *afbdev, * then the BO is being moved and we should * store up the damage until later. */ - if (!drm_can_sleep()) + if (drm_can_sleep()) ret = ast_bo_reserve(bo, true); if (ret) { if (ret != -EBUSY) diff --git a/drivers/gpu/drm/cirrus/cirrus_fbdev.c b/drivers/gpu/drm/cirrus/cirrus_fbdev.c index 2fd4a92162cb..32bbba0a787b 100644 --- a/drivers/gpu/drm/cirrus/cirrus_fbdev.c +++ b/drivers/gpu/drm/cirrus/cirrus_fbdev.c @@ -39,7 +39,7 @@ static void cirrus_dirty_update(struct cirrus_fbdev *afbdev, * then the BO is being moved and we should * store up the damage until later. */ - if (!drm_can_sleep()) + if (drm_can_sleep()) ret = cirrus_bo_reserve(bo, true); if (ret) { if (ret != -EBUSY) diff --git a/drivers/gpu/drm/mgag200/mgag200_fb.c b/drivers/gpu/drm/mgag200/mgag200_fb.c index f9adc27ef32a..13b7dd83faa9 100644 --- a/drivers/gpu/drm/mgag200/mgag200_fb.c +++ b/drivers/gpu/drm/mgag200/mgag200_fb.c @@ -41,7 +41,7 @@ static void mga_dirty_update(struct mga_fbdev *mfbdev, * then the BO is being moved and we should * store up the damage until later. 
*/ - if (!drm_can_sleep()) + if (drm_can_sleep()) ret = mgag200_bo_reserve(bo, true); if (ret) { if (ret != -EBUSY) From 7c4c62a04a2a80e3feb5d6c97aca1e413b11c790 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 30 Jan 2014 14:11:12 +1000 Subject: [PATCH 140/171] drm/radeon: allow geom rings to be setup on r600/r700 (v2) the evergreen CS parser has allowed this for a while, just port the code to the r600 one. This is required before geom shaders can be made work. v2: agd5f: minor cleanup and add additional 7xx reg. Signed-off-by: Dave Airlie Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600_cs.c | 18 ++++++++++++++++-- drivers/gpu/drm/radeon/radeon_drv.c | 3 ++- drivers/gpu/drm/radeon/reg_srcs/r600 | 1 + 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 7b399dc5fd54..2812c7d1ae6f 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -1007,8 +1007,22 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case R_008C64_SQ_VSTMP_RING_SIZE: case R_0288C8_SQ_GS_VERT_ITEMSIZE: /* get value to populate the IB don't remove */ - tmp =radeon_get_ib_value(p, idx); - ib[idx] = 0; + /*tmp =radeon_get_ib_value(p, idx); + ib[idx] = 0;*/ + break; + case SQ_ESGS_RING_BASE: + case SQ_GSVS_RING_BASE: + case SQ_ESTMP_RING_BASE: + case SQ_GSTMP_RING_BASE: + case SQ_PSTMP_RING_BASE: + case SQ_VSTMP_RING_BASE: + r = radeon_cs_packet_next_reloc(p, &reloc, 0); + if (r) { + dev_warn(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); + return -EINVAL; + } + ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); break; case SQ_CONFIG: track->sq_config = radeon_get_ib_value(p, idx); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index ec8c388eec17..84a1bbb75f91 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -78,9 +78,10 @@ * 2.34.0 - Add CIK tiling mode array query * 2.35.0 - Add CIK macrotile mode array query * 2.36.0 - Fix CIK DCE tiling setup + * 2.37.0 - allow GS ring setup on r6xx/r7xx */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 36 +#define KMS_DRIVER_MINOR 37 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/reg_srcs/r600 b/drivers/gpu/drm/radeon/reg_srcs/r600 index 20bfbda7b3f1..ec0c6829c1dc 100644 --- a/drivers/gpu/drm/radeon/reg_srcs/r600 +++ b/drivers/gpu/drm/radeon/reg_srcs/r600 @@ -18,6 +18,7 @@ r600 0x9400 0x00028A3C VGT_GROUP_VECT_1_FMT_CNTL 0x00028A40 VGT_GS_MODE 0x00028A6C VGT_GS_OUT_PRIM_TYPE +0x00028B38 VGT_GS_MAX_VERT_OUT 0x000088C8 VGT_GS_PER_ES 0x000088E8 VGT_GS_PER_VS 0x000088D4 VGT_GS_VERTEX_REUSE From 4a7ac12eedd190cdf071e61145defa73df1675c0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 6 Feb 2014 11:30:48 +0000 Subject: [PATCH 141/171] arm64: barriers: allow dsb macro to take option parameter The dsb instruction takes an option specifying both the target access types and shareability domain. This patch allows such an option to be passed to the dsb macro, resulting in potentially more efficient code. Currently the option is ignored until all callers are updated (unlike ARM, the option is mandated by the assembler). 
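As an illustration of where this is heading (not part of this patch), once callers are updated the macro could paste the option into the instruction, letting callers request a narrower barrier than the full-system "sy". Option names such as ish and ishst below are the standard ARMv8 shareability/access-type qualifiers; the helper is a hypothetical example:

	/* hypothetical follow-up: actually emit the requested option */
	#define dsb(opt)	asm volatile("dsb " #opt : : : "memory")

	static inline void example_publish(u64 *p, u64 v)
	{
		*p = v;
		dsb(ishst);	/* make the store visible to other CPUs in the
				 * inner-shareable domain before any later store;
				 * cheaper than a full-system "dsb sy" */
	}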
Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/barrier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 78e20ba8806b..409ca370cfe2 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -25,7 +25,7 @@ #define wfi() asm volatile("wfi" : : : "memory") #define isb() asm volatile("isb" : : : "memory") -#define dsb() asm volatile("dsb sy" : : : "memory") +#define dsb(opt) asm volatile("dsb sy" : : : "memory") #define mb() dsb() #define rmb() asm volatile("dsb ld" : : : "memory") From 97b8b16bfcb7f5ccc1b3c10c08580f9f1acb61ac Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 5 Feb 2014 22:05:44 +0200 Subject: [PATCH 142/171] MIPS: fpu.h: Fix build when CONFIG_BUG is not set __enable_fpu produces a build failure when CONFIG_BUG is not set: In file included from arch/mips/kernel/cpu-probe.c:24:0: arch/mips/include/asm/fpu.h: In function '__enable_fpu': arch/mips/include/asm/fpu.h:77:1: error: control reaches end of non-void function [-Werror=return-type] This is regression introduced in 3.14-rc1. Fix that. Signed-off-by: Aaro Koskinen Acked-by: Paul Burton Cc: John Crispin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/6504/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/fpu.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index cfe092fc720d..6b9749540edf 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -74,6 +74,8 @@ static inline int __enable_fpu(enum fpu_mode mode) default: BUG(); } + + return SIGFPE; } #define __disable_fpu() \ From e7f2a444891cb39f11d5429467d0fd7e011fe7fe Mon Sep 17 00:00:00 2001 From: Florian Vaussard Date: Wed, 5 Feb 2014 07:51:22 +0100 Subject: [PATCH 143/171] pinctrl: do not init debugfs entries for unimplemented functionalities Commit c420619 "pinctrl: pinconf: remove checks on ops->pin_config_get" removed the check on (ops != NULL) when performing pinconf_pins_show() or pinconf_groups_show(). As these entries are always enabled, even if pinconf is not supported, reading will result in an oops due to NULL ops. Instead of checking for ops, remove the corresponding debugfs entries if pinconf and/or pinmux are not implemented. Tested on OMAP3 (pinctrl-single). 
Cc: stable@vger.kernel.org Signed-off-by: Florian Vaussard Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index cab020a58bf5..c0fe6091566a 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1644,8 +1644,10 @@ static void pinctrl_init_device_debugfs(struct pinctrl_dev *pctldev) device_root, pctldev, &pinctrl_groups_ops); debugfs_create_file("gpio-ranges", S_IFREG | S_IRUGO, device_root, pctldev, &pinctrl_gpioranges_ops); - pinmux_init_device_debugfs(device_root, pctldev); - pinconf_init_device_debugfs(device_root, pctldev); + if (pctldev->desc->pmxops) + pinmux_init_device_debugfs(device_root, pctldev); + if (pctldev->desc->confops) + pinconf_init_device_debugfs(device_root, pctldev); } static void pinctrl_remove_device_debugfs(struct pinctrl_dev *pctldev) From 5b232c5addc02980cfce451575f1d1f975bdb04e Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 5 Feb 2014 19:11:34 +0530 Subject: [PATCH 144/171] pinctrl: tegra: return correct error type When memory allocation failed, drive should return error as ENOMEM. Signed-off-by: Laxman Dewangan Acked-by: Stephen Warren Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-tegra.c b/drivers/pinctrl/pinctrl-tegra.c index a2e93a2b5ff4..e767355ab0ad 100644 --- a/drivers/pinctrl/pinctrl-tegra.c +++ b/drivers/pinctrl/pinctrl-tegra.c @@ -645,7 +645,7 @@ int tegra_pinctrl_probe(struct platform_device *pdev, GFP_KERNEL); if (!pmx->regs) { dev_err(&pdev->dev, "Can't alloc regs pointer\n"); - return -ENODEV; + return -ENOMEM; } for (i = 0; i < pmx->nbanks; i++) { From 6583327c4dd55acbbf2a6f25e775b28b3abf9a42 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Thu, 6 Feb 2014 15:58:20 +0100 Subject: [PATCH 145/171] x86, hweight: Fix BUG when booting with CONFIG_GCOV_PROFILE_ALL=y Commit d61931d89b, "x86: Add optimized popcnt variants" introduced compile flag -fcall-saved-rdi for lib/hweight.c. When combined with options -fprofile-arcs and -O2, this flag causes gcc to generate broken constructor code. As a result, a 64 bit x86 kernel compiled with CONFIG_GCOV_PROFILE_ALL=y prints message "gcov: could not create file" and runs into sproadic BUGs during boot. The gcc people indicate that these kinds of problems are endemic when using ad hoc calling conventions. It is therefore best to treat any file compiled with ad hoc calling conventions as an isolated environment and avoid things like profiling or coverage analysis, since those subsystems assume a "normal" calling conventions. This patch avoids the bug by excluding lib/hweight.o from coverage profiling. Reported-by: Meelis Roos Cc: Andrew Morton Signed-off-by: Peter Oberparleiter Link: http://lkml.kernel.org/r/52F3A30C.7050205@linux.vnet.ibm.com Signed-off-by: H. 
Peter Anvin Cc: --- lib/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Makefile b/lib/Makefile index a459c31e8c6b..04944e9993ed 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -43,6 +43,7 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o +GCOV_PROFILE_hweight.o := n CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o From 75a1ba5b2c529db60ca49626bcaf0bddf4548438 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 3 Feb 2014 21:41:44 +0100 Subject: [PATCH 146/171] x86, microcode, AMD: Unify valid container checks For additional coverage, BorisO and friends unknowlingly did swap AMD microcode with Intel microcode blobs in order to see what happens. What did happen on 32-bit was [ 5.722656] BUG: unable to handle kernel paging request at be3a6008 [ 5.722693] IP: [] load_microcode_amd+0x24/0x3f0 [ 5.722716] *pdpt = 0000000000000000 *pde = 0000000000000000 because there was a valid initrd there but without valid microcode in it and the container check happened *after* the relocated ramdisk handling on 32-bit, which was clearly wrong. While at it, take care of the ramdisk relocation on both 32- and 64-bit as it is done on both. Also, comment what we're doing because this code is a bit tricky. Reported-and-tested-by: Boris Ostrovsky Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1391460104-7261-1-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/microcode/amd_early.c | 43 +++++++++++++++-------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c index 8384c0fa206f..617a9e284245 100644 --- a/arch/x86/kernel/cpu/microcode/amd_early.c +++ b/arch/x86/kernel/cpu/microcode/amd_early.c @@ -285,6 +285,15 @@ static void __init collect_cpu_sig_on_bsp(void *arg) uci->cpu_sig.sig = cpuid_eax(0x00000001); } + +static void __init get_bsp_sig(void) +{ + unsigned int bsp = boot_cpu_data.cpu_index; + struct ucode_cpu_info *uci = ucode_cpu_info + bsp; + + if (!uci->cpu_sig.sig) + smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1); +} #else void load_ucode_amd_ap(void) { @@ -337,31 +346,37 @@ void load_ucode_amd_ap(void) int __init save_microcode_in_initrd_amd(void) { + unsigned long cont; enum ucode_state ret; u32 eax; -#ifdef CONFIG_X86_32 - unsigned int bsp = boot_cpu_data.cpu_index; - struct ucode_cpu_info *uci = ucode_cpu_info + bsp; + if (!container) + return -EINVAL; - if (!uci->cpu_sig.sig) - smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1); +#ifdef CONFIG_X86_32 + get_bsp_sig(); + cont = (unsigned long)container; +#else + /* + * We need the physical address of the container for both bitness since + * boot_params.hdr.ramdisk_image is a physical address. + */ + cont = __pa(container); +#endif /* - * Take into account the fact that the ramdisk might get relocated - * and therefore we need to recompute the container's position in - * virtual memory space. + * Take into account the fact that the ramdisk might get relocated and + * therefore we need to recompute the container's position in virtual + * memory space. 
*/ - container = (u8 *)(__va((u32)relocated_ramdisk) + - ((u32)container - boot_params.hdr.ramdisk_image)); -#endif + if (relocated_ramdisk) + container = (u8 *)(__va(relocated_ramdisk) + + (cont - boot_params.hdr.ramdisk_image)); + if (ucode_new_rev) pr_info("microcode: updated early to new patch_level=0x%08x\n", ucode_new_rev); - if (!container) - return -EINVAL; - eax = cpuid_eax(0x00000001); eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); From 277cba1d28b99169f2a056d0d6f98a4039531cb8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 6 Feb 2014 12:04:19 -0800 Subject: [PATCH 147/171] Documentation/kernel-parameters.txt: fix memmap= language Clean up descriptions of memmap= boot options. Add periods (full stops), drop commas, change "used" to "reserved" or "marked". Signed-off-by: Randy Dunlap Cc: Andiry Xu Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 8f441dab0396..7116fda7077f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1726,16 +1726,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted. option description. memmap=nn[KMG]@ss[KMG] - [KNL] Force usage of a specific region of memory - Region of memory to be used, from ss to ss+nn. + [KNL] Force usage of a specific region of memory. + Region of memory to be used is from ss to ss+nn. memmap=nn[KMG]#ss[KMG] [KNL,ACPI] Mark specific memory as ACPI data. - Region of memory to be used, from ss to ss+nn. + Region of memory to be marked is from ss to ss+nn. memmap=nn[KMG]$ss[KMG] [KNL,ACPI] Mark specific memory as reserved. - Region of memory to be used, from ss to ss+nn. + Region of memory to be reserved is from ss to ss+nn. Example: Exclude memory from 0x18690000-0x1869ffff memmap=64K$0x18690000 or From fb951eb5e167de9f07973ce0dfff674a2019bfab Mon Sep 17 00:00:00 2001 From: Zongxun Wang Date: Thu, 6 Feb 2014 12:04:20 -0800 Subject: [PATCH 148/171] ocfs2: free allocated clusters if error occurs after ocfs2_claim_clusters Even if using the same jbd2 handle, we cannot rollback a transaction. So once some error occurs after successfully allocating clusters, the allocated clusters will never be used and it means they are lost. For example, call ocfs2_claim_clusters successfully when expanding a file, but failed in ocfs2_insert_extent. So we need free the allocated clusters if they are not used indeed. 
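Reduced to a sketch, the pattern the patch introduces looks like the following; the helper names are the ones used in the diff below, but the surrounding code and argument lists are abridged for illustration. Once ocfs2_claim_clusters() has succeeded, any later failure must hand the bits back explicitly, because committing or aborting the jbd2 handle will not undo the allocation:

	status = ocfs2_claim_clusters(handle, data_ac, min_clusters,
				      &bit_off, &num_bits);
	if (status < 0)
		goto leave;		/* nothing claimed yet, nothing to undo */

	status = ocfs2_insert_extent(handle, &et, cpos, block, num_bits,
				     flags, meta_ac);
	if (status < 0) {
		/* the handle cannot be rolled back: give the bits back by hand */
		if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
			ocfs2_free_local_alloc_bits(osb, handle, data_ac,
						    bit_off, num_bits);
		else
			ocfs2_free_clusters(handle, data_ac->ac_inode,
					    data_ac->ac_bh,
					    ocfs2_clusters_to_blocks(osb->sb, bit_off),
					    num_bits);
		goto leave;
	}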
Signed-off-by: Zongxun Wang Signed-off-by: Joseph Qi Acked-by: Joel Becker Cc: Mark Fasheh Cc: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 38 +++++++++++++++++++++++++++++++++++--- fs/ocfs2/localalloc.c | 42 ++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/localalloc.h | 6 ++++++ 3 files changed, 83 insertions(+), 3 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 8750ae1b8636..aada5801567a 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -4742,6 +4742,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle, enum ocfs2_alloc_restarted *reason_ret) { int status = 0, err = 0; + int need_free = 0; int free_extents; enum ocfs2_alloc_restarted reason = RESTART_NONE; u32 bit_off, num_bits; @@ -4796,7 +4797,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); - goto leave; + need_free = 1; + goto bail; } block = ocfs2_clusters_to_blocks(osb->sb, bit_off); @@ -4807,7 +4809,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle, num_bits, flags, meta_ac); if (status < 0) { mlog_errno(status); - goto leave; + need_free = 1; + goto bail; } ocfs2_journal_dirty(handle, et->et_root_bh); @@ -4821,6 +4824,19 @@ int ocfs2_add_clusters_in_btree(handle_t *handle, reason = RESTART_TRANS; } +bail: + if (need_free) { + if (data_ac->ac_which == OCFS2_AC_USE_LOCAL) + ocfs2_free_local_alloc_bits(osb, handle, data_ac, + bit_off, num_bits); + else + ocfs2_free_clusters(handle, + data_ac->ac_inode, + data_ac->ac_bh, + ocfs2_clusters_to_blocks(osb->sb, bit_off), + num_bits); + } + leave: if (reason_ret) *reason_ret = reason; @@ -6805,6 +6821,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, struct buffer_head *di_bh) { int ret, i, has_data, num_pages = 0; + int need_free = 0; + u32 bit_off, num; handle_t *handle; u64 uninitialized_var(block); struct ocfs2_inode_info *oi = OCFS2_I(inode); @@ -6850,7 +6868,6 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, } if (has_data) { - u32 bit_off, num; unsigned int page_end; u64 phys; @@ -6886,6 +6903,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages); if (ret) { mlog_errno(ret); + need_free = 1; goto out_commit; } @@ -6896,6 +6914,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, ret = ocfs2_read_inline_data(inode, pages[0], di_bh); if (ret) { mlog_errno(ret); + need_free = 1; goto out_commit; } @@ -6927,6 +6946,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL); if (ret) { mlog_errno(ret); + need_free = 1; goto out_commit; } @@ -6938,6 +6958,18 @@ out_commit: dquot_free_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb, 1)); + if (need_free) { + if (data_ac->ac_which == OCFS2_AC_USE_LOCAL) + ocfs2_free_local_alloc_bits(osb, handle, data_ac, + bit_off, num); + else + ocfs2_free_clusters(handle, + data_ac->ac_inode, + data_ac->ac_bh, + ocfs2_clusters_to_blocks(osb->sb, bit_off), + num); + } + ocfs2_commit_trans(osb, handle); out_unlock: diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index cd5496b7a0a3..044013455621 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -781,6 +781,48 @@ bail: return status; } +int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb, + handle_t *handle, + struct ocfs2_alloc_context *ac, + u32 bit_off, + u32 num_bits) +{ + int status, start; + u32 clear_bits; + struct inode 
*local_alloc_inode; + void *bitmap; + struct ocfs2_dinode *alloc; + struct ocfs2_local_alloc *la; + + BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); + + local_alloc_inode = ac->ac_inode; + alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; + la = OCFS2_LOCAL_ALLOC(alloc); + + bitmap = la->la_bitmap; + start = bit_off - le32_to_cpu(la->la_bm_off); + clear_bits = num_bits; + + status = ocfs2_journal_access_di(handle, + INODE_CACHE(local_alloc_inode), + osb->local_alloc_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + while (clear_bits--) + ocfs2_clear_bit(start++, bitmap); + + le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits); + ocfs2_journal_dirty(handle, osb->local_alloc_bh); + +bail: + return status; +} + static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc) { u32 count; diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h index 1be9b5864460..44a7d1fb2dec 100644 --- a/fs/ocfs2/localalloc.h +++ b/fs/ocfs2/localalloc.h @@ -55,6 +55,12 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, u32 *bit_off, u32 *num_bits); +int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb, + handle_t *handle, + struct ocfs2_alloc_context *ac, + u32 bit_off, + u32 num_bits); + void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, unsigned int num_clusters); void ocfs2_la_enable_worker(struct work_struct *work); From 579f82901f6f41256642936d7e632f3979ad76d4 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 6 Feb 2014 12:04:21 -0800 Subject: [PATCH 149/171] swap: add a simple detector for inappropriate swapin readahead This is a patch to improve swap readahead algorithm. It's from Hugh and I slightly changed it. Hugh's original changelog: swapin readahead does a blind readahead, whether or not the swapin is sequential. This may be ok on harddisk, because large reads have relatively small costs, and if the readahead pages are unneeded they can be reclaimed easily - though, what if their allocation forced reclaim of useful pages? But on SSD devices large reads are more expensive than small ones: if the readahead pages are unneeded, reading them in caused significant overhead. This patch adds very simplistic random read detection. Stealing the PageReadahead technique from Konstantin Khlebnikov's patch, avoiding the vma/anon_vma sophistications of Shaohua Li's patch, swapin_nr_pages() simply looks at readahead's current success rate, and narrows or widens its readahead window accordingly. There is little science to its heuristic: it's about as stupid as can be whilst remaining effective. The table below shows elapsed times (in centiseconds) when running a single repetitive swapping load across a 1000MB mapping in 900MB ram with 1GB swap (the harddisk tests had taken painfully too long when I used mem=500M, but SSD shows similar results for that). Vanilla is the 3.6-rc7 kernel on which I started; Shaohua denotes his Sep 3 patch in mmotm and linux-next; HughOld denotes my Oct 1 patch which Shaohua showed to be defective; HughNew this Nov 14 patch, with page_cluster as usual at default of 3 (8-page reads); HughPC4 this same patch with page_cluster 4 (16-page reads); HughPC0 with page_cluster 0 (1-page reads: no readahead). HDD for swapping to harddisk, SSD for swapping to VertexII SSD. Seq for sequential access to the mapping, cycling five times around; Rand for the same number of random touches. Anon for a MAP_PRIVATE anon mapping; Shmem for a MAP_SHARED anon mapping, equivalent to tmpfs. 
One weakness of Shaohua's vma/anon_vma approach was that it did not optimize Shmem: seen below. Konstantin's approach was perhaps mistuned, 50% slower on Seq: did not compete and is not shown below. HDD Vanilla Shaohua HughOld HughNew HughPC4 HughPC0 Seq Anon 73921 76210 75611 76904 78191 121542 Seq Shmem 73601 73176 73855 72947 74543 118322 Rand Anon 895392 831243 871569 845197 846496 841680 Rand Shmem 1058375 1053486 827935 764955 764376 756489 SSD Vanilla Shaohua HughOld HughNew HughPC4 HughPC0 Seq Anon 24634 24198 24673 25107 21614 70018 Seq Shmem 24959 24932 25052 25703 22030 69678 Rand Anon 43014 26146 28075 25989 26935 25901 Rand Shmem 45349 45215 28249 24268 24138 24332 These tests are, of course, two extremes of a very simple case: under heavier mixed loads I've not yet observed any consistent improvement or degradation, and wider testing would be welcome. Shaohua Li: Test shows Vanilla is slightly better in sequential workload than Hugh's patch. I observed with Hugh's patch sometimes the readahead size is shrinked too fast (from 8 to 1 immediately) in sequential workload if there is no hit. And in such case, continuing doing readahead is good actually. I don't prepare a sophisticated algorithm for the sequential workload because so far we can't guarantee sequential accessed pages are swap out sequentially. So I slightly change Hugh's heuristic - don't shrink readahead size too fast. Here is my test result (unit second, 3 runs average): Vanilla Hugh New Seq 356 370 360 Random 4525 2447 2444 Attached graph is the swapin/swapout throughput I collected with 'vmstat 2'. The first part is running a random workload (till around 1200 of the x-axis) and the second part is running a sequential workload. swapin and swapout throughput are almost identical in steady state in both workloads. These are expected behavior. while in Vanilla, swapin is much bigger than swapout especially in random workload (because wrong readahead). Original patches by: Shaohua Li and Konstantin Khlebnikov. 
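To make the heuristic concrete, a worked example following the swapin_nr_pages() logic added below, with page_cluster = 3 (max_pages = 8) and a previous stored window of 8 pages (so the "don't shrink too fast" floor is 8 / 2 = 4):
- 5 readahead hits since the last fault: 5 + 2 = 7, rounded up to the next power of two = 8, clamped to max_pages = 8, so the full window is kept.
- 1 hit: 1 + 2 = 3, rounded up to 4, so the window narrows to 4 pages.
- 0 hits and the faulting offset is not adjacent to the previous one: the raw value is 1, but the shrink floor raises it back to 4; only after further misses (floor 2, then 1) does readahead drop to a single page and effectively switch off.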
[fengguang.wu@intel.com: swapin_nr_pages() can be static] Signed-off-by: Hugh Dickins Signed-off-by: Shaohua Li Signed-off-by: Fengguang Wu Cc: Rik van Riel Cc: Wu Fengguang Cc: Minchan Kim Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 4 +-- mm/swap_state.c | 63 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e464b4e987e8..d1fe1a761047 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -228,9 +228,9 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1) TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback) PAGEFLAG(MappedToDisk, mappedtodisk) -/* PG_readahead is only used for file reads; PG_reclaim is only for writes */ +/* PG_readahead is only used for reads; PG_reclaim is only for writes */ PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim) -PAGEFLAG(Readahead, reclaim) /* Reminder to do async read-ahead */ +PAGEFLAG(Readahead, reclaim) TESTCLEARFLAG(Readahead, reclaim) #ifdef CONFIG_HIGHMEM /* diff --git a/mm/swap_state.c b/mm/swap_state.c index 98e85e9c2b2d..e76ace30d436 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -63,6 +63,8 @@ unsigned long total_swapcache_pages(void) return ret; } +static atomic_t swapin_readahead_hits = ATOMIC_INIT(4); + void show_swap_cache_info(void) { printk("%lu pages in swap cache\n", total_swapcache_pages()); @@ -286,8 +288,11 @@ struct page * lookup_swap_cache(swp_entry_t entry) page = find_get_page(swap_address_space(entry), entry.val); - if (page) + if (page) { INC_CACHE_INFO(find_success); + if (TestClearPageReadahead(page)) + atomic_inc(&swapin_readahead_hits); + } INC_CACHE_INFO(find_total); return page; @@ -389,6 +394,50 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, return found_page; } +static unsigned long swapin_nr_pages(unsigned long offset) +{ + static unsigned long prev_offset; + unsigned int pages, max_pages, last_ra; + static atomic_t last_readahead_pages; + + max_pages = 1 << ACCESS_ONCE(page_cluster); + if (max_pages <= 1) + return 1; + + /* + * This heuristic has been found to work well on both sequential and + * random loads, swapping to hard disk or to SSD: please don't ask + * what the "+ 2" means, it just happens to work well, that's all. + */ + pages = atomic_xchg(&swapin_readahead_hits, 0) + 2; + if (pages == 2) { + /* + * We can have no readahead hits to judge by: but must not get + * stuck here forever, so check for an adjacent offset instead + * (and don't even bother to check whether swap type is same). 
+ */ + if (offset != prev_offset + 1 && offset != prev_offset - 1) + pages = 1; + prev_offset = offset; + } else { + unsigned int roundup = 4; + while (roundup < pages) + roundup <<= 1; + pages = roundup; + } + + if (pages > max_pages) + pages = max_pages; + + /* Don't shrink readahead too fast */ + last_ra = atomic_read(&last_readahead_pages) / 2; + if (pages < last_ra) + pages = last_ra; + atomic_set(&last_readahead_pages, pages); + + return pages; +} + /** * swapin_readahead - swap in pages in hope we need them soon * @entry: swap entry of this memory @@ -412,11 +461,16 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, struct vm_area_struct *vma, unsigned long addr) { struct page *page; - unsigned long offset = swp_offset(entry); + unsigned long entry_offset = swp_offset(entry); + unsigned long offset = entry_offset; unsigned long start_offset, end_offset; - unsigned long mask = (1UL << page_cluster) - 1; + unsigned long mask; struct blk_plug plug; + mask = swapin_nr_pages(offset) - 1; + if (!mask) + goto skip; + /* Read a page_cluster sized and aligned cluster around offset. */ start_offset = offset & ~mask; end_offset = offset | mask; @@ -430,10 +484,13 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, gfp_mask, vma, addr); if (!page) continue; + if (offset != entry_offset) + SetPageReadahead(page); page_cache_release(page); } blk_finish_plug(&plug); lru_add_drain(); /* Push any new pages onto the LRU now */ +skip: return read_swap_cache_async(entry, gfp_mask, vma, addr); } From f893ab41e4dae2fe8991faf5d86d029068d1ef3a Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Thu, 6 Feb 2014 12:04:23 -0800 Subject: [PATCH 150/171] mm/swap: fix race on swap_info reuse between swapoff and swapon swapoff clear swap_info's SWP_USED flag prematurely and free its resources after that. A concurrent swapon will reuse this swap_info while its previous resources are not cleared completely. These late freed resources are: - p->percpu_cluster - swap_cgroup_ctrl[type] - block_device setting - inode->i_flags &= ~S_SWAPFILE This patch clears the SWP_USED flag after all its resources are freed, so that swapon can reuse this swap_info by alloc_swap_info() safely. [akpm@linux-foundation.org: tidy up code comment] Signed-off-by: Weijie Yang Acked-by: Hugh Dickins Cc: Krzysztof Kozlowski Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swapfile.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index c6c13b050a58..4a7f7e6992b6 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1923,7 +1923,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) p->swap_map = NULL; cluster_info = p->cluster_info; p->cluster_info = NULL; - p->flags = 0; frontswap_map = frontswap_map_get(p); spin_unlock(&p->lock); spin_unlock(&swap_lock); @@ -1949,6 +1948,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) mutex_unlock(&inode->i_mutex); } filp_close(swap_file, NULL); + + /* + * Clear the SWP_USED flag after all resources are freed so that swapon + * can reuse this swap_info in alloc_swap_info() safely. It is ok to + * not hold p->lock after we cleared its SWP_WRITEOK. 
+ */ + spin_lock(&swap_lock); + p->flags = 0; + spin_unlock(&swap_lock); + err = 0; atomic_inc(&proc_poll_event); wake_up_interruptible(&proc_poll_wait); From a85d9df1ea1d23682a0ed1e100e6965006595d06 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 6 Feb 2014 12:04:24 -0800 Subject: [PATCH 151/171] mm: __set_page_dirty_nobuffers() uses spin_lock_irqsave() instead of spin_lock_irq() During aio stress test, we observed the following lockdep warning. This mean AIO+numa_balancing is currently deadlockable. The problem is, aio_migratepage disable interrupt, but __set_page_dirty_nobuffers unintentionally enable it again. Generally, all helper function should use spin_lock_irqsave() instead of spin_lock_irq() because they don't know caller at all. other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&ctx->completion_lock)->rlock); lock(&(&ctx->completion_lock)->rlock); *** DEADLOCK *** dump_stack+0x19/0x1b print_usage_bug+0x1f7/0x208 mark_lock+0x21d/0x2a0 mark_held_locks+0xb9/0x140 trace_hardirqs_on_caller+0x105/0x1d0 trace_hardirqs_on+0xd/0x10 _raw_spin_unlock_irq+0x2c/0x50 __set_page_dirty_nobuffers+0x8c/0xf0 migrate_page_copy+0x434/0x540 aio_migratepage+0xb1/0x140 move_to_new_page+0x7d/0x230 migrate_pages+0x5e5/0x700 migrate_misplaced_page+0xbc/0xf0 do_numa_page+0x102/0x190 handle_pte_fault+0x241/0x970 handle_mm_fault+0x265/0x370 __do_page_fault+0x172/0x5a0 do_page_fault+0x1a/0x70 page_fault+0x28/0x30 Signed-off-by: KOSAKI Motohiro Cc: Larry Woodman Cc: Rik van Riel Cc: Johannes Weiner Acked-by: David Rientjes Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page-writeback.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 2d30e2cfe804..7106cb1aca8e 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2173,11 +2173,12 @@ int __set_page_dirty_nobuffers(struct page *page) if (!TestSetPageDirty(page)) { struct address_space *mapping = page_mapping(page); struct address_space *mapping2; + unsigned long flags; if (!mapping) return 1; - spin_lock_irq(&mapping->tree_lock); + spin_lock_irqsave(&mapping->tree_lock, flags); mapping2 = page_mapping(page); if (mapping2) { /* Race with truncate? */ BUG_ON(mapping2 != mapping); @@ -2186,7 +2187,7 @@ int __set_page_dirty_nobuffers(struct page *page) radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } - spin_unlock_irq(&mapping->tree_lock); + spin_unlock_irqrestore(&mapping->tree_lock, flags); if (mapping->host) { /* !PageAnon && !swapper_space */ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); From 017c217a26e9bf6948482f751b30d0507e30a7d0 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Thu, 6 Feb 2014 12:04:25 -0800 Subject: [PATCH 152/171] arch/x86/mm/numa.c: initialize numa_kernel_nodes in numa_clear_kernel_node_hotplug() On-stack variable numa_kernel_nodes in numa_clear_kernel_node_hotplug() was not initialized. So we need to initialize it. [akpm@linux-foundation.org: use NODE_MASK_NONE, per David] Signed-off-by: Tang Chen Tested-by: Gu Zheng Reported-by: Dave Jones Reported-by: David Rientjes Tested-by: Dave Jones Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. 
Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 81b2750f3666..45ec9d72b6dd 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -565,7 +565,7 @@ static void __init numa_init_array(void) static void __init numa_clear_kernel_node_hotplug(void) { int i, nid; - nodemask_t numa_kernel_nodes; + nodemask_t numa_kernel_nodes = NODE_MASK_NONE; unsigned long start, end; struct memblock_type *type = &memblock.reserved; From 7bc35fdde6724549a0239b71e08b9f33d8bf2bfb Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Thu, 6 Feb 2014 12:04:27 -0800 Subject: [PATCH 153/171] arch/x86/mm/numa.c: fix array index overflow when synchronizing nid to memblock.reserved. The following path causes an out-of-bounds array index. memblock_add_region() will always set nid in memblock.reserved to MAX_NUMNODES. In numa_register_memblks(), after we set all nids to their correct values in memblock.reserved, we called setup_node_data(), and used memblock_alloc_nid() to allocate memory, with nid set to MAX_NUMNODES. The nodemask_t type can be seen as a bit array, and the valid indices are 0 ~ MAX_NUMNODES-1. After that, when we call node_set() in numa_clear_kernel_node_hotplug(), the nodemask_t gets an index of value MAX_NUMNODES, which is outside [0 ~ MAX_NUMNODES-1]. See below: numa_init() |---> numa_register_memblks() | |---> memblock_set_node(memory) set correct nid in memblock.memory | |---> memblock_set_node(reserved) set correct nid in memblock.reserved | |...... | |---> setup_node_data() | |---> memblock_alloc_nid() here, nid is set to MAX_NUMNODES (1024) |...... |---> numa_clear_kernel_node_hotplug() |---> node_set() here, we have an index 1024, and overflowed This patch moves nid setting to numa_clear_kernel_node_hotplug() to fix this problem. Reported-by: Dave Jones Signed-off-by: Tang Chen Tested-by: Gu Zheng Reported-by: Dave Jones Cc: David Rientjes Tested-by: Dave Jones Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/numa.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 45ec9d72b6dd..27aa0455fab3 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -493,14 +493,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) struct numa_memblk *mb = &mi->blk[i]; memblock_set_node(mb->start, mb->end - mb->start, &memblock.memory, mb->nid); - - /* - * At this time, all memory regions reserved by memblock are - * used by the kernel. Set the nid in memblock.reserved will - * mark out all the nodes the kernel resides in. - */ - memblock_set_node(mb->start, mb->end - mb->start, - &memblock.reserved, mb->nid); } /* @@ -569,6 +561,17 @@ static void __init numa_clear_kernel_node_hotplug(void) unsigned long start, end; struct memblock_type *type = &memblock.reserved; + /* + * At this time, all memory regions reserved by memblock are + * used by the kernel. Set the nid in memblock.reserved will + * mark out all the nodes the kernel resides in. + */ + for (i = 0; i < numa_meminfo.nr_blks; i++) { + struct numa_memblk *mb = &numa_meminfo.blk[i]; + memblock_set_node(mb->start, mb->end - mb->start, + &memblock.reserved, mb->nid); + } + /* Mark all kernel nodes.
*/ for (i = 0; i < type->cnt; i++) node_set(type->regions[i].nid, numa_kernel_nodes); From 227d53b397a32a7614667b3ecaf1d89902fb6c12 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 6 Feb 2014 12:04:28 -0800 Subject: [PATCH 154/171] mm: __set_page_dirty uses spin_lock_irqsave instead of spin_lock_irq Using spin_{un}lock_irq is dangerous if the caller has disabled interrupts. During aio buffer migration, we can hit the following call stack. aio_migratepage [disable interrupt] migrate_page_copy clear_page_dirty_for_io set_page_dirty __set_page_dirty_buffers __set_page_dirty spin_lock_irq This means the current aio migration path is deadlockable. spin_lock_irqsave is a safer alternative and we should use it. Signed-off-by: KOSAKI Motohiro Reported-by: David Rientjes Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 651dba10b9c2..27265a8b43c1 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -654,14 +654,16 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode); static void __set_page_dirty(struct page *page, struct address_space *mapping, int warn) { - spin_lock_irq(&mapping->tree_lock); + unsigned long flags; + + spin_lock_irqsave(&mapping->tree_lock, flags); if (page->mapping) { /* Race with truncate? */ WARN_ON_ONCE(warn && !PageUptodate(page)); account_page_dirtied(page, mapping); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } - spin_unlock_irq(&mapping->tree_lock); + spin_unlock_irqrestore(&mapping->tree_lock, flags); __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); } From 4f545a4ba158600b7a780b9daaf9ffabb934cdb6 Mon Sep 17 00:00:00 2001 From: Adam Thomson Date: Thu, 6 Feb 2014 18:03:17 +0000 Subject: [PATCH 155/171] hwmon: (da9055) Remove use of regmap_irq_get_virq() Remove use of regmap_irq_get_virq() in driver probe which was conflicting with use of platform_get_irq_byname(). platform_get_irq_byname() already returns the VIRQ number due to MFD core translation so using regmap_irq_get_virq() on that returned value results in an incorrect IRQ being requested. The driver probes then fail because of this. Signed-off-by: Adam Thomson Signed-off-by: Guenter Roeck --- drivers/hwmon/da9055-hwmon.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/hwmon/da9055-hwmon.c b/drivers/hwmon/da9055-hwmon.c index 029ecabc4380..73b3865f1207 100644 --- a/drivers/hwmon/da9055-hwmon.c +++ b/drivers/hwmon/da9055-hwmon.c @@ -278,10 +278,6 @@ static int da9055_hwmon_probe(struct platform_device *pdev) if (hwmon_irq < 0) return hwmon_irq; - hwmon_irq = regmap_irq_get_virq(hwmon->da9055->irq_data, hwmon_irq); - if (hwmon_irq < 0) - return hwmon_irq; - ret = devm_request_threaded_irq(&pdev->dev, hwmon_irq, NULL, da9055_auxadc_irq, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, From 8e86f0b409a44193f1587e87b69c5dcf8f65be67 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 4 Feb 2014 12:29:12 +0000 Subject: [PATCH 156/171] arm64: atomics: fix use of acquire + release for full barrier semantics Linux requires a number of atomic operations to provide full barrier semantics, that is, no memory accesses after the operation can be observed before any accesses up to and including the operation in program order.
On arm64, these operations have been incorrectly implemented as follows: // A, B, C are independent memory locations // atomic_op (B) 1: ldaxr x0, [B] // Exclusive load with acquire stlxr w1, x0, [B] // Exclusive store with release cbnz w1, 1b The assumption here being that two half barriers are equivalent to a full barrier, so the only permitted ordering would be A -> B -> C (where B is the atomic operation involving both a load and a store). Unfortunately, this is not the case by the letter of the architecture and, in fact, the accesses to A and C are permitted to pass their nearest half barrier resulting in orderings such as Bl -> A -> C -> Bs or Bl -> C -> A -> Bs (where Bl is the load-acquire on B and Bs is the store-release on B). This is a clear violation of the full barrier requirement. The simple way to fix this is to implement the same algorithm as ARMv7 using explicit barriers: // atomic_op (B) dmb ish // Full barrier 1: ldxr x0, [B] // Exclusive load stxr w1, x0, [B] // Exclusive store cbnz w1, 1b dmb ish // Full barrier but this has the undesirable effect of introducing *two* full barrier instructions. A better approach is actually the following, non-intuitive sequence: // atomic_op (B) 1: ldxr x0, [B] // Exclusive load stlxr w1, x0, [B] // Exclusive store with release cbnz w1, 1b dmb ish // Full barrier The simple observations here are: - The dmb ensures that no subsequent accesses (e.g. the access to C) can enter or pass the atomic sequence. - The dmb also ensures that no prior accesses (e.g. the access to A) can pass the atomic sequence. - Therefore, no prior access can pass a subsequent access, or vice-versa (i.e. A is strictly ordered before C). - The stlxr ensures that no prior access can pass the store component of the atomic operation. The only tricky part remaining is the ordering between the ldxr and the access to A, since the absence of the first dmb means that we're now permitting re-ordering between the ldxr and any prior accesses. From an (arbitrary) observer's point of view, there are two scenarios: 1. We have observed the ldxr. This means that if we perform a store to [B], the ldxr will still return older data. If we can observe the ldxr, then we can potentially observe the permitted re-ordering with the access to A, which is clearly an issue when compared to the dmb variant of the code. Thankfully, the exclusive monitor will save us here since it will be cleared as a result of the store and the ldxr will retry. Notice that any use of a later memory observation to imply observation of the ldxr will also imply observation of the access to A, since the stlxr/dmb ensure strict ordering. 2. We have not observed the ldxr. This means we can perform a store and influence the later ldxr. However, that doesn't actually tell us anything about the access to [A], so we've not lost anything here either when compared to the dmb variant. This patch implements this solution for our barriered atomic operations, ensuring that we satisfy the full barrier requirements where they are needed. 
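For illustration only, here is a minimal, self-contained sketch of an add-return written the way described above (the function and variable names are invented for this example; in the diff below the kernel keeps the trailing barrier outside the asm block as smp_mb() for the C atomics and as an explicit dmb ish in the assembly files):

static inline int example_atomic_add_return(int i, int *counter)
{
	unsigned long tmp;
	int result;

	asm volatile(
	"1:	ldxr	%w0, %2\n"		/* exclusive load, no acquire */
	"	add	%w0, %w0, %w3\n"
	"	stlxr	%w1, %w0, %2\n"		/* exclusive store with release */
	"	cbnz	%w1, 1b\n"
	"	dmb	ish\n"			/* full barrier closes the sequence */
	: "=&r" (result), "=&r" (tmp), "+Q" (*counter)
	: "Ir" (i)
	: "memory");

	return result;
}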
Cc: Cc: Peter Zijlstra Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/atomic.h | 29 ++++++++++++++++++++--------- arch/arm64/include/asm/cmpxchg.h | 9 +++++---- arch/arm64/include/asm/futex.h | 6 ++++-- arch/arm64/kernel/kuser32.S | 6 ++++-- arch/arm64/lib/bitops.S | 3 ++- 5 files changed, 35 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 01de5aaa3edc..e32893e005d4 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -64,7 +64,7 @@ static inline int atomic_add_return(int i, atomic_t *v) int result; asm volatile("// atomic_add_return\n" -"1: ldaxr %w0, %2\n" +"1: ldxr %w0, %2\n" " add %w0, %w0, %w3\n" " stlxr %w1, %w0, %2\n" " cbnz %w1, 1b" @@ -72,6 +72,7 @@ static inline int atomic_add_return(int i, atomic_t *v) : "Ir" (i) : "cc", "memory"); + smp_mb(); return result; } @@ -96,7 +97,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) int result; asm volatile("// atomic_sub_return\n" -"1: ldaxr %w0, %2\n" +"1: ldxr %w0, %2\n" " sub %w0, %w0, %w3\n" " stlxr %w1, %w0, %2\n" " cbnz %w1, 1b" @@ -104,6 +105,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) : "Ir" (i) : "cc", "memory"); + smp_mb(); return result; } @@ -112,17 +114,20 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) unsigned long tmp; int oldval; + smp_mb(); + asm volatile("// atomic_cmpxchg\n" -"1: ldaxr %w1, %2\n" +"1: ldxr %w1, %2\n" " cmp %w1, %w3\n" " b.ne 2f\n" -" stlxr %w0, %w4, %2\n" +" stxr %w0, %w4, %2\n" " cbnz %w0, 1b\n" "2:" : "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter) : "Ir" (old), "r" (new) : "cc", "memory"); + smp_mb(); return oldval; } @@ -183,7 +188,7 @@ static inline long atomic64_add_return(long i, atomic64_t *v) unsigned long tmp; asm volatile("// atomic64_add_return\n" -"1: ldaxr %0, %2\n" +"1: ldxr %0, %2\n" " add %0, %0, %3\n" " stlxr %w1, %0, %2\n" " cbnz %w1, 1b" @@ -191,6 +196,7 @@ static inline long atomic64_add_return(long i, atomic64_t *v) : "Ir" (i) : "cc", "memory"); + smp_mb(); return result; } @@ -215,7 +221,7 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) unsigned long tmp; asm volatile("// atomic64_sub_return\n" -"1: ldaxr %0, %2\n" +"1: ldxr %0, %2\n" " sub %0, %0, %3\n" " stlxr %w1, %0, %2\n" " cbnz %w1, 1b" @@ -223,6 +229,7 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) : "Ir" (i) : "cc", "memory"); + smp_mb(); return result; } @@ -231,17 +238,20 @@ static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new) long oldval; unsigned long res; + smp_mb(); + asm volatile("// atomic64_cmpxchg\n" -"1: ldaxr %1, %2\n" +"1: ldxr %1, %2\n" " cmp %1, %3\n" " b.ne 2f\n" -" stlxr %w0, %4, %2\n" +" stxr %w0, %4, %2\n" " cbnz %w0, 1b\n" "2:" : "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter) : "Ir" (old), "r" (new) : "cc", "memory"); + smp_mb(); return oldval; } @@ -253,11 +263,12 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) unsigned long tmp; asm volatile("// atomic64_dec_if_positive\n" -"1: ldaxr %0, %2\n" +"1: ldxr %0, %2\n" " subs %0, %0, #1\n" " b.mi 2f\n" " stlxr %w1, %0, %2\n" " cbnz %w1, 1b\n" +" dmb ish\n" "2:" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 56166d7f4a25..189390ce8653 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -29,7 +29,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int 
size switch (size) { case 1: asm volatile("// __xchg1\n" - "1: ldaxrb %w0, %2\n" + "1: ldxrb %w0, %2\n" " stlxrb %w1, %w3, %2\n" " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) @@ -38,7 +38,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size break; case 2: asm volatile("// __xchg2\n" - "1: ldaxrh %w0, %2\n" + "1: ldxrh %w0, %2\n" " stlxrh %w1, %w3, %2\n" " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr) @@ -47,7 +47,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size break; case 4: asm volatile("// __xchg4\n" - "1: ldaxr %w0, %2\n" + "1: ldxr %w0, %2\n" " stlxr %w1, %w3, %2\n" " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr) @@ -56,7 +56,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size break; case 8: asm volatile("// __xchg8\n" - "1: ldaxr %0, %2\n" + "1: ldxr %0, %2\n" " stlxr %w1, %3, %2\n" " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr) @@ -67,6 +67,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size BUILD_BUG(); } + smp_mb(); return ret; } diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 78cc3aba5d69..572193d0005d 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -24,10 +24,11 @@ #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \ asm volatile( \ -"1: ldaxr %w1, %2\n" \ +"1: ldxr %w1, %2\n" \ insn "\n" \ "2: stlxr %w3, %w0, %2\n" \ " cbnz %w3, 1b\n" \ +" dmb ish\n" \ "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -111,11 +112,12 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; asm volatile("// futex_atomic_cmpxchg_inatomic\n" -"1: ldaxr %w1, %2\n" +"1: ldxr %w1, %2\n" " sub %w3, %w1, %w4\n" " cbnz %w3, 3f\n" "2: stlxr %w3, %w5, %2\n" " cbnz %w3, 1b\n" +" dmb ish\n" "3:\n" " .pushsection .fixup,\"ax\"\n" "4: mov %w0, %w6\n" diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 63c48ffdf230..7787208e8cc6 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -38,12 +38,13 @@ __kuser_cmpxchg64: // 0xffff0f60 .inst 0xe92d00f0 // push {r4, r5, r6, r7} .inst 0xe1c040d0 // ldrd r4, r5, [r0] .inst 0xe1c160d0 // ldrd r6, r7, [r1] - .inst 0xe1b20e9f // 1: ldaexd r0, r1, [r2] + .inst 0xe1b20f9f // 1: ldrexd r0, r1, [r2] .inst 0xe0303004 // eors r3, r0, r4 .inst 0x00313005 // eoreqs r3, r1, r5 .inst 0x01a23e96 // stlexdeq r3, r6, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffff9 // beq 1b + .inst 0xf57ff05b // dmb ish .inst 0xe2730000 // rsbs r0, r3, #0 .inst 0xe8bd00f0 // pop {r4, r5, r6, r7} .inst 0xe12fff1e // bx lr @@ -55,11 +56,12 @@ __kuser_memory_barrier: // 0xffff0fa0 .align 5 __kuser_cmpxchg: // 0xffff0fc0 - .inst 0xe1923e9f // 1: ldaex r3, [r2] + .inst 0xe1923f9f // 1: ldrex r3, [r2] .inst 0xe0533000 // subs r3, r3, r0 .inst 0x01823e91 // stlexeq r3, r1, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffffa // beq 1b + .inst 0xf57ff05b // dmb ish .inst 0xe2730000 // rsbs r0, r3, #0 .inst 0xe12fff1e // bx lr diff --git a/arch/arm64/lib/bitops.S b/arch/arm64/lib/bitops.S index e5db797790d3..7dac371cc9a2 100644 --- a/arch/arm64/lib/bitops.S +++ b/arch/arm64/lib/bitops.S @@ -46,11 +46,12 @@ ENTRY( \name ) mov x2, #1 add x1, x1, x0, lsr #3 // Get word offset lsl x4, x2, x3 // Create mask -1: ldaxr x2, [x1] +1: ldxr x2, [x1] lsr x0, x2, x3 // Save old value of bit \instr x2, x2, x4 // toggle bit stlxr w5, x2, [x1] cbnz 
w5, 1b + dmb ish and x0, x0, #1 3: ret ENDPROC(\name ) From 95c4189689f92fba7ecf9097173404d4928c6e9b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 4 Feb 2014 12:29:13 +0000 Subject: [PATCH 157/171] arm64: asm: remove redundant "cc" clobbers cbnz/tbnz don't update the condition flags, so remove the "cc" clobbers from inline asm blocks that only use these instructions to implement conditional branches. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/atomic.h | 24 ++++++++++-------------- arch/arm64/include/asm/cmpxchg.h | 8 ++++---- arch/arm64/include/asm/futex.h | 4 ++-- arch/arm64/include/asm/spinlock.h | 10 +++++----- 4 files changed, 21 insertions(+), 25 deletions(-) diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index e32893e005d4..0237f0867e37 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -54,8 +54,7 @@ static inline void atomic_add(int i, atomic_t *v) " stxr %w1, %w0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline int atomic_add_return(int i, atomic_t *v) @@ -70,7 +69,7 @@ static inline int atomic_add_return(int i, atomic_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); smp_mb(); return result; @@ -87,8 +86,7 @@ static inline void atomic_sub(int i, atomic_t *v) " stxr %w1, %w0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline int atomic_sub_return(int i, atomic_t *v) @@ -103,7 +101,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); smp_mb(); return result; @@ -125,7 +123,7 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) "2:" : "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter) : "Ir" (old), "r" (new) - : "cc", "memory"); + : "cc"); smp_mb(); return oldval; @@ -178,8 +176,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v) " stxr %w1, %0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline long atomic64_add_return(long i, atomic64_t *v) @@ -194,7 +191,7 @@ static inline long atomic64_add_return(long i, atomic64_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); smp_mb(); return result; @@ -211,8 +208,7 @@ static inline void atomic64_sub(u64 i, atomic64_t *v) " stxr %w1, %0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline long atomic64_sub_return(long i, atomic64_t *v) @@ -227,7 +223,7 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); smp_mb(); return result; @@ -249,7 +245,7 @@ static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new) "2:" : "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter) : "Ir" (old), "r" (new) - : "cc", "memory"); + : "cc"); smp_mb(); return oldval; diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 189390ce8653..57c0fa7bf711 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -34,7 +34,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void 
*ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; case 2: asm volatile("// __xchg2\n" @@ -43,7 +43,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; case 4: asm volatile("// __xchg4\n" @@ -52,7 +52,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; case 8: asm volatile("// __xchg8\n" @@ -61,7 +61,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; default: BUILD_BUG(); diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 572193d0005d..5f750dc96e0f 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -41,7 +41,7 @@ " .popsection\n" \ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ : "r" (oparg), "Ir" (-EFAULT) \ - : "cc", "memory") + : "memory") static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) @@ -129,7 +129,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, " .popsection\n" : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) : "r" (oldval), "r" (newval), "Ir" (-EFAULT) - : "cc", "memory"); + : "memory"); *uval = val; return ret; diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 3d5cf064d7a1..c45b7b1b7197 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -132,7 +132,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) " cbnz %w0, 2b\n" : "=&r" (tmp), "+Q" (rw->lock) : "r" (0x80000000) - : "cc", "memory"); + : "memory"); } static inline int arch_write_trylock(arch_rwlock_t *rw) @@ -146,7 +146,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) "1:\n" : "=&r" (tmp), "+Q" (rw->lock) : "r" (0x80000000) - : "cc", "memory"); + : "memory"); return !tmp; } @@ -187,7 +187,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw) " cbnz %w1, 2b\n" : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : - : "cc", "memory"); + : "memory"); } static inline void arch_read_unlock(arch_rwlock_t *rw) @@ -201,7 +201,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) " cbnz %w1, 1b\n" : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : - : "cc", "memory"); + : "memory"); } static inline int arch_read_trylock(arch_rwlock_t *rw) @@ -216,7 +216,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) "1:\n" : "=&r" (tmp), "+r" (tmp2), "+Q" (rw->lock) : - : "cc", "memory"); + : "memory"); return !tmp2; } From 55834a773fe343912b705bef8114ec93fd337188 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 7 Feb 2014 17:12:45 +0000 Subject: [PATCH 158/171] arm64: defconfig: Expand default enabled features FPGA implementations of the Cortex-A57 and Cortex-A53 are now available in the form of the SMM-A57 and SMM-A53 Soft Macrocell Models (SMMs) for Versatile Express. As these attach to a Motherboard Express V2M-P1 it would be useful to have support for some V2M-P1 peripherals enabled by default. 
Additionally a couple of of features have been introduced since the last defconfig update (CMA, jump labels) that would be good to have enabled by default to ensure they are build and boot tested. This patch updates the arm64 defconfig to enable support for these devices and features. The arm64 Kconfig is modified to select HAVE_PATA_PLATFORM, which is required to enable support for the CompactFlash controller on the V2M-P1. A few options which don't need to appear in defconfig are trimmed: * BLK_DEV - selected by default * EXPERIMENTAL - otherwise gone from the kernel * MII - selected by drivers which require it * USB_SUPPORT - selected by default Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + arch/arm64/configs/defconfig | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index dd4327f09ba4..27bbcfc7202a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -36,6 +36,7 @@ config ARM64 select HAVE_GENERIC_DMA_COHERENT select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_MEMBLOCK + select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS select IRQ_DOMAIN select MODULES_USE_ELF_RELA diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 84139be62ae6..7959dd0ca5d5 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set # CONFIG_SWAP is not set CONFIG_SYSVIPC=y @@ -19,6 +18,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y +CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set @@ -27,6 +27,7 @@ CONFIG_ARCH_VEXPRESS=y CONFIG_ARCH_XGENE=y CONFIG_SMP=y CONFIG_PREEMPT=y +CONFIG_CMA=y CONFIG_CMDLINE="console=ttyAMA0" # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_COMPAT=y @@ -42,14 +43,17 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_WIRELESS is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y -CONFIG_BLK_DEV=y +CONFIG_DMA_CMA=y CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y +CONFIG_PATA_PLATFORM=y +CONFIG_PATA_OF_PLATFORM=y CONFIG_NETDEVICES=y -CONFIG_MII=y CONFIG_SMC91X=y +CONFIG_SMSC911X=y # CONFIG_WLAN is not set CONFIG_INPUT_EVDEV=y # CONFIG_SERIO_I8042 is not set @@ -62,13 +66,19 @@ CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set +CONFIG_REGULATOR=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_FB=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set -# CONFIG_USB_SUPPORT is not set +CONFIG_USB=y +CONFIG_USB_ISP1760_HCD=y +CONFIG_USB_STORAGE=y +CONFIG_MMC=y +CONFIG_MMC_ARMMMCI=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y From 0bbfdfe8d25fcc1d5c2edb6b060fb0c5cf66aff9 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 31 Jan 2014 19:33:39 +0200 Subject: [PATCH 159/171] libceph: factor out logic from ceph_osdc_start_request() Factor out logic from ceph_osdc_start_request() into a new helper, __ceph_osdc_start_request(). ceph_osdc_start_request() now amounts to taking locks and calling __ceph_osdc_start_request(). 
Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/osd_client.c | 62 +++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 166d4c7ba065..2aa82b6bb305 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1426,6 +1426,40 @@ static void __send_queued(struct ceph_osd_client *osdc) __send_request(osdc, req); } +/* + * Caller should hold map_sem for read and request_mutex. + */ +static int __ceph_osdc_start_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req, + bool nofail) +{ + int rc; + + __register_request(osdc, req); + req->r_sent = 0; + req->r_got_reply = 0; + rc = __map_request(osdc, req, 0); + if (rc < 0) { + if (nofail) { + dout("osdc_start_request failed map, " + " will retry %lld\n", req->r_tid); + rc = 0; + } else { + __unregister_request(osdc, req); + } + return rc; + } + + if (req->r_osd == NULL) { + dout("send_request %p no up osds in pg\n", req); + ceph_monc_request_next_osdmap(&osdc->client->monc); + } else { + __send_queued(osdc); + } + + return 0; +} + /* * Timeout callback, called every N seconds when 1 or more osd * requests has been active for more than N seconds. When this @@ -2351,34 +2385,16 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req, bool nofail) { - int rc = 0; + int rc; down_read(&osdc->map_sem); mutex_lock(&osdc->request_mutex); - __register_request(osdc, req); - req->r_sent = 0; - req->r_got_reply = 0; - rc = __map_request(osdc, req, 0); - if (rc < 0) { - if (nofail) { - dout("osdc_start_request failed map, " - " will retry %lld\n", req->r_tid); - rc = 0; - } else { - __unregister_request(osdc, req); - } - goto out_unlock; - } - if (req->r_osd == NULL) { - dout("send_request %p no up osds in pg\n", req); - ceph_monc_request_next_osdmap(&osdc->client->monc); - } else { - __send_queued(osdc); - } - rc = 0; -out_unlock: + + rc = __ceph_osdc_start_request(osdc, req, nofail); + mutex_unlock(&osdc->request_mutex); up_read(&osdc->map_sem); + return rc; } EXPORT_SYMBOL(ceph_osdc_start_request); From ff513ace9b772e75e337f8e058cc7f12816843fe Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 3 Feb 2014 13:56:33 +0200 Subject: [PATCH 160/171] libceph: take map_sem for read in handle_reply() Handling redirect replies requires both map_sem and request_mutex. Taking map_sem unconditionally near the top of handle_reply() avoids possible race conditions that arise from releasing request_mutex to be able to acquire map_sem in redirect reply case. (Lock ordering is: map_sem, request_mutex, crush_mutex.) 
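A minimal sketch of the locking shape this gives handle_reply() (illustrative only; the sketch function name is made up, and the real function does the decoding and completion work in between):

static void handle_reply_locking_sketch(struct ceph_osd_client *osdc)
{
	down_read(&osdc->map_sem);		/* outermost: the osdmap rwsem */
	mutex_lock(&osdc->request_mutex);	/* then the request mutex */

	/*
	 * ... look up the request; on a redirect it can be restarted via
	 * __ceph_osdc_start_request() right here, with both locks held,
	 * instead of dropping request_mutex in order to take map_sem later ...
	 */

	mutex_unlock(&osdc->request_mutex);
	up_read(&osdc->map_sem);
}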
Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/osd_client.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 2aa82b6bb305..0676f2b199d6 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1687,6 +1687,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, osdmap_epoch = ceph_decode_32(&p); /* lookup */ + down_read(&osdc->map_sem); mutex_lock(&osdc->request_mutex); req = __lookup_request(osdc, tid); if (req == NULL) { @@ -1743,7 +1744,6 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, dout("redirect pool %lld\n", redir.oloc.pool); __unregister_request(osdc, req); - mutex_unlock(&osdc->request_mutex); req->r_target_oloc = redir.oloc; /* struct */ @@ -1755,10 +1755,10 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, * successfully. In the future we might want to follow * original request's nofail setting here. */ - err = ceph_osdc_start_request(osdc, req, true); + err = __ceph_osdc_start_request(osdc, req, true); BUG_ON(err); - goto done; + goto out_unlock; } already_completed = req->r_got_reply; @@ -1776,8 +1776,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, req->r_got_reply = 1; } else if ((flags & CEPH_OSD_FLAG_ONDISK) == 0) { dout("handle_reply tid %llu dup ack\n", tid); - mutex_unlock(&osdc->request_mutex); - goto done; + goto out_unlock; } dout("handle_reply tid %llu flags %d\n", tid, flags); @@ -1792,6 +1791,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, __unregister_request(osdc, req); mutex_unlock(&osdc->request_mutex); + up_read(&osdc->map_sem); if (!already_completed) { if (req->r_unsafe_callback && @@ -1809,10 +1809,14 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, complete_request(req); } -done: +out: dout("req=%p req->r_linger=%d\n", req, req->r_linger); ceph_osdc_put_request(req); return; +out_unlock: + mutex_unlock(&osdc->request_mutex); + up_read(&osdc->map_sem); + goto out; bad_put: req->r_result = -EIO; @@ -1825,6 +1829,7 @@ bad_put: ceph_osdc_put_request(req); bad_mutex: mutex_unlock(&osdc->request_mutex); + up_read(&osdc->map_sem); bad: pr_err("corrupt osd_op_reply got %d %d\n", (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len)); From 0ec1d15ec6ed513ab2cc86c67d94761d71228a32 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 5 Feb 2014 15:19:55 +0200 Subject: [PATCH 161/171] libceph: do not dereference a NULL bio pointer Commit f38a5181d9f3 ("ceph: Convert to immutable biovecs") introduced a NULL pointer dereference, which broke rbd in -rc1. Fix it. 
Cc: Kent Overstreet Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/messenger.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 0e478a0f4204..30efc5c18622 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -840,9 +840,13 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, if (!cursor->bvec_iter.bi_size) { bio = bio->bi_next; - cursor->bvec_iter = bio->bi_iter; + cursor->bio = bio; + if (bio) + cursor->bvec_iter = bio->bi_iter; + else + memset(&cursor->bvec_iter, 0, + sizeof(cursor->bvec_iter)); } - cursor->bio = bio; if (!cursor->last_piece) { BUG_ON(!cursor->resid); From 1ccfe6f982674e4b7bd832b904bafcb3db890252 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 31 Jan 2014 13:47:34 +0100 Subject: [PATCH 162/171] watchdog: dw_wdt: Add dependency on HAS_IOMEM On archs like S390 or um this driver cannot build nor work. Make it depend on HAS_IOMEM to bypass build failures. drivers/built-in.o: In function `dw_wdt_drv_probe': drivers/watchdog/dw_wdt.c:302: undefined reference to `devm_ioremap_resource' Signed-off-by: Richard Weinberger Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 4c4c566c52a3..79d25894343a 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -223,6 +223,7 @@ config SA1100_WATCHDOG config DW_WATCHDOG tristate "Synopsys DesignWare watchdog" + depends on HAS_IOMEM help Say Y here if to include support for the Synopsys DesignWare watchdog timer found in many chips. From c18f7b51200c3c8b76c63e391f9995b65ace9c83 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 7 Feb 2014 14:36:10 -0600 Subject: [PATCH 163/171] jfs: fix generic posix ACL regression I missed a couple errors in reviewing the patches converting jfs to use the generic posix ACL function. Setting ACL's currently fails with -EOPNOTSUPP. Signed-off-by: Dave Kleikamp Reported-by: Michael L. 
Semon Reviewed-by: Christoph Hellwig --- fs/jfs/xattr.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 3bd5ee45f7b3..46325d5c34fc 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -854,9 +854,6 @@ int jfs_setxattr(struct dentry *dentry, const char *name, const void *value, int rc; tid_t tid; - if ((rc = can_set_xattr(inode, name, value, value_len))) - return rc; - /* * If this is a request for a synthetic attribute in the system.* * namespace use the generic infrastructure to resolve a handler @@ -865,6 +862,9 @@ int jfs_setxattr(struct dentry *dentry, const char *name, const void *value, if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) return generic_setxattr(dentry, name, value, value_len, flags); + if ((rc = can_set_xattr(inode, name, value, value_len))) + return rc; + if (value == NULL) { /* empty EA, do not remove */ value = ""; value_len = 0; @@ -1034,9 +1034,6 @@ int jfs_removexattr(struct dentry *dentry, const char *name) int rc; tid_t tid; - if ((rc = can_set_xattr(inode, name, NULL, 0))) - return rc; - /* * If this is a request for a synthetic attribute in the system.* * namespace use the generic infrastructure to resolve a handler @@ -1045,6 +1042,9 @@ int jfs_removexattr(struct dentry *dentry, const char *name) if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) return generic_removexattr(dentry, name); + if ((rc = can_set_xattr(inode, name, NULL, 0))) + return rc; + tid = txBegin(inode->i_sb, 0); mutex_lock(&ji->commit_mutex); rc = __jfs_setxattr(tid, dentry->d_inode, name, NULL, 0, XATTR_REPLACE); @@ -1061,7 +1061,7 @@ int jfs_removexattr(struct dentry *dentry, const char *name) * attributes are handled directly. */ const struct xattr_handler *jfs_xattr_handlers[] = { -#ifdef JFS_POSIX_ACL +#ifdef CONFIG_JFS_POSIX_ACL &posix_acl_access_xattr_handler, &posix_acl_default_xattr_handler, #endif From 6cc98d90f8d14f8ebce2391323929024d7eef39f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 5 Feb 2014 16:19:21 -0500 Subject: [PATCH 164/171] Btrfs: fix assert screwup for the pending move stuff Wang noticed that he was failing btrfs/030 even though me and Filipe couldn't reproduce. Turns out this is because Wang didn't have CONFIG_BTRFS_ASSERT set, which meant that a key part of Filipe's original patch was not being built in. This appears to be a mess up with merging Filipe's patch as it does not exist in his original patch. Fix this by changing how we make sure del_waiting_dir_move asserts that it did not error and take the function out of the ifdef check. This makes btrfs/030 pass with the assert on or off. 
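The underlying hazard is putting a side effect inside an assertion: when CONFIG_BTRFS_ASSERT is not set, ASSERT() compiles away and its argument is never evaluated. A minimal before/after sketch of the call site (matching the shape of the diff below):

	/* Broken when assertions are compiled out: the call disappears,
	 * so the entry is never removed from waiting_dir_moves. */
	ASSERT(del_waiting_dir_move(sctx, pm->ino) == 0);

	/* Fixed: the call always happens, only the check is conditional. */
	ret = del_waiting_dir_move(sctx, pm->ino);
	ASSERT(ret == 0);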
Thanks, Signed-off-by: Josef Bacik Reviewed-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/send.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index cf9107a64204..9c8d1a3fdc3a 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -2774,8 +2774,6 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) return 0; } -#ifdef CONFIG_BTRFS_ASSERT - static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino) { struct rb_node *n = sctx->waiting_dir_moves.rb_node; @@ -2796,8 +2794,6 @@ static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino) return -ENOENT; } -#endif - static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino) { struct rb_node **p = &sctx->pending_dir_moves.rb_node; @@ -2902,7 +2898,9 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) } sctx->send_progress = sctx->cur_ino + 1; - ASSERT(del_waiting_dir_move(sctx, pm->ino) == 0); + ret = del_waiting_dir_move(sctx, pm->ino); + ASSERT(ret == 0); + ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); if (ret < 0) goto out; From d0270aca88966641eb15306e9bd0c7ad15321440 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 7 Feb 2014 14:33:57 +0100 Subject: [PATCH 165/171] btrfs: commit transaction after setting label and features The set_fslabel ioctl uses btrfs_end_transaction, which means it's possible that the change will be lost if the system crashes, same for the newly set features. Let's use btrfs_commit_transaction instead. Signed-off-by: Jeff Mahoney Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 34772cbcc7aa..5bbf6b7216c3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4547,7 +4547,7 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) spin_lock(&root->fs_info->super_lock); strcpy(super_block->label, label); spin_unlock(&root->fs_info->super_lock); - ret = btrfs_end_transaction(trans, root); + ret = btrfs_commit_transaction(trans, root); out_unlock: mnt_drop_write_file(file); @@ -4711,7 +4711,7 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg) btrfs_set_super_incompat_flags(super_block, newflags); spin_unlock(&root->fs_info->super_lock); - return btrfs_end_transaction(trans, root); + return btrfs_commit_transaction(trans, root); } long btrfs_ioctl(struct file *file, unsigned int From 8051aa1a3d5aaa7bd4c062cad94d09c3d567ef2e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 7 Feb 2014 14:34:04 +0100 Subject: [PATCH 166/171] btrfs: reserve no transaction units in btrfs_ioctl_set_features Added in patch "btrfs: add ioctls to query/change feature bits online" modifications to superblock don't need to reserve metadata blocks when starting a transaction. 
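In other words, the item count passed to btrfs_start_transaction() is a metadata reservation, and an update that only touches the superblock needs none. An illustrative sketch of the intended call (mirroring the one-line diff below):

	/* 0 items: nothing in the metadata trees is modified, only the
	 * superblock fields written out when the transaction commits. */
	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);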
Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5bbf6b7216c3..ebdd866d4cfd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4690,7 +4690,7 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg) if (ret) return ret; - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) return PTR_ERR(trans); From 27a377db745ed4d11b3b9b340756857cb8dde07f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 7 Feb 2014 13:57:59 -0500 Subject: [PATCH 167/171] Btrfs: don't loop forever if we can't run because of the tree mod log A user reported a 100% cpu hang with my new delayed ref code. Turns out I forgot to increase the count check when we can't run a delayed ref because of the tree mod log. If we can't run any delayed refs during this there is no point in continuing to look, and we need to break out. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9c9ecc93ae2c..32312e09f0f5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2385,6 +2385,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, spin_unlock(&delayed_refs->lock); locked_ref = NULL; cond_resched(); + count++; continue; } From a2aa75e18a21b21952dc6daa9bac7c9f4426f81f Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Sat, 8 Feb 2014 15:47:46 +0000 Subject: [PATCH 168/171] Btrfs: fix data corruption when reading/updating compressed extents When using a mix of compressed file extents and prealloc extents, it is possible to fill a page of a file with random, garbage data from some unrelated previous use of the page, instead of a sequence of zeroes. A simple sequence of steps to get into such case, taken from the test case I made for xfstests, is: _scratch_mkfs _scratch_mount "-o compress-force=lzo" $XFS_IO_PROG -f -c "pwrite -S 0x06 -b 18670 266978 18670" $SCRATCH_MNT/foobar $XFS_IO_PROG -c "falloc 26450 665194" $SCRATCH_MNT/foobar $XFS_IO_PROG -c "truncate 542872" $SCRATCH_MNT/foobar $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar This results in the following file items in the fs tree: item 4 key (257 INODE_ITEM 0) itemoff 15879 itemsize 160 inode generation 6 transid 6 size 542872 block group 0 mode 100600 item 5 key (257 INODE_REF 256) itemoff 15863 itemsize 16 inode ref index 2 namelen 6 name: foobar item 6 key (257 EXTENT_DATA 0) itemoff 15810 itemsize 53 extent data disk byte 0 nr 0 gen 6 extent data offset 0 nr 24576 ram 266240 extent compression 0 item 7 key (257 EXTENT_DATA 24576) itemoff 15757 itemsize 53 prealloc data disk byte 12849152 nr 241664 gen 6 prealloc data offset 0 nr 241664 item 8 key (257 EXTENT_DATA 266240) itemoff 15704 itemsize 53 extent data disk byte 12845056 nr 4096 gen 6 extent data offset 0 nr 20480 ram 20480 extent compression 2 item 9 key (257 EXTENT_DATA 286720) itemoff 15651 itemsize 53 prealloc data disk byte 13090816 nr 405504 gen 6 prealloc data offset 0 nr 258048 The on disk extent at offset 266240 (which corresponds to 1 single disk block), contains 5 compressed chunks of file data. Each of the first 4 compress 4096 bytes of file data, while the last one only compresses 3024 bytes of file data. 
Therefore a read into the file region [285648 ; 286720[ (length = 4096 - 3024 = 1072 bytes) should always return zeroes (our next extent is a prealloc one). The solution here is the compression code path to zero the remaining (untouched) bytes of the last page it uncompressed data into, as the information about how much space the file data consumes in the last page is not known in the upper layer fs/btrfs/extent_io.c:__do_readpage(). In __do_readpage we were correctly zeroing the remainder of the page but only if it corresponds to the last page of the inode and if the inode's size is not a multiple of the page size. This would cause not only returning random data on reads, but also permanently storing random data when updating parts of the region that should be zeroed. For the example above, it means updating a single byte in the region [285648 ; 286720[ would store that byte correctly but also store random data on disk. A test case for xfstests follows soon. Signed-off-by: Filipe David Borba Manana Signed-off-by: Chris Mason --- fs/btrfs/compression.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index af815eb8f970..ed1ff1cb1017 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1011,6 +1011,8 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, bytes = min(bytes, working_bytes); kaddr = kmap_atomic(page_out); memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); + if (*pg_index == (vcnt - 1) && *pg_offset == 0) + memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); kunmap_atomic(kaddr); flush_dcache_page(page_out); From d311d79de305f1ada47cadd672e6ed1b28a949eb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 9 Feb 2014 15:18:09 -0500 Subject: [PATCH 169/171] fix O_SYNC|O_APPEND syncing the wrong range on write() It actually goes back to 2004 ([PATCH] Concurrent O_SYNC write support) when sync_page_range() had been introduced; generic_file_write{,v}() correctly synced pos_after_write - written .. pos_after_write - 1 but generic_file_aio_write() synced pos_before_write .. pos_before_write + written - 1 instead. Which is not the same thing with O_APPEND, obviously. A couple of years later correct variant had been killed off when everything switched to use of generic_file_aio_write(). All users of generic_file_aio_write() are affected, and the same bug has been copied into other instances of ->aio_write(). The fix is trivial; the only subtle point is that generic_write_sync() ought to be inlined to avoid calculations useless for the majority of calls. 
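To make the difference concrete under O_APPEND (numbers invented for the example): the file is 100 bytes long, the descriptor's offset is 0, and 50 bytes are written. The data lands at offsets 100..149 and iocb->ki_pos ends up at 150, so the range that must be synced is ki_pos - written .. ki_pos - 1 = 100..149; the old code synced 0..49 instead, bytes the write never touched. The corrected caller-side pattern, which the diffs below apply in each filesystem, is roughly:

	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
	if (ret > 0) {
		ssize_t err;

		/* ki_pos has already been advanced past the write, so
		 * ki_pos - ret is where the data actually landed */
		err = generic_write_sync(file, iocb->ki_pos - ret, ret);
		if (err < 0)
			ret = err;
	}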
Signed-off-by: Al Viro --- fs/cifs/file.c | 4 ++-- fs/ext4/file.c | 2 +- fs/ntfs/file.c | 2 +- fs/sync.c | 17 ----------------- fs/xfs/xfs_file.c | 2 +- include/linux/fs.h | 8 +++++++- mm/filemap.c | 4 ++-- 7 files changed, 14 insertions(+), 25 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 853d6d1cc822..a7eda8ebfacc 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2559,8 +2559,8 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, if (rc > 0) { ssize_t err; - err = generic_write_sync(file, pos, rc); - if (err < 0 && rc > 0) + err = generic_write_sync(file, iocb->ki_pos - rc, rc); + if (err < 0) rc = err; } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 43e64f6022eb..1a5073959f32 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -152,7 +152,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, if (ret > 0) { ssize_t err; - err = generic_write_sync(file, pos, ret); + err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0 && ret > 0) ret = err; } diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index ea4ba9daeb47..db9bd8a31725 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2134,7 +2134,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); mutex_unlock(&inode->i_mutex); if (ret > 0) { - int err = generic_write_sync(file, pos, ret); + int err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0) ret = err; } diff --git a/fs/sync.c b/fs/sync.c index f15537452231..e8ba024a055b 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -222,23 +222,6 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd) return do_fsync(fd, 1); } -/** - * generic_write_sync - perform syncing after a write if file / inode is sync - * @file: file to which the write happened - * @pos: offset where the write started - * @count: length of the write - * - * This is just a simple wrapper about our general syncing function. - */ -int generic_write_sync(struct file *file, loff_t pos, loff_t count) -{ - if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) - return 0; - return vfs_fsync_range(file, pos, pos + count - 1, - (file->f_flags & __O_SYNC) ? 0 : 1); -} -EXPORT_SYMBOL(generic_write_sync); - /* * sys_sync_file_range() permits finely controlled syncing over a segment of * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 2e7989e3a2d6..64b48eade91d 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -799,7 +799,7 @@ xfs_file_aio_write( XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ - err = generic_write_sync(file, pos, ret); + err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0) ret = err; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 09f553c59813..75ff961be051 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2273,7 +2273,13 @@ extern int filemap_fdatawrite_range(struct address_space *mapping, extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); -extern int generic_write_sync(struct file *file, loff_t pos, loff_t count); +static inline int generic_write_sync(struct file *file, loff_t pos, loff_t count) +{ + if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) + return 0; + return vfs_fsync_range(file, pos, pos + count - 1, + (file->f_flags & __O_SYNC) ? 
0 : 1); +} extern void emergency_sync(void); extern void emergency_remount(void); #ifdef CONFIG_BLOCK diff --git a/mm/filemap.c b/mm/filemap.c index d56d3c145b9f..7a13f6ac5421 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2553,8 +2553,8 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (ret > 0) { ssize_t err; - err = generic_write_sync(file, pos, ret); - if (err < 0 && ret > 0) + err = generic_write_sync(file, iocb->ki_pos - ret, ret); + if (err < 0) ret = err; } return ret; From c9efe51165fa0aff57be54e3cb0201ac87f68980 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 Feb 2014 07:05:05 -0500 Subject: [PATCH 170/171] fix a kmap leak in virtio_console While we are at it, don't do kmap() under kmap_atomic(), *especially* for a page we'd allocated with GFP_KERNEL. It's spelled "page_address", and had that been more than that, we'd have a real trouble - kmap_high() can block, and doing that while holding kmap_atomic() is a Bad Idea(tm). Signed-off-by: Al Viro --- drivers/char/virtio_console.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index feea87cc6b8f..6928d094451d 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -890,12 +890,10 @@ static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf, } else { /* Failback to copying a page */ struct page *page = alloc_page(GFP_KERNEL); - char *src = buf->ops->map(pipe, buf, 1); - char *dst; + char *src; if (!page) return -ENOMEM; - dst = kmap(page); offset = sd->pos & ~PAGE_MASK; @@ -903,9 +901,8 @@ static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf, if (len + offset > PAGE_SIZE) len = PAGE_SIZE - offset; - memcpy(dst + offset, src + buf->offset, len); - - kunmap(page); + src = buf->ops->map(pipe, buf, 1); + memcpy(page_address(page) + offset, src + buf->offset, len); buf->ops->unmap(pipe, buf, src); sg_set_page(&(sgl->sg[sgl->n]), page, len, offset); From b28a960c42fcd9cfc987441fa6d1c1a471f0f9ed Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 9 Feb 2014 18:15:47 -0800 Subject: [PATCH 171/171] Linux 3.14-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 606ef7c4a544..933e1def6baf 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Shuffling Zombie Juror # *DOCUMENTATION*