From b80d5ccca3a012e91ca64a2a0b13049163a6a698 Mon Sep 17 00:00:00 2001 From: Haiyan Hu Date: Tue, 10 Sep 2013 11:25:37 +0800 Subject: [PATCH 001/171] NVMe: Avoid shift operation when writing cq head doorbell Changes the type of dev->db_stride to unsigned and changes the value stored there to be 1 << the current value. Then there is less calculation to be done at completion time. Signed-off-by: Haiyan Hu Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 10 +++++----- include/linux/nvme.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 26d03fa0bf26..073aec913c78 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -775,7 +775,7 @@ static int nvme_process_cq(struct nvme_queue *nvmeq) if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) return 0; - writel(head, nvmeq->q_db + (1 << nvmeq->dev->db_stride)); + writel(head, nvmeq->q_db + nvmeq->dev->db_stride); nvmeq->cq_head = head; nvmeq->cq_phase = phase; @@ -1113,7 +1113,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, init_waitqueue_head(&nvmeq->sq_full); init_waitqueue_entry(&nvmeq->sq_cong_wait, nvme_thread); bio_list_init(&nvmeq->sq_cong); - nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)]; + nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; nvmeq->q_depth = depth; nvmeq->cq_vector = vector; nvmeq->q_suspended = 1; @@ -1149,7 +1149,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) nvmeq->sq_tail = 0; nvmeq->cq_head = 0; nvmeq->cq_phase = 1; - nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)]; + nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; memset(nvmeq->cmdid_data, 0, extra); memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); nvme_cancel_ios(nvmeq, false); @@ -1741,7 +1741,7 @@ static int set_queue_count(struct nvme_dev *dev, int count) static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) { - return 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3)); + return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride); } static int nvme_setup_io_queues(struct nvme_dev *dev) @@ -1958,7 +1958,7 @@ static int nvme_dev_map(struct nvme_dev *dev) if (!dev->bar) goto disable; - dev->db_stride = NVME_CAP_STRIDE(readq(&dev->bar->cap)); + dev->db_stride = 1 << NVME_CAP_STRIDE(readq(&dev->bar->cap)); dev->dbs = ((void __iomem *)dev->bar) + 4096; return 0; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 26ebcf41c213..8119a476cb29 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -80,7 +80,7 @@ struct nvme_dev { struct dma_pool *prp_small_pool; int instance; int queue_count; - int db_stride; + u32 db_stride; u32 ctrl_config; struct msix_entry *entry; struct nvme_bar __iomem *bar; From 481e5bad84239b01415b888df2040c1860ae3cfd Mon Sep 17 00:00:00 2001 From: Michael Opdenacker Date: Sat, 12 Oct 2013 06:23:29 +0200 Subject: [PATCH 002/171] NVMe: remove deprecated IRQF_DISABLED This patch proposes to remove the use of the IRQF_DISABLED flag It's a NOOP since 2.6.35 and it will be removed one day. 
Signed-off-by: Michael Opdenacker Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 073aec913c78..df3daeb6227f 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1134,11 +1134,10 @@ static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq, { if (use_threaded_interrupts) return request_threaded_irq(dev->entry[nvmeq->cq_vector].vector, - nvme_irq_check, nvme_irq, - IRQF_DISABLED | IRQF_SHARED, + nvme_irq_check, nvme_irq, IRQF_SHARED, name, nvmeq); return request_irq(dev->entry[nvmeq->cq_vector].vector, nvme_irq, - IRQF_DISABLED | IRQF_SHARED, name, nvmeq); + IRQF_SHARED, name, nvmeq); } static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) From a6f089e95b1e08cdea9633d50ad20aa5d44ba64d Mon Sep 17 00:00:00 2001 From: Lior Amsalem Date: Mon, 25 Nov 2013 17:26:44 +0100 Subject: [PATCH 003/171] irqchip: armada-370-xp: fix IPI race condition In the Armada 370/XP driver, when we receive an IRQ 0, we read the list of doorbells that caused the interrupt from register ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS. This gives the list of IPIs that were generated. However, instead of acknowledging only the IPIs that were generated, we acknowledge *all* the IPIs, by writing ~IPI_DOORBELL_MASK in the ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS register. This creates a race condition: if a new IPI that isn't part of the ones read into the temporary "ipimask" variable is fired before we acknowledge all IPIs, then we will simply lose it. This is causing scheduling hangs on SMP intensive workloads. It is important to mention that this ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS register has the following behavior: "A CPU write of 0 clears the bits in this field. A CPU write of 1 has no effect". This is what allows us to simply write ~ipimask to acknowledge the handled IPIs. Notice that the same problem is present in the MSI implementation, but it will be fixed as a separate patch, so that this IPI fix can be pushed to older stable versions as appropriate (all the way to 3.8), while the MSI code only appeared in 3.13. Signed-off-by: Lior Amsalem Signed-off-by: Thomas Petazzoni Cc: stable@vger.kernel.org # v3.8+ Fixes: 344e873e5657e8dc0 'arm: mvebu: Add IPI support via doorbells' Cc: Thomas Gleixner Signed-off-by: Jason Cooper --- drivers/irqchip/irq-armada-370-xp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 433cc8568dec..f5e49a2d8e5a 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -407,7 +407,7 @@ armada_370_xp_handle_irq(struct pt_regs *regs) ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS) & IPI_DOORBELL_MASK; - writel(~IPI_DOORBELL_MASK, per_cpu_int_base + + writel(~ipimask, per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS); /* Handle all pending doorbells */ From c7f7bd4a136e4b02dd2a66bf95aec545bd93e8db Mon Sep 17 00:00:00 2001 From: Lior Amsalem Date: Mon, 25 Nov 2013 17:26:45 +0100 Subject: [PATCH 004/171] irqchip: armada-370-xp: fix MSI race condition In the Armada 370/XP driver, when we receive an IRQ 1, we read the list of doorbells that caused the interrupt from register ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS. This gives the list of MSIs that were generated.
However, instead of acknowledging only the MSIs that were generated, we acknowledge *all* the MSIs, by writing ~MSI_DOORBELL_MASK in the ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS register. This creates a race condition: if a new MSI that isn't part of the ones read into the temporary "msimask" variable is fired before we acknowledge all MSIs, then we will simply lose it. It is important to mention that this ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS register has the following behavior: "A CPU write of 0 clears the bits in this field. A CPU write of 1 has no effect". This is what allows us to simply write ~msimask to acknowledge the handled MSIs. Notice that the same problem is present in the IPI implementation, but it is fixed as a separate patch, so that this IPI fix can be pushed to older stable versions as appropriate (all the way to 3.8), while the MSI code only appeared in 3.13. Signed-off-by: Lior Amsalem Signed-off-by: Thomas Petazzoni Cc: Thomas Gleixner Signed-off-by: Jason Cooper --- drivers/irqchip/irq-armada-370-xp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index f5e49a2d8e5a..3fac063b4a78 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -381,7 +381,7 @@ armada_370_xp_handle_irq(struct pt_regs *regs) ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS) & PCI_MSI_DOORBELL_MASK; - writel(~PCI_MSI_DOORBELL_MASK, per_cpu_int_base + + writel(~msimask, per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS); for (msinr = PCI_MSI_DOORBELL_START; From 320a382746e0ab1304476ea7e986a8d416ab99db Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 23 Oct 2013 13:07:34 -0600 Subject: [PATCH 005/171] NVMe: compat SG_IO ioctl For 32-bit versions of sg3-utils running on a 64-bit system. This is mostly a copy from the relevant portions of fs/compat_ioctl.c, with slight modifications for going through block_device_operations.
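A condensed sketch of the central translation step (field names as in the hunks below; an illustration, not the full handler): every user pointer arrives as a 32-bit value and has to be widened with compat_ptr() before the native sg_io_hdr can be handed to nvme_sg_io():

	u32 data;
	unsigned char __user *cmdp;

	if (get_user(data, &sgio32->cmdp))	/* 32-bit pointer value from userspace */
		return -EFAULT;
	cmdp = compat_ptr(data);		/* widen to a native __user pointer */
	if (put_user(cmdp, &sgio->cmdp))	/* store into the 64-bit sg_io_hdr copy */
		return -EFAULT;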
Signed-off-by: Keith Busch Reviewed-by: Vishal Verma [fixed up CONFIG_COMPAT=n build problems] Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 18 ++++- drivers/block/nvme-scsi.c | 147 ++++++++++++++++++++++++++++++++++++++ include/linux/nvme.h | 1 + 3 files changed, 165 insertions(+), 1 deletion(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index df3daeb6227f..e44350de8a21 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1568,10 +1568,26 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, } } +#ifdef CONFIG_COMPAT +static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct nvme_ns *ns = bdev->bd_disk->private_data; + + switch (cmd) { + case SG_IO: + return nvme_sg_io32(ns, arg); + } + return nvme_ioctl(bdev, mode, cmd, arg); +} +#else +#define nvme_compat_ioctl NULL +#endif + static const struct block_device_operations nvme_fops = { .owner = THIS_MODULE, .ioctl = nvme_ioctl, - .compat_ioctl = nvme_ioctl, + .compat_ioctl = nvme_compat_ioctl, }; static void nvme_resubmit_bios(struct nvme_queue *nvmeq) diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 4a4ff4eb8e23..4a0ceb64e269 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -3038,6 +3039,152 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr) return retcode; } +#ifdef CONFIG_COMPAT +typedef struct sg_io_hdr32 { + compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */ + compat_int_t dxfer_direction; /* [i] data transfer direction */ + unsigned char cmd_len; /* [i] SCSI command length ( <= 16 bytes) */ + unsigned char mx_sb_len; /* [i] max length to write to sbp */ + unsigned short iovec_count; /* [i] 0 implies no scatter gather */ + compat_uint_t dxfer_len; /* [i] byte count of data transfer */ + compat_uint_t dxferp; /* [i], [*io] points to data transfer memory + or scatter gather list */ + compat_uptr_t cmdp; /* [i], [*i] points to command to perform */ + compat_uptr_t sbp; /* [i], [*o] points to sense_buffer memory */ + compat_uint_t timeout; /* [i] MAX_UINT->no timeout (unit: millisec) */ + compat_uint_t flags; /* [i] 0 -> default, see SG_FLAG... 
*/ + compat_int_t pack_id; /* [i->o] unused internally (normally) */ + compat_uptr_t usr_ptr; /* [i->o] unused internally */ + unsigned char status; /* [o] scsi status */ + unsigned char masked_status; /* [o] shifted, masked scsi status */ + unsigned char msg_status; /* [o] messaging level data (optional) */ + unsigned char sb_len_wr; /* [o] byte count actually written to sbp */ + unsigned short host_status; /* [o] errors from host adapter */ + unsigned short driver_status; /* [o] errors from software driver */ + compat_int_t resid; /* [o] dxfer_len - actual_transferred */ + compat_uint_t duration; /* [o] time taken by cmd (unit: millisec) */ + compat_uint_t info; /* [o] auxiliary information */ +} sg_io_hdr32_t; /* 64 bytes long (on sparc32) */ + +typedef struct sg_iovec32 { + compat_uint_t iov_base; + compat_uint_t iov_len; +} sg_iovec32_t; + +static int sg_build_iovec(sg_io_hdr_t __user *sgio, void __user *dxferp, u16 iovec_count) +{ + sg_iovec_t __user *iov = (sg_iovec_t __user *) (sgio + 1); + sg_iovec32_t __user *iov32 = dxferp; + int i; + + for (i = 0; i < iovec_count; i++) { + u32 base, len; + + if (get_user(base, &iov32[i].iov_base) || + get_user(len, &iov32[i].iov_len) || + put_user(compat_ptr(base), &iov[i].iov_base) || + put_user(len, &iov[i].iov_len)) + return -EFAULT; + } + + if (put_user(iov, &sgio->dxferp)) + return -EFAULT; + return 0; +} + +int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg) +{ + sg_io_hdr32_t __user *sgio32 = (sg_io_hdr32_t __user *)arg; + sg_io_hdr_t __user *sgio; + u16 iovec_count; + u32 data; + void __user *dxferp; + int err; + int interface_id; + + if (get_user(interface_id, &sgio32->interface_id)) + return -EFAULT; + if (interface_id != 'S') + return -EINVAL; + + if (get_user(iovec_count, &sgio32->iovec_count)) + return -EFAULT; + + { + void __user *top = compat_alloc_user_space(0); + void __user *new = compat_alloc_user_space(sizeof(sg_io_hdr_t) + + (iovec_count * sizeof(sg_iovec_t))); + if (new > top) + return -EINVAL; + + sgio = new; + } + + /* Ok, now construct. 
*/ + if (copy_in_user(&sgio->interface_id, &sgio32->interface_id, + (2 * sizeof(int)) + + (2 * sizeof(unsigned char)) + + (1 * sizeof(unsigned short)) + + (1 * sizeof(unsigned int)))) + return -EFAULT; + + if (get_user(data, &sgio32->dxferp)) + return -EFAULT; + dxferp = compat_ptr(data); + if (iovec_count) { + if (sg_build_iovec(sgio, dxferp, iovec_count)) + return -EFAULT; + } else { + if (put_user(dxferp, &sgio->dxferp)) + return -EFAULT; + } + + { + unsigned char __user *cmdp; + unsigned char __user *sbp; + + if (get_user(data, &sgio32->cmdp)) + return -EFAULT; + cmdp = compat_ptr(data); + + if (get_user(data, &sgio32->sbp)) + return -EFAULT; + sbp = compat_ptr(data); + + if (put_user(cmdp, &sgio->cmdp) || + put_user(sbp, &sgio->sbp)) + return -EFAULT; + } + + if (copy_in_user(&sgio->timeout, &sgio32->timeout, + 3 * sizeof(int))) + return -EFAULT; + + if (get_user(data, &sgio32->usr_ptr)) + return -EFAULT; + if (put_user(compat_ptr(data), &sgio->usr_ptr)) + return -EFAULT; + + err = nvme_sg_io(ns, sgio); + if (err >= 0) { + void __user *datap; + + if (copy_in_user(&sgio32->pack_id, &sgio->pack_id, + sizeof(int)) || + get_user(datap, &sgio->usr_ptr) || + put_user((u32)(unsigned long)datap, + &sgio32->usr_ptr) || + copy_in_user(&sgio32->status, &sgio->status, + (4 * sizeof(unsigned char)) + + (2 * sizeof(unsigned short)) + + (3 * sizeof(int)))) + err = -EFAULT; + } + + return err; +} +#endif + int nvme_sg_get_version_num(int __user *ip) { return put_user(sg_version_num, ip); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 8119a476cb29..5bc29197d90e 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -165,6 +165,7 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, struct sg_io_hdr; int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr); +int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg); int nvme_sg_get_version_num(int __user *ip); #endif /* _LINUX_NVME_H */ From 0a8d44cb33377969337fa6c1961b631605d5f453 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 15 Oct 2013 15:01:10 -0400 Subject: [PATCH 006/171] NVMe: Fix lockdep warnings During the initialisation path, the queue lock is taken without interrupt protection. It's perfectly safe to do so, because the interrupt handler can't run at this point, but it confuses lockdep. 
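The shape of the fix, condensed from the hunks below: the init-path critical sections now disable interrupts around q_lock, so lockdep sees the lock used consistently with the completion path that takes it from hard-irq context:

	spin_lock_irq(&nvmeq->q_lock);	/* was spin_lock(); safe either way, */
	nvme_init_queue(nvmeq, qid);	/* but lockdep cannot know the IRQ is not wired up yet */
	spin_unlock_irq(&nvmeq->q_lock);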
Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index e44350de8a21..0e9c5dcb2bd7 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1172,9 +1172,9 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) if (result < 0) goto release_sq; - spin_lock(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->q_lock); nvme_init_queue(nvmeq, qid); - spin_unlock(&nvmeq->q_lock); + spin_unlock_irq(&nvmeq->q_lock); return result; @@ -1290,9 +1290,9 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) if (result) return result; - spin_lock(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->q_lock); nvme_init_queue(nvmeq, 0); - spin_unlock(&nvmeq->q_lock); + spin_unlock_irq(&nvmeq->q_lock); return result; } @@ -1836,9 +1836,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) for (i = dev->queue_count - 1; i > nr_io_queues; i--) { struct nvme_queue *nvmeq = dev->queues[i]; - spin_lock(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->q_lock); nvme_cancel_ios(nvmeq, false); - spin_unlock(&nvmeq->q_lock); + spin_unlock_irq(&nvmeq->q_lock); nvme_free_queue(nvmeq); dev->queue_count--; From 68608c268bf265b039daeaafee6c902dfef32024 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 21 Jun 2013 14:36:34 -0400 Subject: [PATCH 007/171] NVMe: Cache dev->pci_dev in a local pointer Helps with line-length issues Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 0e9c5dcb2bd7..300766973d76 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1891,6 +1891,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) */ static int nvme_dev_add(struct nvme_dev *dev) { + struct pci_dev *pdev = dev->pci_dev; int res; unsigned nn, i; struct nvme_ns *ns; @@ -1900,8 +1901,7 @@ static int nvme_dev_add(struct nvme_dev *dev) dma_addr_t dma_addr; int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; - mem = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr, - GFP_KERNEL); + mem = dma_alloc_coherent(&pdev->dev, 8192, &dma_addr, GFP_KERNEL); if (!mem) return -ENOMEM; @@ -1919,8 +1919,8 @@ static int nvme_dev_add(struct nvme_dev *dev) memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); if (ctrl->mdts) dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9); - if ((dev->pci_dev->vendor == PCI_VENDOR_ID_INTEL) && - (dev->pci_dev->device == 0x0953) && ctrl->vs[3]) + if ((pdev->vendor == PCI_VENDOR_ID_INTEL) && + (pdev->device == 0x0953) && ctrl->vs[3]) dev->stripe_size = 1 << (ctrl->vs[3] + shift); id_ns = mem; From 9a6b94584de1a0467d85b435df9c744c5c45a270 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 10 Dec 2013 13:10:36 -0700 Subject: [PATCH 008/171] NVMe: Device resume error handling Adds controller error handling on resume power management. If the device fails to initialize, the device is queued for a reset. If the reset fails, a thread is spawned to remove the pci device. If the device resumes as "busy", the device is responding to admin commands but will not create IO queues. In this case, we need to remove the gendisks and free the IO queues since they can't be used and may be holding bios in their lists. 
From testing, the dma pools require a pci device so this had to change the pci driver 'remove' to release the dma resources in line with that call instead of after all references to the device are released. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 108 ++++++++++++++++++++++++++++++++------ include/linux/nvme.h | 1 + 2 files changed, 94 insertions(+), 15 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 300766973d76..000bca43c23b 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -58,6 +58,7 @@ module_param(use_threaded_interrupts, int, 0); static DEFINE_SPINLOCK(dev_list_lock); static LIST_HEAD(dev_list); static struct task_struct *nvme_thread; +static struct workqueue_struct *nvme_workq; /* * An NVM Express queue. Each device has at least two (one for admin @@ -1968,7 +1969,6 @@ static int nvme_dev_map(struct nvme_dev *dev) dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) goto disable; - pci_set_drvdata(pdev, dev); dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); if (!dev->bar) goto disable; @@ -1995,9 +1995,9 @@ static void nvme_dev_unmap(struct nvme_dev *dev) if (dev->bar) { iounmap(dev->bar); dev->bar = NULL; + pci_release_regions(dev->pci_dev); } - pci_release_regions(dev->pci_dev); if (pci_is_enabled(dev->pci_dev)) pci_disable_device(dev->pci_dev); } @@ -2085,11 +2085,6 @@ static void nvme_release_instance(struct nvme_dev *dev) static void nvme_free_dev(struct kref *kref) { struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); - nvme_dev_remove(dev); - nvme_dev_shutdown(dev); - nvme_free_queues(dev); - nvme_release_instance(dev); - nvme_release_prp_pools(dev); kfree(dev->queues); kfree(dev->entry); kfree(dev); @@ -2161,6 +2156,70 @@ static int nvme_dev_start(struct nvme_dev *dev) return result; } +static int nvme_remove_dead_ctrl(void *arg) +{ + struct nvme_dev *dev = (struct nvme_dev *)arg; + struct pci_dev *pdev = dev->pci_dev; + + if (pci_get_drvdata(pdev)) + pci_stop_and_remove_bus_device(pdev); + kref_put(&dev->kref, nvme_free_dev); + return 0; +} + +static void nvme_remove_disks(struct work_struct *ws) +{ + int i; + struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work); + + nvme_dev_remove(dev); + spin_lock(&dev_list_lock); + for (i = dev->queue_count - 1; i > 0; i--) { + BUG_ON(!dev->queues[i] || !dev->queues[i]->q_suspended); + nvme_free_queue(dev->queues[i]); + dev->queue_count--; + dev->queues[i] = NULL; + } + spin_unlock(&dev_list_lock); +} + +static int nvme_dev_resume(struct nvme_dev *dev) +{ + int ret; + + ret = nvme_dev_start(dev); + if (ret && ret != -EBUSY) + return ret; + if (ret == -EBUSY) { + spin_lock(&dev_list_lock); + INIT_WORK(&dev->reset_work, nvme_remove_disks); + queue_work(nvme_workq, &dev->reset_work); + spin_unlock(&dev_list_lock); + } + return 0; +} + +static void nvme_dev_reset(struct nvme_dev *dev) +{ + nvme_dev_shutdown(dev); + if (nvme_dev_resume(dev)) { + dev_err(&dev->pci_dev->dev, "Device failed to resume\n"); + kref_get(&dev->kref); + if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d", + dev->instance))) { + dev_err(&dev->pci_dev->dev, + "Failed to start controller remove task\n"); + kref_put(&dev->kref, nvme_free_dev); + } + } +} + +static void nvme_reset_failed_dev(struct work_struct *ws) +{ + struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work); + nvme_dev_reset(dev); +} + static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int result = -ENOMEM; 
@@ -2180,7 +2239,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) INIT_LIST_HEAD(&dev->namespaces); dev->pci_dev = pdev; - + pci_set_drvdata(pdev, dev); result = nvme_set_instance(dev); if (result) goto free; @@ -2232,7 +2291,19 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) static void nvme_remove(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); + + spin_lock(&dev_list_lock); + list_del_init(&dev->node); + spin_unlock(&dev_list_lock); + + pci_set_drvdata(pdev, NULL); + flush_work(&dev->reset_work); misc_deregister(&dev->miscdev); + nvme_dev_remove(dev); + nvme_dev_shutdown(dev); + nvme_free_queues(dev); + nvme_release_instance(dev); + nvme_release_prp_pools(dev); kref_put(&dev->kref, nvme_free_dev); } @@ -2256,13 +2327,12 @@ static int nvme_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct nvme_dev *ndev = pci_get_drvdata(pdev); - int ret; - ret = nvme_dev_start(ndev); - /* XXX: should remove gendisks if resume fails */ - if (ret) - nvme_free_queues(ndev); - return ret; + if (nvme_dev_resume(ndev) && !work_busy(&ndev->reset_work)) { + INIT_WORK(&ndev->reset_work, nvme_reset_failed_dev); + queue_work(nvme_workq, &ndev->reset_work); + } + return 0; } static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume); @@ -2303,9 +2373,14 @@ static int __init nvme_init(void) if (IS_ERR(nvme_thread)) return PTR_ERR(nvme_thread); + result = -ENOMEM; + nvme_workq = create_singlethread_workqueue("nvme"); + if (!nvme_workq) + goto kill_kthread; + result = register_blkdev(nvme_major, "nvme"); if (result < 0) - goto kill_kthread; + goto kill_workq; else if (result > 0) nvme_major = result; @@ -2316,6 +2391,8 @@ static int __init nvme_init(void) unregister_blkdev: unregister_blkdev(nvme_major, "nvme"); + kill_workq: + destroy_workqueue(nvme_workq); kill_kthread: kthread_stop(nvme_thread); return result; @@ -2325,6 +2402,7 @@ static void __exit nvme_exit(void) { pci_unregister_driver(&nvme_driver); unregister_blkdev(nvme_major, "nvme"); + destroy_workqueue(nvme_workq); kthread_stop(nvme_thread); } diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 5bc29197d90e..eed81cc56d7e 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -87,6 +87,7 @@ struct nvme_dev { struct list_head namespaces; struct kref kref; struct miscdevice miscdev; + struct work_struct reset_work; char name[12]; char serial[20]; char model[40]; From b8989db9d82465bf38a48a4d3ef32e7d8afc4d08 Mon Sep 17 00:00:00 2001 From: Alan Date: Mon, 20 Jan 2014 18:01:56 +0000 Subject: [PATCH 009/171] x86, doc, kconfig: Fix dud URL for Microcode data The actual data lives in the Intel download center, and that ought to also be a reliable way to continue to find it. Unfortunately the actual URL needed for doing it directly is about a foot long so give instructions. Signed-off-by: Alan Cox Link: http://lkml.kernel.org/r/20140120180056.7173.62222.stgit@alan.etchedpixels.co.uk Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0952ecd60eca..64199bc08d66 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1064,9 +1064,9 @@ config MICROCODE_INTEL This options enables microcode patch loading support for Intel processors. - For latest news and information on obtaining all the required - Intel ingredients for this driver, check: - . 
+ For the current Intel microcode data package go to + and search for + 'Linux Processor Microcode Data File'. config MICROCODE_AMD bool "AMD microcode loading support" From 3aba55605326be6d7e624090858aa921ab519cda Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 4 Dec 2013 21:05:46 +0900 Subject: [PATCH 010/171] irqchip: renesas-irqc: Use lazy disable Set the ->irq_enable() and ->irq_disable() methods to NULL to enable lazy disable of interrupts. This by itself provides some level of optimization, but is mainly enabled as ground work for future Suspend-to-RAM wake up support. Signed-off-by: Magnus Damm Cc: rob.herring@calxeda.com Cc: grant.likely@secretlab.ca Cc: horms@verge.net.au Link: http://lkml.kernel.org/r/20131204120546.29642.15772.sendpatchset@w520 Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-renesas-irqc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c index 2f404ba61c6c..8fdd7d68cd00 100644 --- a/drivers/irqchip/irq-renesas-irqc.c +++ b/drivers/irqchip/irq-renesas-irqc.c @@ -212,8 +212,6 @@ static int irqc_probe(struct platform_device *pdev) irq_chip->name = name; irq_chip->irq_mask = irqc_irq_disable; irq_chip->irq_unmask = irqc_irq_enable; - irq_chip->irq_enable = irqc_irq_enable; - irq_chip->irq_disable = irqc_irq_disable; irq_chip->irq_set_type = irqc_irq_set_type; irq_chip->flags = IRQCHIP_SKIP_SET_WAKE; From 43881ec7a88a3d3b2fd6da58168173e135b41fba Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 4 Dec 2013 21:05:56 +0900 Subject: [PATCH 011/171] irqchip: renesas-irqc: Enable mask on suspend Now when lazy interrupt disable has been enabled in the driver then extend the code to set IRQCHIP_MASK_ON_SUSPEND which tells the core that only IRQs marked as wakeups need to stay enabled during Suspend-to-RAM. Signed-off-by: Magnus Damm Cc: rob.herring@calxeda.com Cc: grant.likely@secretlab.ca Cc: horms@verge.net.au Link: http://lkml.kernel.org/r/20131204120556.29642.27021.sendpatchset@w520 Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-renesas-irqc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c index 8fdd7d68cd00..082d95cb5528 100644 --- a/drivers/irqchip/irq-renesas-irqc.c +++ b/drivers/irqchip/irq-renesas-irqc.c @@ -213,7 +213,7 @@ static int irqc_probe(struct platform_device *pdev) irq_chip->irq_mask = irqc_irq_disable; irq_chip->irq_unmask = irqc_irq_enable; irq_chip->irq_set_type = irqc_irq_set_type; - irq_chip->flags = IRQCHIP_SKIP_SET_WAKE; + irq_chip->flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND; p->irq_domain = irq_domain_add_simple(pdev->dev.of_node, p->number_of_irqs, From 397e7b515785cad6e10b29f3001fd80c3f519bb8 Mon Sep 17 00:00:00 2001 From: Daniel Tang Date: Thu, 5 Dec 2013 17:12:17 +1100 Subject: [PATCH 012/171] irqchip: Add support for TI-NSPIRE irqchip This patch adds support for the interrupt controllers found in some TI-Nspire models. FIQ support was taken out to simplify the driver code and may be added in later. Since Linux on this platform doesn't really use FIQs, this wasn't really that important in the first place. 
[ tglx: Made zevio_handle_irq static and reordered __init functions ] Signed-off-by: Daniel Tang Acked-by: Grant Likely Link: http://lkml.kernel.org/r/1386223937-12189-1-git-send-email-dt.tangr@gmail.com Signed-off-by: Thomas Gleixner --- .../interrupt-controller/lsi,zevio-intc.txt | 18 +++ drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-zevio.c | 127 ++++++++++++++++++ 3 files changed, 146 insertions(+) create mode 100644 Documentation/devicetree/bindings/interrupt-controller/lsi,zevio-intc.txt create mode 100644 drivers/irqchip/irq-zevio.c diff --git a/Documentation/devicetree/bindings/interrupt-controller/lsi,zevio-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/lsi,zevio-intc.txt new file mode 100644 index 000000000000..aee38e7c13e7 --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/lsi,zevio-intc.txt @@ -0,0 +1,18 @@ +TI-NSPIRE interrupt controller + +Required properties: +- compatible: Compatible property value should be "lsi,zevio-intc". + +- reg: Physical base address of the controller and length of memory mapped + region. + +- interrupt-controller : Identifies the node as an interrupt controller + +Example: + +interrupt-controller { + compatible = "lsi,zevio-intc"; + interrupt-controller; + reg = <0xDC000000 0x1000>; + #interrupt-cells = <1>; +}; diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index c60b9010b152..292b10645fd8 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -20,5 +20,6 @@ obj-$(CONFIG_SIRF_IRQ) += irq-sirfsoc.o obj-$(CONFIG_RENESAS_INTC_IRQPIN) += irq-renesas-intc-irqpin.o obj-$(CONFIG_RENESAS_IRQC) += irq-renesas-irqc.o obj-$(CONFIG_VERSATILE_FPGA_IRQ) += irq-versatile-fpga.o +obj-$(CONFIG_ARCH_NSPIRE) += irq-zevio.o obj-$(CONFIG_ARCH_VT8500) += irq-vt8500.o obj-$(CONFIG_TB10X_IRQC) += irq-tb10x.o diff --git a/drivers/irqchip/irq-zevio.c b/drivers/irqchip/irq-zevio.c new file mode 100644 index 000000000000..8ed04c4a43ee --- /dev/null +++ b/drivers/irqchip/irq-zevio.c @@ -0,0 +1,127 @@ +/* + * linux/drivers/irqchip/irq-zevio.c + * + * Copyright (C) 2013 Daniel Tang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. 
+ * + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include "irqchip.h" + +#define IO_STATUS 0x000 +#define IO_RAW_STATUS 0x004 +#define IO_ENABLE 0x008 +#define IO_DISABLE 0x00C +#define IO_CURRENT 0x020 +#define IO_RESET 0x028 +#define IO_MAX_PRIOTY 0x02C + +#define IO_IRQ_BASE 0x000 +#define IO_FIQ_BASE 0x100 + +#define IO_INVERT_SEL 0x200 +#define IO_STICKY_SEL 0x204 +#define IO_PRIORITY_SEL 0x300 + +#define MAX_INTRS 32 +#define FIQ_START MAX_INTRS + +static struct irq_domain *zevio_irq_domain; +static void __iomem *zevio_irq_io; + +static void zevio_irq_ack(struct irq_data *irqd) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(irqd); + struct irq_chip_regs *regs = + &container_of(irqd->chip, struct irq_chip_type, chip)->regs; + + readl(gc->reg_base + regs->ack); +} + +static asmlinkage void __exception_irq_entry zevio_handle_irq(struct pt_regs *regs) +{ + int irqnr; + + while (readl(zevio_irq_io + IO_STATUS)) { + irqnr = readl(zevio_irq_io + IO_CURRENT); + irqnr = irq_find_mapping(zevio_irq_domain, irqnr); + handle_IRQ(irqnr, regs); + }; +} + +static void __init zevio_init_irq_base(void __iomem *base) +{ + /* Disable all interrupts */ + writel(~0, base + IO_DISABLE); + + /* Accept interrupts of all priorities */ + writel(0xF, base + IO_MAX_PRIOTY); + + /* Reset existing interrupts */ + readl(base + IO_RESET); +} + +static int __init zevio_of_init(struct device_node *node, + struct device_node *parent) +{ + unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; + struct irq_chip_generic *gc; + int ret; + + if (WARN_ON(zevio_irq_io || zevio_irq_domain)) + return -EBUSY; + + zevio_irq_io = of_iomap(node, 0); + BUG_ON(!zevio_irq_io); + + /* Do not invert interrupt status bits */ + writel(~0, zevio_irq_io + IO_INVERT_SEL); + + /* Disable sticky interrupts */ + writel(0, zevio_irq_io + IO_STICKY_SEL); + + /* We don't use IRQ priorities. Set each IRQ to highest priority. 
*/ + memset_io(zevio_irq_io + IO_PRIORITY_SEL, 0, MAX_INTRS * sizeof(u32)); + + /* Init IRQ and FIQ */ + zevio_init_irq_base(zevio_irq_io + IO_IRQ_BASE); + zevio_init_irq_base(zevio_irq_io + IO_FIQ_BASE); + + zevio_irq_domain = irq_domain_add_linear(node, MAX_INTRS, + &irq_generic_chip_ops, NULL); + BUG_ON(!zevio_irq_domain); + + ret = irq_alloc_domain_generic_chips(zevio_irq_domain, MAX_INTRS, 1, + "zevio_intc", handle_level_irq, + clr, 0, IRQ_GC_INIT_MASK_CACHE); + BUG_ON(ret); + + gc = irq_get_domain_generic_chip(zevio_irq_domain, 0); + gc->reg_base = zevio_irq_io; + gc->chip_types[0].chip.irq_ack = zevio_irq_ack; + gc->chip_types[0].chip.irq_mask = irq_gc_mask_disable_reg; + gc->chip_types[0].chip.irq_unmask = irq_gc_unmask_enable_reg; + gc->chip_types[0].regs.mask = IO_IRQ_BASE + IO_ENABLE; + gc->chip_types[0].regs.enable = IO_IRQ_BASE + IO_ENABLE; + gc->chip_types[0].regs.disable = IO_IRQ_BASE + IO_DISABLE; + gc->chip_types[0].regs.ack = IO_IRQ_BASE + IO_RESET; + + set_handle_irq(zevio_handle_irq); + + pr_info("TI-NSPIRE classic IRQ controller\n"); + return 0; +} + +IRQCHIP_DECLARE(zevio_irq, "lsi,zevio-intc", zevio_of_init); From 23e21ddf6c3d9cb37aa69da496f80e8481e2c8bd Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 23 Jan 2014 15:20:01 +0100 Subject: [PATCH 013/171] regulator: ab3100: cast fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The AB3100 regulator driver emits a warning when compiled on 64bit systems like this: drivers/regulator/ab3100.c: In function ‘ab3100_regulator_of_probe’: srivers/regulator/ab3100.c:649:4: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] As the int is a different size than the 64bit pointer used to pass regulator data. Switch to using an unsigned long as ID passed for the regulator to get rid of the warning. Reported-by: Lee Jones Signed-off-by: Linus Walleij Signed-off-by: Mark Brown --- drivers/regulator/ab3100.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/ab3100.c b/drivers/regulator/ab3100.c index 77b46d0b37a6..e10febe9ec34 100644 --- a/drivers/regulator/ab3100.c +++ b/drivers/regulator/ab3100.c @@ -498,7 +498,7 @@ static int ab3100_regulator_register(struct platform_device *pdev, struct ab3100_platform_data *plfdata, struct regulator_init_data *init_data, struct device_node *np, - int id) + unsigned long id) { struct regulator_desc *desc; struct ab3100_regulator *reg; @@ -646,7 +646,7 @@ ab3100_regulator_of_probe(struct platform_device *pdev, struct device_node *np) err = ab3100_regulator_register( pdev, NULL, ab3100_regulator_matches[i].init_data, ab3100_regulator_matches[i].of_node, - (int) ab3100_regulator_matches[i].driver_data); + (unsigned long)ab3100_regulator_matches[i].driver_data); if (err) { ab3100_regulators_remove(pdev); return err; From fb53a1ab88d14848dc292842e35c3bda3a665997 Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Thu, 23 Jan 2014 16:13:32 -0600 Subject: [PATCH 014/171] x86/quirks: Add workaround for AMD F16h Erratum792 The workaround for this Erratum is included in AGESA. But BIOSes spun only after Jan2014 will have the fix (atleast server versions of the chip). The erratum affects both embedded and server platforms and since we cannot say with certainity that ALL BIOSes on systems out in the field will have the fix, we should probably insulate ourselves in case BIOS does not do the right thing or someone is using old BIOSes. 
Refer to Revision Guide for AMD F16h models 00h-0fh, document 51810 Rev. 3.04, November 2013 for details on the Erratum. Tested the patch on a Fam16h server platform and it works fine. Signed-off-by: Aravind Gopalakrishnan Cc: Cc: Cc: Cc: Cc: Link: http://lkml.kernel.org/r/1390515212-1824-1-git-send-email-Aravind.Gopalakrishnan@amd.com [ Minor edits. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/quirks.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 04ee1e2e4c02..7c6acd4b8995 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -571,3 +571,40 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F5, quirk_amd_nb_node); #endif + +#ifdef CONFIG_PCI +/* + * Processor does not ensure DRAM scrub read/write sequence + * is atomic wrt accesses to CC6 save state area. Therefore + * if a concurrent scrub read/write access is to same address + * the entry may appear as if it is not written. This quirk + * applies to Fam16h models 00h-0Fh + * + * See "Revision Guide" for AMD F16h models 00h-0fh, + * document 51810 rev. 3.04, Nov 2013 + */ +static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev) +{ + u32 val; + + /* + * Suggested workaround: + * set D18F3x58[4:0] = 00h and set D18F3x5C[0] = 0b + */ + pci_read_config_dword(dev, 0x58, &val); + if (val & 0x1F) { + val &= ~(0x1F); + pci_write_config_dword(dev, 0x58, val); + } + + pci_read_config_dword(dev, 0x5C, &val); + if (val & BIT(0)) { + val &= ~BIT(0); + pci_write_config_dword(dev, 0x5c, val); + } +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3, + amd_disable_seq_and_redirect_scrub); + +#endif From 2993ae3305ad10b41e0d0bc2662f7754ee8e30fa Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 21 Jan 2014 10:22:09 +0300 Subject: [PATCH 015/171] x86/AMD/NB: Fix amd_set_subcaches() parameter type This is under CAP_SYS_ADMIN, but Smatch complains that mask comes from the user and the test for "mask > 0xf" can underflow. The fix is simple: amd_set_subcaches() should hand down not an 'int' but an 'unsigned long' like it was originally intended to do.
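To make the Smatch complaint concrete, here is an illustrative fragment built around the quoted "mask > 0xf" test (not the driver code itself): with a signed parameter a negative value from userspace slips past the upper-bound check, while an unsigned long turns the same bit pattern into a huge value that the check rejects:

	int smask = -1;					/* user-supplied value, signed parameter */
	if (smask > 0xf)				/* false: -1 < 0xf, so the bogus mask is accepted */
		return -EINVAL;

	unsigned long umask = (unsigned long)-1;	/* same bits, unsigned parameter */
	if (umask > 0xf)				/* true: the out-of-range mask is rejected */
		return -EINVAL;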
Signed-off-by: Dan Carpenter Acked-by: Borislav Petkov Cc: Borislav Petkov Cc: Daniel J Blueman Link: http://lkml.kernel.org/r/20140121072209.GA22095@elgon.mountain Signed-off-by: Ingo Molnar --- arch/x86/include/asm/amd_nb.h | 2 +- arch/x86/kernel/amd_nb.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index a54ee1d054d9..aaac3b2fb746 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -19,7 +19,7 @@ extern int amd_cache_northbridges(void); extern void amd_flush_garts(void); extern int amd_numa_init(void); extern int amd_get_subcaches(int); -extern int amd_set_subcaches(int, int); +extern int amd_set_subcaches(int, unsigned long); struct amd_l3_cache { unsigned indices; diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 59554dca96ec..dec8de4e1663 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -179,7 +179,7 @@ int amd_get_subcaches(int cpu) return (mask >> (4 * cuid)) & 0xf; } -int amd_set_subcaches(int cpu, int mask) +int amd_set_subcaches(int cpu, unsigned long mask) { static unsigned int reset, ban; struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu)); From ec65993443736a5091b68e80ff1734548944a4b8 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 21 Jan 2014 14:33:16 -0800 Subject: [PATCH 016/171] mm, x86: Account for TLB flushes only when debugging Bisection between 3.11 and 3.12 fingered commit 9824cf97 ("mm: vmstats: tlb flush counters") to cause overhead problems. The counters are undeniably useful but how often do we really need to debug TLB flush related issues? It does not justify taking the penalty everywhere so make it a debugging option. Signed-off-by: Mel Gorman Tested-by: Davidlohr Bueso Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Cc: Hugh Dickins Cc: Alex Shi Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-XzxjntugxuwpxXhcrxqqh53b@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 6 +++--- arch/x86/kernel/cpu/mtrr/generic.c | 4 ++-- arch/x86/mm/tlb.c | 14 +++++++------- include/linux/vm_event_item.h | 4 +++- include/linux/vmstat.h | 8 ++++++++ mm/vmstat.c | 4 +++- 6 files changed, 26 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index e6d90babc245..04905bfc508b 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -62,7 +62,7 @@ static inline void __flush_tlb_all(void) static inline void __flush_tlb_one(unsigned long addr) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ONE); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); __flush_tlb_single(addr); } @@ -93,13 +93,13 @@ static inline void __flush_tlb_one(unsigned long addr) */ static inline void __flush_tlb_up(void) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); __flush_tlb(); } static inline void flush_tlb_all(void) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); __flush_tlb_all(); } diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index ce2d0a2c3e4f..0e25a1bc5ab5 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -683,7 +683,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) } /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); 
__flush_tlb(); /* Save MTRR state */ @@ -697,7 +697,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) static void post_set(void) __releases(set_atomicity_lock) { /* Flush TLBs (no need to flush caches - they are disabled) */ - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); __flush_tlb(); /* Intel (P6) standard MTRRs */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index ae699b3bbac8..05446c1cccfe 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -103,7 +103,7 @@ static void flush_tlb_func(void *info) if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) return; - count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { if (f->flush_end == TLB_FLUSH_ALL) local_flush_tlb(); @@ -131,7 +131,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, info.flush_start = start; info.flush_end = end; - count_vm_event(NR_TLB_REMOTE_FLUSH); + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); if (is_uv_system()) { unsigned int cpu; @@ -151,7 +151,7 @@ void flush_tlb_current_task(void) preempt_disable(); - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); @@ -215,7 +215,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, /* tlb_flushall_shift is on balance point, details in commit log */ if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); } else { if (has_large_page(mm, start, end)) { @@ -224,7 +224,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, } /* flush range by one by one 'invlpg' */ for (addr = start; addr < end; addr += PAGE_SIZE) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ONE); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); __flush_tlb_single(addr); } @@ -262,7 +262,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) static void do_flush_tlb_all(void *info) { - count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); __flush_tlb_all(); if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) leave_mm(smp_processor_id()); @@ -270,7 +270,7 @@ static void do_flush_tlb_all(void *info) void flush_tlb_all(void) { - count_vm_event(NR_TLB_REMOTE_FLUSH); + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); on_each_cpu(do_flush_tlb_all, NULL, 1); } diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index c557c6d096de..3a712e2e7d76 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -71,12 +71,14 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_ZERO_PAGE_ALLOC, THP_ZERO_PAGE_ALLOC_FAILED, #endif +#ifdef CONFIG_DEBUG_TLBFLUSH #ifdef CONFIG_SMP NR_TLB_REMOTE_FLUSH, /* cpu tried to flush others' tlbs */ NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */ -#endif +#endif /* CONFIG_SMP */ NR_TLB_LOCAL_FLUSH_ALL, NR_TLB_LOCAL_FLUSH_ONE, +#endif /* CONFIG_DEBUG_TLBFLUSH */ NR_VM_EVENT_ITEMS }; diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index e4b948080d20..80ebba9c2e87 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -83,6 +83,14 @@ static inline void vm_events_fold_cpu(int cpu) #define count_vm_numa_events(x, y) do { (void)(y); } while (0) #endif 
/* CONFIG_NUMA_BALANCING */ +#ifdef CONFIG_DEBUG_TLBFLUSH +#define count_vm_tlb_event(x) count_vm_event(x) +#define count_vm_tlb_events(x, y) count_vm_events(x, y) +#else +#define count_vm_tlb_event(x) do {} while (0) +#define count_vm_tlb_events(x, y) do { (void)(y); } while (0) +#endif + #define __count_zone_vm_events(item, zone, delta) \ __count_vm_events(item##_NORMAL - ZONE_NORMAL + \ zone_idx(zone), delta) diff --git a/mm/vmstat.c b/mm/vmstat.c index 72496140ac08..def5dd2fbe61 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -851,12 +851,14 @@ const char * const vmstat_text[] = { "thp_zero_page_alloc", "thp_zero_page_alloc_failed", #endif +#ifdef CONFIG_DEBUG_TLBFLUSH #ifdef CONFIG_SMP "nr_tlb_remote_flush", "nr_tlb_remote_flush_received", -#endif +#endif /* CONFIG_SMP */ "nr_tlb_local_flush_all", "nr_tlb_local_flush_one", +#endif /* CONFIG_DEBUG_TLBFLUSH */ #endif /* CONFIG_VM_EVENTS_COUNTERS */ }; From 15aa368255f249df0b2af630c9487bb5471bd7da Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 21 Jan 2014 14:33:18 -0800 Subject: [PATCH 017/171] x86/mm: Clean up inconsistencies when flushing TLB ranges NR_TLB_LOCAL_FLUSH_ALL is not always accounted for correctly and the comparison with total_vm is done before taking tlb_flushall_shift into account. Clean it up. Signed-off-by: Mel Gorman Tested-by: Davidlohr Bueso Reviewed-by: Alex Shi Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Hugh Dickins Link: http://lkml.kernel.org/n/tip-Iz5gcahrgskIldvukulzi0hh@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/tlb.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 05446c1cccfe..5176526ddd59 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -189,6 +189,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, { unsigned long addr; unsigned act_entries, tlb_entries = 0; + unsigned long nr_base_pages; preempt_disable(); if (current->active_mm != mm) @@ -210,18 +211,17 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, tlb_entries = tlb_lli_4k[ENTRIES]; else tlb_entries = tlb_lld_4k[ENTRIES]; + /* Assume all of TLB entries was occupied by this task */ - act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm; + act_entries = tlb_entries >> tlb_flushall_shift; + act_entries = mm->total_vm > act_entries ? act_entries : mm->total_vm; + nr_base_pages = (end - start) >> PAGE_SHIFT; /* tlb_flushall_shift is on balance point, details in commit log */ - if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) { + if (nr_base_pages > act_entries || has_large_page(mm, start, end)) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); } else { - if (has_large_page(mm, start, end)) { - local_flush_tlb(); - goto flush_all; - } /* flush range by one by one 'invlpg' */ for (addr = start; addr < end; addr += PAGE_SIZE) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); From 71b54f8263860a37dd9f50f81880a9d681fd9c10 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 21 Jan 2014 14:33:19 -0800 Subject: [PATCH 018/171] x86/mm: Eliminate redundant page table walk during TLB range flushing When choosing between doing an address space or ranged flush, the x86 implementation of flush_tlb_mm_range takes into account whether there are any large pages in the range. A per-page flush typically requires fewer entries than would covered by a single large page and the check is redundant. There is one potential exception. 
THP migration flushes single THP entries and it conceivably would benefit from flushing a single entry instead of the mm. However, this flush is after a THP allocation, copy and page table update potentially with any other threads serialised behind it. In comparison to that, the flush is noise. It makes more sense to optimise balancing to require fewer flushes than to optimise the flush itself. This patch deletes the redundant huge page check. Signed-off-by: Mel Gorman Tested-by: Davidlohr Bueso Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Cc: Alex Shi Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-sgei1drpOcburujPsfh6ovmo@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/tlb.c | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 5176526ddd59..dd8dda167a24 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -158,32 +158,6 @@ void flush_tlb_current_task(void) preempt_enable(); } -/* - * It can find out the THP large page, or - * HUGETLB page in tlb_flush when THP disabled - */ -static inline unsigned long has_large_page(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - unsigned long addr = ALIGN(start, HPAGE_SIZE); - for (; addr < end; addr += HPAGE_SIZE) { - pgd = pgd_offset(mm, addr); - if (likely(!pgd_none(*pgd))) { - pud = pud_offset(pgd, addr); - if (likely(!pud_none(*pud))) { - pmd = pmd_offset(pud, addr); - if (likely(!pmd_none(*pmd))) - if (pmd_large(*pmd)) - return addr; - } - } - } - return 0; -} - void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long vmflag) { @@ -218,7 +192,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, nr_base_pages = (end - start) >> PAGE_SHIFT; /* tlb_flushall_shift is on balance point, details in commit log */ - if (nr_base_pages > act_entries || has_large_page(mm, start, end)) { + if (nr_base_pages > act_entries) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); } else { From f98b7a772ab51b52ca4d2a14362fc0e0c8a2e0f3 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 21 Jan 2014 14:33:21 -0800 Subject: [PATCH 019/171] x86: mm: change tlb_flushall_shift for IvyBridge There was a large performance regression that was bisected to commit 611ae8e3 ("x86/tlb: enable tlb flush range support for x86"). This patch simply changes the default balance point between a local and global flush for IvyBridge. In the interest of allowing the tests to be reproduced, this patch was tested using mmtests 0.15 with the following configurations configs/config-global-dhp__tlbflush-performance configs/config-global-dhp__scheduler-performance configs/config-global-dhp__network-performance Results are from two machines Ivybridge 4 threads: Intel(R) Core(TM) i3-3240 CPU @ 3.40GHz Ivybridge 8 threads: Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz Page fault microbenchmark showed nothing interesting. Ebizzy was configured to run multiple iterations and threads. Thread counts ranged from 1 to NR_CPUS*2. For each thread count, it ran 100 iterations and each iteration lasted 10 seconds. 
Ivybridge 4 threads 3.13.0-rc7 3.13.0-rc7 vanilla altshift-v3 Mean 1 6395.44 ( 0.00%) 6789.09 ( 6.16%) Mean 2 7012.85 ( 0.00%) 8052.16 ( 14.82%) Mean 3 6403.04 ( 0.00%) 6973.74 ( 8.91%) Mean 4 6135.32 ( 0.00%) 6582.33 ( 7.29%) Mean 5 6095.69 ( 0.00%) 6526.68 ( 7.07%) Mean 6 6114.33 ( 0.00%) 6416.64 ( 4.94%) Mean 7 6085.10 ( 0.00%) 6448.51 ( 5.97%) Mean 8 6120.62 ( 0.00%) 6462.97 ( 5.59%) Ivybridge 8 threads 3.13.0-rc7 3.13.0-rc7 vanilla altshift-v3 Mean 1 7336.65 ( 0.00%) 7787.02 ( 6.14%) Mean 2 8218.41 ( 0.00%) 9484.13 ( 15.40%) Mean 3 7973.62 ( 0.00%) 8922.01 ( 11.89%) Mean 4 7798.33 ( 0.00%) 8567.03 ( 9.86%) Mean 5 7158.72 ( 0.00%) 8214.23 ( 14.74%) Mean 6 6852.27 ( 0.00%) 7952.45 ( 16.06%) Mean 7 6774.65 ( 0.00%) 7536.35 ( 11.24%) Mean 8 6510.50 ( 0.00%) 6894.05 ( 5.89%) Mean 12 6182.90 ( 0.00%) 6661.29 ( 7.74%) Mean 16 6100.09 ( 0.00%) 6608.69 ( 8.34%) Ebizzy hits the worst case scenario for TLB range flushing every time and it shows for these Ivybridge CPUs at least that the default choice is a poor on. The patch addresses the problem. Next was a tlbflush microbenchmark written by Alex Shi at http://marc.info/?l=linux-kernel&m=133727348217113 . It measures access costs while the TLB is being flushed. The expectation is that if there are always full TLB flushes that the benchmark would suffer and it benefits from range flushing There are 320 iterations of the test per thread count. The number of entries is randomly selected with a min of 1 and max of 512. To ensure a reasonably even spread of entries, the full range is broken up into 8 sections and a random number selected within that section. iteration 1, random number between 0-64 iteration 2, random number between 64-128 etc This is still a very weak methodology. When you do not know what are typical ranges, random is a reasonable choice but it can be easily argued that the opimisation was for smaller ranges and an even spread is not representative of any workload that matters. To improve this, we'd need to know the probability distribution of TLB flush range sizes for a set of workloads that are considered "common", build a synthetic trace and feed that into this benchmark. Even that is not perfect because it would not account for the time between flushes but there are limits of what can be reasonably done and still be doing something useful. If a representative synthetic trace is provided then this benchmark could be revisited and the shift values retuned. Ivybridge 4 threads 3.13.0-rc7 3.13.0-rc7 vanilla altshift-v3 Mean 1 10.50 ( 0.00%) 10.50 ( 0.03%) Mean 2 17.59 ( 0.00%) 17.18 ( 2.34%) Mean 3 22.98 ( 0.00%) 21.74 ( 5.41%) Mean 5 47.13 ( 0.00%) 46.23 ( 1.92%) Mean 8 43.30 ( 0.00%) 42.56 ( 1.72%) Ivybridge 8 threads 3.13.0-rc7 3.13.0-rc7 vanilla altshift-v3 Mean 1 9.45 ( 0.00%) 9.36 ( 0.93%) Mean 2 9.37 ( 0.00%) 9.70 ( -3.54%) Mean 3 9.36 ( 0.00%) 9.29 ( 0.70%) Mean 5 14.49 ( 0.00%) 15.04 ( -3.75%) Mean 8 41.08 ( 0.00%) 38.73 ( 5.71%) Mean 13 32.04 ( 0.00%) 31.24 ( 2.49%) Mean 16 40.05 ( 0.00%) 39.04 ( 2.51%) For both CPUs, average access time is reduced which is good as this is the benchmark that was used to tune the shift values in the first place albeit it is now known *how* the benchmark was used. The scheduler benchmarks were somewhat inconclusive. They showed gains and losses and makes me reconsider how stable those benchmarks really are or if something else might be interfering with the test results recently. Network benchmarks were inconclusive. 
Almost all results were flat except for netperf-udp tests on the 4 thread machine. These results were unstable and showed large variations between reboots. It is unknown if this is a recent problems but I've noticed before that netperf-udp results tend to vary. Based on these results, changing the default for Ivybridge seems like a logical choice. Signed-off-by: Mel Gorman Tested-by: Davidlohr Bueso Reviewed-by: Alex Shi Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-cqnadffh1tiqrshthRj3Esge@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index ea04b342c026..bbe1b8b1f1c4 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -628,7 +628,7 @@ static void intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c) tlb_flushall_shift = 5; break; case 0x63a: /* Ivybridge */ - tlb_flushall_shift = 1; + tlb_flushall_shift = 2; break; default: tlb_flushall_shift = 6; From b9a3b4c976c1209957326537ad5c0bb633dfd764 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 21 Jan 2014 14:33:22 -0800 Subject: [PATCH 020/171] mm, x86: Revisit tlb_flushall_shift tuning for page flushes except on IvyBridge There was a large ebizzy performance regression that was bisected to commit 611ae8e3 (x86/tlb: enable tlb flush range support for x86). The problem was related to the tlb_flushall_shift tuning for IvyBridge which was altered. The problem is that it is not clear if the tuning values for each CPU family is correct as the methodology used to tune the values is unclear. This patch uses a conservative tlb_flushall_shift value for all CPU families except IvyBridge so the decision can be revisited if any regression is found as a result of this change. IvyBridge is an exception as testing with one methodology determined that the value of 2 is acceptable. Details are in the changelog for the patch "x86: mm: Change tlb_flushall_shift for IvyBridge". One important aspect of this to watch out for is Xen. The original commit log mentioned large performance gains on Xen. It's possible Xen is more sensitive to this value if it flushes small ranges of pages more frequently than workloads on bare metal typically do. 
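For reference, this is the heuristic the knob feeds, condensed from the flush_tlb_mm_range() hunks earlier in the series (variable names as in that code); a larger shift shrinks the per-range budget and so favours a full flush sooner:

	act_entries = tlb_entries >> tlb_flushall_shift;
	act_entries = mm->total_vm > act_entries ? act_entries : mm->total_vm;
	nr_base_pages = (end - start) >> PAGE_SHIFT;

	if (nr_base_pages > act_entries)
		local_flush_tlb();			/* flush the whole TLB */
	else
		for (addr = start; addr < end; addr += PAGE_SIZE)
			__flush_tlb_single(addr);	/* per-page invlpg */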
Signed-off-by: Mel Gorman Tested-by: Davidlohr Bueso Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Cc: Alex Shi Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-dyzMww3fqugnhbhgo6Gxmtkw@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 5 +---- arch/x86/kernel/cpu/intel.c | 10 +++------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 59bfebc8c805..96abccaada33 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -768,10 +768,7 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) static void cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c) { - tlb_flushall_shift = 5; - - if (c->x86 <= 0x11) - tlb_flushall_shift = 4; + tlb_flushall_shift = 6; } static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index bbe1b8b1f1c4..d358a3928b8f 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -615,21 +615,17 @@ static void intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c) case 0x61d: /* six-core 45 nm xeon "Dunnington" */ tlb_flushall_shift = -1; break; + case 0x63a: /* Ivybridge */ + tlb_flushall_shift = 2; + break; case 0x61a: /* 45 nm nehalem, "Bloomfield" */ case 0x61e: /* 45 nm nehalem, "Lynnfield" */ case 0x625: /* 32 nm nehalem, "Clarkdale" */ case 0x62c: /* 32 nm nehalem, "Gulftown" */ case 0x62e: /* 45 nm nehalem-ex, "Beckton" */ case 0x62f: /* 32 nm Xeon E7 */ - tlb_flushall_shift = 6; - break; case 0x62a: /* SandyBridge */ case 0x62d: /* SandyBridge, "Romely-EP" */ - tlb_flushall_shift = 5; - break; - case 0x63a: /* Ivybridge */ - tlb_flushall_shift = 2; - break; default: tlb_flushall_shift = 6; } From a85eba8814631d0d48361c8b9a7ee0984e80c03c Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 21 Jan 2014 14:33:15 -0800 Subject: [PATCH 021/171] arch/x86/mm/srat: Skip NUMA_NO_NODE while parsing SLIT When ACPI SLIT table has an I/O locality (i.e. a locality unique to an I/O device), numa_set_distance() emits this warning message: NUMA: Warning: node ids are out of bound, from=-1 to=-1 distance=10 acpi_numa_slit_init() calls numa_set_distance() with pxm_to_node(), which assumes that all localities have been parsed with SRAT previously. SRAT does not list I/O localities, where as SLIT lists all localities including I/Os. Hence, pxm_to_node() returns NUMA_NO_NODE (-1) for an I/O locality. I/O localities are not supported and are ignored today, but emitting such warning message leads to unnecessary confusion. Change acpi_numa_slit_init() to avoid calling numa_set_distance() with NUMA_NO_NODE. Signed-off-by: Toshi Kani Acked-by: David Rientjes Signed-off-by: Andrew Morton Cc: Yinghai Lu Link: http://lkml.kernel.org/n/tip-dSvpjjvp8aMzs1ybkftxohlh@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/srat.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 266ca912f62e..5ecf65117e6f 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -42,15 +42,25 @@ static __init inline int srat_disabled(void) return acpi_numa < 0; } -/* Callback for SLIT parsing */ +/* + * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for + * I/O localities since SRAT does not list them. I/O localities are + * not supported at this point. 
+ */ void __init acpi_numa_slit_init(struct acpi_table_slit *slit) { int i, j; - for (i = 0; i < slit->locality_count; i++) - for (j = 0; j < slit->locality_count; j++) + for (i = 0; i < slit->locality_count; i++) { + if (pxm_to_node(i) == NUMA_NO_NODE) + continue; + for (j = 0; j < slit->locality_count; j++) { + if (pxm_to_node(j) == NUMA_NO_NODE) + continue; numa_set_distance(pxm_to_node(i), pxm_to_node(j), slit->entry[slit->locality_count * i + j]); + } + } } /* Callback for Proximity Domain -> x2APIC mapping */ From edc6bc784028f2ef80dff0ff697363ff5a06e3a3 Mon Sep 17 00:00:00 2001 From: David Cohen Date: Tue, 21 Jan 2014 10:41:39 -0800 Subject: [PATCH 022/171] x86/intel/mid: Fix X86_INTEL_MID dependencies This patch fixes the following warning: warning: (X86_INTEL_MID) selects INTEL_SCU_IPC which has unmet direct dependencies (X86 && X86_PLATFORM_DEVICES && X86_INTEL_MID) It happens because when selected, X86_INTEL_MID tries to select INTEL_SCU_IPC regardless all its dependencies are met or not. This patch fixes it by adding the missing X86_PLATFORM_DEVICES dependency to X86_INTEL_MID. Reported-by: kbuild test robot Signed-off-by: David Cohen Link: http://lkml.kernel.org/r/1390329699-20782-1-git-send-email-david.a.cohen@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cb9af474dfca..3b6922ebf170 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -443,6 +443,7 @@ config X86_INTEL_MID bool "Intel MID platform support" depends on X86_32 depends on X86_EXTENDED_PLATFORM + depends on X86_PLATFORM_DEVICES depends on PCI depends on PCI_GOANY depends on X86_IO_APIC From 0d4dd797564cddc1f71ab0b239e9ea50ddd40b2a Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Sat, 25 Jan 2014 23:50:23 +0200 Subject: [PATCH 023/171] perf/doc: Remove mention of non-existent set_perf_event_pending() from design.txt set_perf_event_pending() was removed in e360adbe ("irq_work: Add generic hardirq context callbacks"). Signed-off-by: Baruch Siach Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/4c54761865d40210be0628cb84701afc5d57b5d8.1390686193.git.baruch@tkos.co.il Signed-off-by: Ingo Molnar --- tools/perf/design.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/design.txt b/tools/perf/design.txt index 67e5d0cace85..63a0e6f04a01 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -454,7 +454,6 @@ So to start with, in order to add HAVE_PERF_EVENTS to your Kconfig, you will need at least this: - asm/perf_event.h - a basic stub will suffice at first - support for atomic64 types (and associated helper functions) - - set_perf_event_pending() implemented If your architecture does have hardware capabilities, you can override the weak stub hw_perf_event_init() to register hardware counters. From 950b8354716eb1f9c0b39777d379efa5f4125c04 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 22 Jan 2014 21:58:46 +0200 Subject: [PATCH 024/171] perf tools: Demangle kernel and kernel module symbols too Some kernels contain C++ code, and thus their symbols need to be demangled. This allows 'perf kvm top' to generate readable output. 
Signed-off-by: Avi Kivity Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/26f71bf5bf7ee1408e3f1a803556d5df18223ef1.1390420726.git.avi@cloudius-systems.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 759456728703..8f12f0f5101d 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -922,6 +922,7 @@ int dso__load_sym(struct dso *dso, struct map *map, (u64)shdr.sh_offset); sym.st_value -= shdr.sh_addr - shdr.sh_offset; } +new_symbol: /* * We need to figure out if the object was created from C++ sources * DWARF DW_compile_unit has this, but we don't always have access @@ -933,7 +934,6 @@ int dso__load_sym(struct dso *dso, struct map *map, if (demangled != NULL) elf_name = demangled; } -new_symbol: f = symbol__new(sym.st_value, sym.st_size, GELF_ST_BIND(sym.st_info), elf_name); free(demangled); From d4b4ff8e28b474fac0fbfa9cfc40f88b9e41e380 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 10 Dec 2013 13:10:37 -0700 Subject: [PATCH 025/171] NVMe: Schedule reset for failed controllers Schedules a controller reset when it indicates it has a failed status. If the device does not become ready after a reset, the pci device will be scheduled for removal. Signed-off-by: Keith Busch [fixed checkpatch issue] Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 21 +++++++++++++++++++-- include/linux/nvme.h | 1 + 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 000bca43c23b..2f5b9f5f5a21 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -60,6 +60,8 @@ static LIST_HEAD(dev_list); static struct task_struct *nvme_thread; static struct workqueue_struct *nvme_workq; +static void nvme_reset_failed_dev(struct work_struct *ws); + /* * An NVM Express queue. Each device has at least two (one for admin * commands and one for I/O commands). 
@@ -1612,13 +1614,25 @@ static void nvme_resubmit_bios(struct nvme_queue *nvmeq) static int nvme_kthread(void *data) { - struct nvme_dev *dev; + struct nvme_dev *dev, *next; while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); spin_lock(&dev_list_lock); - list_for_each_entry(dev, &dev_list, node) { + list_for_each_entry_safe(dev, next, &dev_list, node) { int i; + if (readl(&dev->bar->csts) & NVME_CSTS_CFS && + dev->initialized) { + if (work_busy(&dev->reset_work)) + continue; + list_del_init(&dev->node); + dev_warn(&dev->pci_dev->dev, + "Failed status, reset controller\n"); + INIT_WORK(&dev->reset_work, + nvme_reset_failed_dev); + queue_work(nvme_workq, &dev->reset_work); + continue; + } for (i = 0; i < dev->queue_count; i++) { struct nvme_queue *nvmeq = dev->queues[i]; if (!nvmeq) @@ -2006,6 +2020,7 @@ static void nvme_dev_shutdown(struct nvme_dev *dev) { int i; + dev->initialized = 0; for (i = dev->queue_count - 1; i >= 0; i--) nvme_disable_queue(dev, i); @@ -2196,6 +2211,7 @@ static int nvme_dev_resume(struct nvme_dev *dev) queue_work(nvme_workq, &dev->reset_work); spin_unlock(&dev_list_lock); } + dev->initialized = 1; return 0; } @@ -2269,6 +2285,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto remove; + dev->initialized = 1; kref_init(&dev->kref); return 0; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index eed81cc56d7e..117d877e8be5 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -95,6 +95,7 @@ struct nvme_dev { u32 max_hw_sectors; u32 stripe_size; u16 oncs; + u8 initialized; }; /* From c30341dc3c436cf43508cd44cdfbb3810c38c195 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 10 Dec 2013 13:10:38 -0700 Subject: [PATCH 026/171] NVMe: Abort timed out commands Send nvme abort command to io requests that have timed out on an initialized device. If the command is not returned after another timeout, schedule the controller for reset. 
Signed-off-by: Keith Busch [fix endianness issues] Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 68 ++++++++++++++++++++++++++++++++++++++- include/linux/nvme.h | 1 + include/uapi/linux/nvme.h | 11 +++++++ 3 files changed, 79 insertions(+), 1 deletion(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 2f5b9f5f5a21..75049532df96 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -83,6 +83,7 @@ struct nvme_queue { u16 sq_head; u16 sq_tail; u16 cq_head; + u16 qid; u8 cq_phase; u8 cqe_seen; u8 q_suspended; @@ -100,6 +101,7 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); BUILD_BUG_ON(sizeof(struct nvme_features) != 64); BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64); + BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64); BUILD_BUG_ON(sizeof(struct nvme_command) != 64); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096); BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096); @@ -114,6 +116,7 @@ struct nvme_cmd_info { nvme_completion_fn fn; void *ctx; unsigned long timeout; + int aborted; }; static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq) @@ -157,6 +160,7 @@ static int alloc_cmdid(struct nvme_queue *nvmeq, void *ctx, info[cmdid].fn = handler; info[cmdid].ctx = ctx; info[cmdid].timeout = jiffies + timeout; + info[cmdid].aborted = 0; return cmdid; } @@ -175,6 +179,7 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx, #define CMD_CTX_COMPLETED (0x310 + CMD_CTX_BASE) #define CMD_CTX_INVALID (0x314 + CMD_CTX_BASE) #define CMD_CTX_FLUSH (0x318 + CMD_CTX_BASE) +#define CMD_CTX_ABORT (0x31C + CMD_CTX_BASE) static void special_completion(struct nvme_dev *dev, void *ctx, struct nvme_completion *cqe) @@ -183,6 +188,10 @@ static void special_completion(struct nvme_dev *dev, void *ctx, return; if (ctx == CMD_CTX_FLUSH) return; + if (ctx == CMD_CTX_ABORT) { + ++dev->abort_limit; + return; + } if (ctx == CMD_CTX_COMPLETED) { dev_warn(&dev->pci_dev->dev, "completed id %d twice on queue %d\n", @@ -1004,6 +1013,56 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, return nvme_submit_admin_cmd(dev, &c, result); } +/** + * nvme_abort_cmd - Attempt aborting a command + * @cmdid: Command id of a timed out IO + * @queue: The queue with timed out IO + * + * Schedule controller reset if the command was already aborted once before and + * still hasn't been returned to the driver, or if this is the admin queue. 
+ */ +static void nvme_abort_cmd(int cmdid, struct nvme_queue *nvmeq) +{ + int a_cmdid; + struct nvme_command cmd; + struct nvme_dev *dev = nvmeq->dev; + struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); + + if (!nvmeq->qid || info[cmdid].aborted) { + if (work_busy(&dev->reset_work)) + return; + list_del_init(&dev->node); + dev_warn(&dev->pci_dev->dev, + "I/O %d QID %d timeout, reset controller\n", cmdid, + nvmeq->qid); + INIT_WORK(&dev->reset_work, nvme_reset_failed_dev); + queue_work(nvme_workq, &dev->reset_work); + return; + } + + if (!dev->abort_limit) + return; + + a_cmdid = alloc_cmdid(dev->queues[0], CMD_CTX_ABORT, special_completion, + ADMIN_TIMEOUT); + if (a_cmdid < 0) + return; + + memset(&cmd, 0, sizeof(cmd)); + cmd.abort.opcode = nvme_admin_abort_cmd; + cmd.abort.cid = cmdid; + cmd.abort.sqid = cpu_to_le16(nvmeq->qid); + cmd.abort.command_id = a_cmdid; + + --dev->abort_limit; + info[cmdid].aborted = 1; + info[cmdid].timeout = jiffies + ADMIN_TIMEOUT; + + dev_warn(nvmeq->q_dmadev, "Aborting I/O %d QID %d\n", cmdid, + nvmeq->qid); + nvme_submit_cmd(dev->queues[0], &cmd); +} + /** * nvme_cancel_ios - Cancel outstanding I/Os * @queue: The queue to cancel I/Os on @@ -1027,7 +1086,12 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout) continue; if (info[cmdid].ctx == CMD_CTX_CANCELLED) continue; - dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d\n", cmdid); + if (timeout && nvmeq->dev->initialized) { + nvme_abort_cmd(cmdid, nvmeq); + continue; + } + dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n", cmdid, + nvmeq->qid); ctx = cancel_cmdid(nvmeq, cmdid, &fn); fn(nvmeq->dev, ctx, &cqe); } @@ -1119,6 +1183,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; nvmeq->q_depth = depth; nvmeq->cq_vector = vector; + nvmeq->qid = qid; nvmeq->q_suspended = 1; dev->queue_count++; @@ -1929,6 +1994,7 @@ static int nvme_dev_add(struct nvme_dev *dev) ctrl = mem; nn = le32_to_cpup(&ctrl->nn); dev->oncs = le16_to_cpup(&ctrl->oncs); + dev->abort_limit = ctrl->acl + 1; memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 117d877e8be5..69ae03f6eb15 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -95,6 +95,7 @@ struct nvme_dev { u32 max_hw_sectors; u32 stripe_size; u16 oncs; + u16 abort_limit; u8 initialized; }; diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index 989c04e0c563..e5ab62201119 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -350,6 +350,16 @@ struct nvme_delete_queue { __u32 rsvd11[5]; }; +struct nvme_abort_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[9]; + __le16 sqid; + __u16 cid; + __u32 rsvd11[5]; +}; + struct nvme_download_firmware { __u8 opcode; __u8 flags; @@ -384,6 +394,7 @@ struct nvme_command { struct nvme_download_firmware dlfw; struct nvme_format_cmd format; struct nvme_dsm_cmd dsm; + struct nvme_abort_cmd abort; }; }; From 0e53d18051725da46cbccfb7874a6422d4d4f274 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 10 Dec 2013 13:10:39 -0700 Subject: [PATCH 027/171] NVMe: Surprise removal handling This adds checks to see if the nvme pci device was removed. The check reads the status register for the value of -1, which it should never be unless the device is no longer present. 
If a user performs a surprise removal on an nvme device, the driver will be notified either by the pci driver remove callback if the platform's slot is capable of this event, or via reading the device BAR status register, which will indicate controller failure and trigger a reset. Either way, the device is not present so all outstanding commands would timeout. This will not send queue deletion commands to a drive that isn't present and fail after ioremap, significantly speeding up surprise removal; previously this took over 2 minutes per IO queue pair created, but this will complete removing the device within a few seconds. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 75049532df96..a51126129784 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1140,8 +1140,9 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid) irq_set_affinity_hint(vector, NULL); free_irq(vector, nvmeq); - /* Don't tell the adapter to delete the admin queue */ - if (qid) { + /* Don't tell the adapter to delete the admin queue. + * Don't tell a removed adapter to delete IO queues. */ + if (qid && readl(&dev->bar->csts) != -1) { adapter_delete_sq(dev, qid); adapter_delete_cq(dev, qid); } @@ -2052,12 +2053,18 @@ static int nvme_dev_map(struct nvme_dev *dev) dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); if (!dev->bar) goto disable; - + if (readl(&dev->bar->csts) == -1) { + result = -ENODEV; + goto unmap; + } dev->db_stride = 1 << NVME_CAP_STRIDE(readq(&dev->bar->cap)); dev->dbs = ((void __iomem *)dev->bar) + 4096; return 0; + unmap: + iounmap(dev->bar); + dev->bar = NULL; disable: pci_release_regions(pdev); disable_pci: From 4d115420707afcabe77d2535e092356df6664b70 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 10 Dec 2013 13:10:40 -0700 Subject: [PATCH 028/171] NVMe: Async IO queue deletion This attempts to delete all IO queues at the same time asynchronously on shutdown. This is necessary for a present device that is not responding; a shutdown operation previously would take 2 minutes per queue-pair to timeout before moving on to the next queue, making a device removal appear to take a very long time or "hung" as reported by users. In the previous worst case, a removal may be stuck forever until a kill signal is given if there are more than 32 queue pairs since it would run out of admin command IDs after over an hour of timed out sync commands (admin queue depth is 64). This patch will wait for the admin command timeout for all commands to complete, so the worst case now for an unresponsive controller is 60 seconds, though that still seems like a long time. Since this adds another way to take queues offline, some duplicate code resulted so I moved these into more convienient functions. 
Signed-off-by: Keith Busch [make functions static, correct line length and whitespace issues] Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 229 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 217 insertions(+), 12 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index a51126129784..1c8a82fbbc37 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -62,6 +62,14 @@ static struct workqueue_struct *nvme_workq; static void nvme_reset_failed_dev(struct work_struct *ws); +struct async_cmd_info { + struct kthread_work work; + struct kthread_worker *worker; + u32 result; + int status; + void *ctx; +}; + /* * An NVM Express queue. Each device has at least two (one for admin * commands and one for I/O commands). @@ -87,6 +95,7 @@ struct nvme_queue { u8 cq_phase; u8 cqe_seen; u8 q_suspended; + struct async_cmd_info cmdinfo; unsigned long cmdid_data[]; }; @@ -208,6 +217,15 @@ static void special_completion(struct nvme_dev *dev, void *ctx, dev_warn(&dev->pci_dev->dev, "Unknown special completion %p\n", ctx); } +static void async_completion(struct nvme_dev *dev, void *ctx, + struct nvme_completion *cqe) +{ + struct async_cmd_info *cmdinfo = ctx; + cmdinfo->result = le32_to_cpup(&cqe->result); + cmdinfo->status = le16_to_cpup(&cqe->status) >> 1; + queue_kthread_work(cmdinfo->worker, &cmdinfo->work); +} + /* * Called with local interrupts disabled and the q_lock held. May not sleep. */ @@ -898,12 +916,34 @@ int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, return cmdinfo.status; } +static int nvme_submit_async_cmd(struct nvme_queue *nvmeq, + struct nvme_command *cmd, + struct async_cmd_info *cmdinfo, unsigned timeout) +{ + int cmdid; + + cmdid = alloc_cmdid_killable(nvmeq, cmdinfo, async_completion, timeout); + if (cmdid < 0) + return cmdid; + cmdinfo->status = -EINTR; + cmd->common.command_id = cmdid; + nvme_submit_cmd(nvmeq, cmd); + return 0; +} + int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, u32 *result) { return nvme_submit_sync_cmd(dev->queues[0], cmd, result, ADMIN_TIMEOUT); } +static int nvme_submit_admin_cmd_async(struct nvme_dev *dev, + struct nvme_command *cmd, struct async_cmd_info *cmdinfo) +{ + return nvme_submit_async_cmd(dev->queues[0], cmd, cmdinfo, + ADMIN_TIMEOUT); +} + static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) { int status; @@ -1124,15 +1164,20 @@ static void nvme_free_queues(struct nvme_dev *dev) } } -static void nvme_disable_queue(struct nvme_dev *dev, int qid) +/** + * nvme_suspend_queue - put queue into suspended state + * @nvmeq - queue to suspend + * + * Returns 1 if already suspended, 0 otherwise. 
+ */ +static int nvme_suspend_queue(struct nvme_queue *nvmeq) { - struct nvme_queue *nvmeq = dev->queues[qid]; - int vector = dev->entry[nvmeq->cq_vector].vector; + int vector = nvmeq->dev->entry[nvmeq->cq_vector].vector; spin_lock_irq(&nvmeq->q_lock); if (nvmeq->q_suspended) { spin_unlock_irq(&nvmeq->q_lock); - return; + return 1; } nvmeq->q_suspended = 1; spin_unlock_irq(&nvmeq->q_lock); @@ -1140,17 +1185,33 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid) irq_set_affinity_hint(vector, NULL); free_irq(vector, nvmeq); + return 0; +} + +static void nvme_clear_queue(struct nvme_queue *nvmeq) +{ + spin_lock_irq(&nvmeq->q_lock); + nvme_process_cq(nvmeq); + nvme_cancel_ios(nvmeq, false); + spin_unlock_irq(&nvmeq->q_lock); +} + +static void nvme_disable_queue(struct nvme_dev *dev, int qid) +{ + struct nvme_queue *nvmeq = dev->queues[qid]; + + if (!nvmeq) + return; + if (nvme_suspend_queue(nvmeq)) + return; + /* Don't tell the adapter to delete the admin queue. * Don't tell a removed adapter to delete IO queues. */ if (qid && readl(&dev->bar->csts) != -1) { adapter_delete_sq(dev, qid); adapter_delete_cq(dev, qid); } - - spin_lock_irq(&nvmeq->q_lock); - nvme_process_cq(nvmeq); - nvme_cancel_ios(nvmeq, false); - spin_unlock_irq(&nvmeq->q_lock); + nvme_clear_queue(nvmeq); } static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, @@ -2089,20 +2150,164 @@ static void nvme_dev_unmap(struct nvme_dev *dev) pci_disable_device(dev->pci_dev); } +struct nvme_delq_ctx { + struct task_struct *waiter; + struct kthread_worker *worker; + atomic_t refcount; +}; + +static void nvme_wait_dq(struct nvme_delq_ctx *dq, struct nvme_dev *dev) +{ + dq->waiter = current; + mb(); + + for (;;) { + set_current_state(TASK_KILLABLE); + if (!atomic_read(&dq->refcount)) + break; + if (!schedule_timeout(ADMIN_TIMEOUT) || + fatal_signal_pending(current)) { + set_current_state(TASK_RUNNING); + + nvme_disable_ctrl(dev, readq(&dev->bar->cap)); + nvme_disable_queue(dev, 0); + + send_sig(SIGKILL, dq->worker->task, 1); + flush_kthread_worker(dq->worker); + return; + } + } + set_current_state(TASK_RUNNING); +} + +static void nvme_put_dq(struct nvme_delq_ctx *dq) +{ + atomic_dec(&dq->refcount); + if (dq->waiter) + wake_up_process(dq->waiter); +} + +static struct nvme_delq_ctx *nvme_get_dq(struct nvme_delq_ctx *dq) +{ + atomic_inc(&dq->refcount); + return dq; +} + +static void nvme_del_queue_end(struct nvme_queue *nvmeq) +{ + struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx; + + nvme_clear_queue(nvmeq); + nvme_put_dq(dq); +} + +static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode, + kthread_work_func_t fn) +{ + struct nvme_command c; + + memset(&c, 0, sizeof(c)); + c.delete_queue.opcode = opcode; + c.delete_queue.qid = cpu_to_le16(nvmeq->qid); + + init_kthread_work(&nvmeq->cmdinfo.work, fn); + return nvme_submit_admin_cmd_async(nvmeq->dev, &c, &nvmeq->cmdinfo); +} + +static void nvme_del_cq_work_handler(struct kthread_work *work) +{ + struct nvme_queue *nvmeq = container_of(work, struct nvme_queue, + cmdinfo.work); + nvme_del_queue_end(nvmeq); +} + +static int nvme_delete_cq(struct nvme_queue *nvmeq) +{ + return adapter_async_del_queue(nvmeq, nvme_admin_delete_cq, + nvme_del_cq_work_handler); +} + +static void nvme_del_sq_work_handler(struct kthread_work *work) +{ + struct nvme_queue *nvmeq = container_of(work, struct nvme_queue, + cmdinfo.work); + int status = nvmeq->cmdinfo.status; + + if (!status) + status = nvme_delete_cq(nvmeq); + if (status) + nvme_del_queue_end(nvmeq); +} + +static 
int nvme_delete_sq(struct nvme_queue *nvmeq) +{ + return adapter_async_del_queue(nvmeq, nvme_admin_delete_sq, + nvme_del_sq_work_handler); +} + +static void nvme_del_queue_start(struct kthread_work *work) +{ + struct nvme_queue *nvmeq = container_of(work, struct nvme_queue, + cmdinfo.work); + allow_signal(SIGKILL); + if (nvme_delete_sq(nvmeq)) + nvme_del_queue_end(nvmeq); +} + +static void nvme_disable_io_queues(struct nvme_dev *dev) +{ + int i; + DEFINE_KTHREAD_WORKER_ONSTACK(worker); + struct nvme_delq_ctx dq; + struct task_struct *kworker_task = kthread_run(kthread_worker_fn, + &worker, "nvme%d", dev->instance); + + if (IS_ERR(kworker_task)) { + dev_err(&dev->pci_dev->dev, + "Failed to create queue del task\n"); + for (i = dev->queue_count - 1; i > 0; i--) + nvme_disable_queue(dev, i); + return; + } + + dq.waiter = NULL; + atomic_set(&dq.refcount, 0); + dq.worker = &worker; + for (i = dev->queue_count - 1; i > 0; i--) { + struct nvme_queue *nvmeq = dev->queues[i]; + + if (nvme_suspend_queue(nvmeq)) + continue; + nvmeq->cmdinfo.ctx = nvme_get_dq(&dq); + nvmeq->cmdinfo.worker = dq.worker; + init_kthread_work(&nvmeq->cmdinfo.work, nvme_del_queue_start); + queue_kthread_work(dq.worker, &nvmeq->cmdinfo.work); + } + nvme_wait_dq(&dq, dev); + kthread_stop(kworker_task); +} + static void nvme_dev_shutdown(struct nvme_dev *dev) { int i; dev->initialized = 0; - for (i = dev->queue_count - 1; i >= 0; i--) - nvme_disable_queue(dev, i); spin_lock(&dev_list_lock); list_del_init(&dev->node); spin_unlock(&dev_list_lock); - if (dev->bar) + if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) { + for (i = dev->queue_count - 1; i >= 0; i--) { + struct nvme_queue *nvmeq = dev->queues[i]; + nvme_suspend_queue(nvmeq); + nvme_clear_queue(nvmeq); + } + } else { + nvme_disable_io_queues(dev); nvme_shutdown_ctrl(dev); + nvme_disable_queue(dev, 0); + } nvme_dev_unmap(dev); } From 469071a37afc8a627b6b2ddf29db0a097d864845 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 9 Dec 2013 12:58:46 -0500 Subject: [PATCH 029/171] NVMe: Dynamically allocate partition numbers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some users need more than 64 partitions per device. Rather than simply increasing the number of partitions, switch to the dynamic partition allocation scheme. This means that minor numbers are not stable across boots, but since major numbers aren't either, I cannot see this being a significant problem. 
Tested-by: Matias Bjørling Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 36 +++--------------------------------- 1 file changed, 3 insertions(+), 33 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 1c8a82fbbc37..7b615a063e92 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -46,7 +46,6 @@ #define NVME_Q_DEPTH 1024 #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) -#define NVME_MINORS 64 #define ADMIN_TIMEOUT (60 * HZ) static int nvme_major; @@ -1780,33 +1779,6 @@ static int nvme_kthread(void *data) return 0; } -static DEFINE_IDA(nvme_index_ida); - -static int nvme_get_ns_idx(void) -{ - int index, error; - - do { - if (!ida_pre_get(&nvme_index_ida, GFP_KERNEL)) - return -1; - - spin_lock(&dev_list_lock); - error = ida_get_new(&nvme_index_ida, &index); - spin_unlock(&dev_list_lock); - } while (error == -EAGAIN); - - if (error) - index = -1; - return index; -} - -static void nvme_put_ns_idx(int index) -{ - spin_lock(&dev_list_lock); - ida_remove(&nvme_index_ida, index); - spin_unlock(&dev_list_lock); -} - static void nvme_config_discard(struct nvme_ns *ns) { u32 logical_block_size = queue_logical_block_size(ns->queue); @@ -1840,7 +1812,7 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid, ns->dev = dev; ns->queue->queuedata = ns; - disk = alloc_disk(NVME_MINORS); + disk = alloc_disk(0); if (!disk) goto out_free_queue; ns->ns_id = nsid; @@ -1853,12 +1825,12 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid, blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors); disk->major = nvme_major; - disk->minors = NVME_MINORS; - disk->first_minor = NVME_MINORS * nvme_get_ns_idx(); + disk->first_minor = 0; disk->fops = &nvme_fops; disk->private_data = ns; disk->queue = ns->queue; disk->driverfs_dev = &dev->pci_dev->dev; + disk->flags = GENHD_FL_EXT_DEVT; sprintf(disk->disk_name, "nvme%dn%d", dev->instance, nsid); set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); @@ -1876,9 +1848,7 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid, static void nvme_ns_free(struct nvme_ns *ns) { - int index = ns->disk->first_minor / NVME_MINORS; put_disk(ns->disk); - nvme_put_ns_idx(index); blk_cleanup_queue(ns->queue); kfree(ns); } From a1a5ef9998b1379780790b878457b0e0f48b25db Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 16 Dec 2013 13:50:00 -0500 Subject: [PATCH 030/171] NVMe: Disable admin queue on init failure Disable the admin queue if device fails during initialization so the queue's irq is freed. 
Signed-off-by: Keith Busch [rewritten to use nvme_free_queues] Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 7b615a063e92..89021d710ec2 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1152,11 +1152,11 @@ static void nvme_free_queue(struct nvme_queue *nvmeq) kfree(nvmeq); } -static void nvme_free_queues(struct nvme_dev *dev) +static void nvme_free_queues(struct nvme_dev *dev, int lowest) { int i; - for (i = dev->queue_count - 1; i >= 0; i--) { + for (i = dev->queue_count - 1; i >= lowest; i--) { nvme_free_queue(dev->queues[i]); dev->queue_count--; dev->queues[i] = NULL; @@ -1991,7 +1991,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) return 0; free_queues: - nvme_free_queues(dev); + nvme_free_queues(dev, 1); return result; } @@ -2411,6 +2411,7 @@ static int nvme_dev_start(struct nvme_dev *dev) return result; disable: + nvme_disable_queue(dev, 0); spin_lock(&dev_list_lock); list_del_init(&dev->node); spin_unlock(&dev_list_lock); @@ -2542,7 +2543,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) shutdown: nvme_dev_shutdown(dev); release_pools: - nvme_free_queues(dev); + nvme_free_queues(dev, 0); nvme_release_prp_pools(dev); release: nvme_release_instance(dev); @@ -2566,7 +2567,7 @@ static void nvme_remove(struct pci_dev *pdev) misc_deregister(&dev->miscdev); nvme_dev_remove(dev); nvme_dev_shutdown(dev); - nvme_free_queues(dev); + nvme_free_queues(dev, 0); nvme_release_instance(dev); nvme_release_prp_pools(dev); kref_put(&dev->kref, nvme_free_dev); From 09ece1424f52a2d3ad0ab715e96b0fb342af4f03 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 27 Jan 2014 11:29:40 -0500 Subject: [PATCH 031/171] NVMe: Add a pci_driver shutdown method We need to shut down the device cleanly when the system is being shut down. This was in an earlier patch but was inadvertently lost during a rewrite. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 89021d710ec2..eaace767be49 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -2554,6 +2554,12 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) return result; } +static void nvme_shutdown(struct pci_dev *pdev) +{ + struct nvme_dev *dev = pci_get_drvdata(pdev); + nvme_dev_shutdown(dev); +} + static void nvme_remove(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); @@ -2625,6 +2631,7 @@ static struct pci_driver nvme_driver = { .id_table = nvme_id_table, .probe = nvme_probe, .remove = nvme_remove, + .shutdown = nvme_shutdown, .driver = { .pm = &nvme_dev_pm_ops, }, From 3193f07bb7ae723f33ac8e8b9db317a4f68d7d18 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 27 Jan 2014 15:57:22 -0500 Subject: [PATCH 032/171] NVMe: Include device and queue numbers in interrupt name On larger systems with many drives, it may help debugging to know which queue is tied to which interrupt, just by looking at /proc/interrupts. 
Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index eaace767be49..5ffc26937226 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -76,6 +76,7 @@ struct async_cmd_info { struct nvme_queue { struct device *q_dmadev; struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ spinlock_t q_lock; struct nvme_command *sq_cmds; volatile struct nvme_completion *cqes; @@ -1235,6 +1236,8 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, nvmeq->q_dmadev = dmadev; nvmeq->dev = dev; + snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d", + dev->instance, qid); spin_lock_init(&nvmeq->q_lock); nvmeq->cq_head = 0; nvmeq->cq_phase = 1; @@ -1297,7 +1300,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) if (result < 0) goto release_cq; - result = queue_request_irq(dev, nvmeq, "nvme"); + result = queue_request_irq(dev, nvmeq, nvmeq->irqname); if (result < 0) goto release_sq; @@ -1415,7 +1418,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) if (result) return result; - result = queue_request_irq(dev, nvmeq, "nvme admin"); + result = queue_request_irq(dev, nvmeq, nvmeq->irqname); if (result) return result; @@ -1873,6 +1876,7 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) static int nvme_setup_io_queues(struct nvme_dev *dev) { + struct nvme_queue *adminq = dev->queues[0]; struct pci_dev *pdev = dev->pci_dev; int result, cpu, i, vecs, nr_io_queues, size, q_depth; @@ -1899,7 +1903,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) } /* Deregister the admin queue's interrupt */ - free_irq(dev->entry[0].vector, dev->queues[0]); + free_irq(dev->entry[0].vector, adminq); vecs = nr_io_queues; for (i = 0; i < vecs; i++) @@ -1937,9 +1941,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) */ nr_io_queues = vecs; - result = queue_request_irq(dev, dev->queues[0], "nvme admin"); + result = queue_request_irq(dev, adminq, adminq->irqname); if (result) { - dev->queues[0]->q_suspended = 1; + adminq->q_suspended = 1; goto free_queues; } From 317b5684d52269b75b4ec6480f9dac056d0d4ba8 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 27 Jan 2014 17:34:07 +0000 Subject: [PATCH 033/171] regulator: core: Correct default return value for full constraints Once we have full constraints then all supply mappings should be known to the regulator API. This means that we should treat failed lookups as fatal rather than deferring in the hope of further registrations but this was broken by commit 9b92da1f1205bd25 "regulator: core: Fix default return value for _get()" which was targeted at DT systems but unintentionally broke non-DT systems by changing the default return value. Fix this by explicitly returning -EPROBE_DEFER from the DT lookup if we find a property but no corresponding regulator and by having the non-DT case default to -ENODEV when we have full constraints. 
Fixes: 9b92da1f1205bd25 "regulator: core: Fix default return value for _get()" Signed-off-by: Mark Brown Tested-by: Guenter Roeck Cc: stable@vger.kernel.org --- drivers/regulator/core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index b38a6b669e8c..16a309e5c024 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1272,6 +1272,8 @@ static struct regulator_dev *regulator_dev_lookup(struct device *dev, if (r->dev.parent && node == r->dev.of_node) return r; + *ret = -EPROBE_DEFER; + return NULL; } else { /* * If we couldn't even get the node then it's @@ -1312,7 +1314,7 @@ static struct regulator *_regulator_get(struct device *dev, const char *id, struct regulator_dev *rdev; struct regulator *regulator = ERR_PTR(-EPROBE_DEFER); const char *devname = NULL; - int ret = -EPROBE_DEFER; + int ret; if (id == NULL) { pr_err("get() with no identifier\n"); @@ -1322,6 +1324,11 @@ static struct regulator *_regulator_get(struct device *dev, const char *id, if (dev) devname = dev_name(dev); + if (have_full_constraints()) + ret = -ENODEV; + else + ret = -EPROBE_DEFER; + mutex_lock(®ulator_list_mutex); rdev = regulator_dev_lookup(dev, id, &ret); From 7de8246eba58c6f4c315b8c69fb29e445ccf3c80 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 28 Jan 2014 09:38:37 -0800 Subject: [PATCH 034/171] [IA64] Wire up new sched_setattr and sched_getattr syscalls New syscalls for v3.14 Signed-off-by: Tony Luck --- arch/ia64/include/asm/unistd.h | 2 +- arch/ia64/include/uapi/asm/unistd.h | 2 ++ arch/ia64/kernel/entry.S | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index afd45e0d552e..ae763d8bf55a 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -11,7 +11,7 @@ -#define NR_syscalls 312 /* length of syscall table */ +#define NR_syscalls 314 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h index 34fd6fe46da1..715e85f858de 100644 --- a/arch/ia64/include/uapi/asm/unistd.h +++ b/arch/ia64/include/uapi/asm/unistd.h @@ -325,5 +325,7 @@ #define __NR_process_vm_writev 1333 #define __NR_accept4 1334 #define __NR_finit_module 1335 +#define __NR_sched_setattr 1336 +#define __NR_sched_getattr 1337 #endif /* _UAPI_ASM_IA64_UNISTD_H */ diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index ddea607f948a..fa8d61a312a7 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1773,6 +1773,8 @@ sys_call_table: data8 sys_process_vm_writev data8 sys_accept4 data8 sys_finit_module // 1335 + data8 sys_sched_setattr + data8 sys_sched_getattr .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ From bdfd70fde389412be60f7e8aaed5732dc26fc8ac Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 29 Jan 2014 09:33:52 -0500 Subject: [PATCH 035/171] NVMe: Correct uses of INIT_WORK We need to initialise the work_struct when we initialise the rest of the struct nvme_dev, otherwise we'll hit a lockdep warning when we remove the device. Use PREPARE_WORK to change the function pointer instead of INIT_WORK. 
Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 5ffc26937226..23728099e36f 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1075,7 +1075,7 @@ static void nvme_abort_cmd(int cmdid, struct nvme_queue *nvmeq) dev_warn(&dev->pci_dev->dev, "I/O %d QID %d timeout, reset controller\n", cmdid, nvmeq->qid); - INIT_WORK(&dev->reset_work, nvme_reset_failed_dev); + PREPARE_WORK(&dev->reset_work, nvme_reset_failed_dev); queue_work(nvme_workq, &dev->reset_work); return; } @@ -1757,7 +1757,7 @@ static int nvme_kthread(void *data) list_del_init(&dev->node); dev_warn(&dev->pci_dev->dev, "Failed status, reset controller\n"); - INIT_WORK(&dev->reset_work, + PREPARE_WORK(&dev->reset_work, nvme_reset_failed_dev); queue_work(nvme_workq, &dev->reset_work); continue; @@ -2460,7 +2460,7 @@ static int nvme_dev_resume(struct nvme_dev *dev) return ret; if (ret == -EBUSY) { spin_lock(&dev_list_lock); - INIT_WORK(&dev->reset_work, nvme_remove_disks); + PREPARE_WORK(&dev->reset_work, nvme_remove_disks); queue_work(nvme_workq, &dev->reset_work); spin_unlock(&dev_list_lock); } @@ -2507,6 +2507,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto free; INIT_LIST_HEAD(&dev->namespaces); + INIT_WORK(&dev->reset_work, nvme_reset_failed_dev); dev->pci_dev = pdev; pci_set_drvdata(pdev, dev); result = nvme_set_instance(dev); @@ -2605,7 +2606,7 @@ static int nvme_resume(struct device *dev) struct nvme_dev *ndev = pci_get_drvdata(pdev); if (nvme_dev_resume(ndev) && !work_busy(&ndev->reset_work)) { - INIT_WORK(&ndev->reset_work, nvme_reset_failed_dev); + PREPARE_WORK(&ndev->reset_work, nvme_reset_failed_dev); queue_work(nvme_workq, &ndev->reset_work); } return 0; From f428ebd184c82a7914b2aa7e9f868918aaf7ea78 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Jan 2014 16:40:02 +0100 Subject: [PATCH 036/171] perf tools: Fix AAAAARGH64 memory barriers Someone got the load and store barriers mixed up for AAAAARGH64. Turn them the right side up. Reported-by: Will Deacon Signed-off-by: Peter Zijlstra Fixes: a94d342b9cb0 ("tools/perf: Add required memory barriers") Cc: Ingo Molnar Cc: Will Deacon Link: http://lkml.kernel.org/r/20140124154002.GF31570@twins.programming.kicks-ass.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 7daa806d9050..e84fa26bc1be 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -100,8 +100,8 @@ #ifdef __aarch64__ #define mb() asm volatile("dmb ish" ::: "memory") -#define wmb() asm volatile("dmb ishld" ::: "memory") -#define rmb() asm volatile("dmb ishst" ::: "memory") +#define wmb() asm volatile("dmb ishst" ::: "memory") +#define rmb() asm volatile("dmb ishld" ::: "memory") #define cpu_relax() asm volatile("yield" ::: "memory") #endif From 0519e9ad89e5cd6e6b08398f57c6a71d9580564c Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 16 Jan 2014 16:01:11 +0100 Subject: [PATCH 037/171] crypto: s390 - fix concurrency issue in aes-ctr mode The aes-ctr mode uses one preallocated page without any concurrency protection. When multiple threads run aes-ctr encryption or decryption this can lead to data corruption. The patch introduces locking for the page and a fallback solution with slower en/decryption performance in concurrency situations. 
Cc: stable@vger.kernel.org Signed-off-by: Harald Freudenberger Signed-off-by: Herbert Xu --- arch/s390/crypto/aes_s390.c | 65 ++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 19 deletions(-) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index b3feabd39f31..cf3c0089bef2 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "crypt_s390.h" #define AES_KEYLEN_128 1 @@ -32,6 +33,7 @@ #define AES_KEYLEN_256 4 static u8 *ctrblk; +static DEFINE_SPINLOCK(ctrblk_lock); static char keylen_flag; struct s390_aes_ctx { @@ -758,43 +760,67 @@ static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return aes_set_key(tfm, in_key, key_len); } +static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes) +{ + unsigned int i, n; + + /* only use complete blocks, max. PAGE_SIZE */ + n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1); + for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) { + memcpy(ctrptr + i, ctrptr + i - AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + crypto_inc(ctrptr + i, AES_BLOCK_SIZE); + } + return n; +} + static int ctr_aes_crypt(struct blkcipher_desc *desc, long func, struct s390_aes_ctx *sctx, struct blkcipher_walk *walk) { int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE); - unsigned int i, n, nbytes; - u8 buf[AES_BLOCK_SIZE]; - u8 *out, *in; + unsigned int n, nbytes; + u8 buf[AES_BLOCK_SIZE], ctrbuf[AES_BLOCK_SIZE]; + u8 *out, *in, *ctrptr = ctrbuf; if (!walk->nbytes) return ret; - memcpy(ctrblk, walk->iv, AES_BLOCK_SIZE); + if (spin_trylock(&ctrblk_lock)) + ctrptr = ctrblk; + + memcpy(ctrptr, walk->iv, AES_BLOCK_SIZE); while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { out = walk->dst.virt.addr; in = walk->src.virt.addr; while (nbytes >= AES_BLOCK_SIZE) { - /* only use complete blocks, max. PAGE_SIZE */ - n = (nbytes > PAGE_SIZE) ? 
PAGE_SIZE : - nbytes & ~(AES_BLOCK_SIZE - 1); - for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) { - memcpy(ctrblk + i, ctrblk + i - AES_BLOCK_SIZE, - AES_BLOCK_SIZE); - crypto_inc(ctrblk + i, AES_BLOCK_SIZE); - } - ret = crypt_s390_kmctr(func, sctx->key, out, in, n, ctrblk); - if (ret < 0 || ret != n) + if (ctrptr == ctrblk) + n = __ctrblk_init(ctrptr, nbytes); + else + n = AES_BLOCK_SIZE; + ret = crypt_s390_kmctr(func, sctx->key, out, in, + n, ctrptr); + if (ret < 0 || ret != n) { + if (ctrptr == ctrblk) + spin_unlock(&ctrblk_lock); return -EIO; + } if (n > AES_BLOCK_SIZE) - memcpy(ctrblk, ctrblk + n - AES_BLOCK_SIZE, + memcpy(ctrptr, ctrptr + n - AES_BLOCK_SIZE, AES_BLOCK_SIZE); - crypto_inc(ctrblk, AES_BLOCK_SIZE); + crypto_inc(ctrptr, AES_BLOCK_SIZE); out += n; in += n; nbytes -= n; } ret = blkcipher_walk_done(desc, walk, nbytes); } + if (ctrptr == ctrblk) { + if (nbytes) + memcpy(ctrbuf, ctrptr, AES_BLOCK_SIZE); + else + memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE); + spin_unlock(&ctrblk_lock); + } /* * final block may be < AES_BLOCK_SIZE, copy only nbytes */ @@ -802,14 +828,15 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, long func, out = walk->dst.virt.addr; in = walk->src.virt.addr; ret = crypt_s390_kmctr(func, sctx->key, buf, in, - AES_BLOCK_SIZE, ctrblk); + AES_BLOCK_SIZE, ctrbuf); if (ret < 0 || ret != AES_BLOCK_SIZE) return -EIO; memcpy(out, buf, nbytes); - crypto_inc(ctrblk, AES_BLOCK_SIZE); + crypto_inc(ctrbuf, AES_BLOCK_SIZE); ret = blkcipher_walk_done(desc, walk, 0); + memcpy(walk->iv, ctrbuf, AES_BLOCK_SIZE); } - memcpy(walk->iv, ctrblk, AES_BLOCK_SIZE); + return ret; } From adc3fcf1552b6e406d172fd9690bbd1395053d13 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 22 Jan 2014 13:00:04 +0100 Subject: [PATCH 038/171] crypto: s390 - fix des and des3_ede cbc concurrency issue In s390 des and des3_ede cbc mode the iv value is not protected against concurrency access and modifications from another running en/decrypt operation which is using the very same tfm struct instance. This fix copies the iv to the local stack before the crypto operation and stores the value back when done. 
Cc: stable@vger.kernel.org Signed-off-by: Harald Freudenberger Signed-off-by: Herbert Xu --- arch/s390/crypto/des_s390.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 200f2a1b599d..82a0a2ad2494 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -105,29 +105,35 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func, } static int cbc_desall_crypt(struct blkcipher_desc *desc, long func, - u8 *iv, struct blkcipher_walk *walk) + struct blkcipher_walk *walk) { + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); int ret = blkcipher_walk_virt(desc, walk); unsigned int nbytes = walk->nbytes; + struct { + u8 iv[DES_BLOCK_SIZE]; + u8 key[DES3_KEY_SIZE]; + } param; if (!nbytes) goto out; - memcpy(iv, walk->iv, DES_BLOCK_SIZE); + memcpy(param.iv, walk->iv, DES_BLOCK_SIZE); + memcpy(param.key, ctx->key, DES3_KEY_SIZE); do { /* only use complete blocks */ unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1); u8 *out = walk->dst.virt.addr; u8 *in = walk->src.virt.addr; - ret = crypt_s390_kmc(func, iv, out, in, n); + ret = crypt_s390_kmc(func, ¶m, out, in, n); if (ret < 0 || ret != n) return -EIO; nbytes &= DES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, walk, nbytes); } while ((nbytes = walk->nbytes)); - memcpy(walk->iv, iv, DES_BLOCK_SIZE); + memcpy(walk->iv, param.iv, DES_BLOCK_SIZE); out: return ret; @@ -179,22 +185,20 @@ static int cbc_des_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, ctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, &walk); } static int cbc_des_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, ctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, &walk); } static struct crypto_alg cbc_des_alg = { @@ -327,22 +331,20 @@ static int cbc_des3_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, ctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, &walk); } static int cbc_des3_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, ctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, &walk); } static struct crypto_alg cbc_des3_alg = { From ee97dc7db4cbda33e4241c2d85b42d1835bc8a35 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 22 Jan 2014 13:01:33 +0100 Subject: [PATCH 039/171] crypto: s390 - fix des and des3_ede ctr concurrency issue In s390 des and 3des ctr mode there is one preallocated page used to speed up the en/decryption. 
This page is not protected against concurrent usage and thus there is a potential of data corruption with multiple threads. The fix introduces locking/unlocking the ctr page and a slower fallback solution at concurrency situations. Cc: stable@vger.kernel.org Signed-off-by: Harald Freudenberger Signed-off-by: Herbert Xu --- arch/s390/crypto/des_s390.c | 69 ++++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 82a0a2ad2494..0a5aac8a9412 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -25,6 +25,7 @@ #define DES3_KEY_SIZE (3 * DES_KEY_SIZE) static u8 *ctrblk; +static DEFINE_SPINLOCK(ctrblk_lock); struct s390_des_ctx { u8 iv[DES_BLOCK_SIZE]; @@ -368,54 +369,80 @@ static struct crypto_alg cbc_des3_alg = { } }; +static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes) +{ + unsigned int i, n; + + /* align to block size, max. PAGE_SIZE */ + n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(DES_BLOCK_SIZE - 1); + for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) { + memcpy(ctrptr + i, ctrptr + i - DES_BLOCK_SIZE, DES_BLOCK_SIZE); + crypto_inc(ctrptr + i, DES_BLOCK_SIZE); + } + return n; +} + static int ctr_desall_crypt(struct blkcipher_desc *desc, long func, - struct s390_des_ctx *ctx, struct blkcipher_walk *walk) + struct s390_des_ctx *ctx, + struct blkcipher_walk *walk) { int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE); - unsigned int i, n, nbytes; - u8 buf[DES_BLOCK_SIZE]; - u8 *out, *in; + unsigned int n, nbytes; + u8 buf[DES_BLOCK_SIZE], ctrbuf[DES_BLOCK_SIZE]; + u8 *out, *in, *ctrptr = ctrbuf; - memcpy(ctrblk, walk->iv, DES_BLOCK_SIZE); + if (!walk->nbytes) + return ret; + + if (spin_trylock(&ctrblk_lock)) + ctrptr = ctrblk; + + memcpy(ctrptr, walk->iv, DES_BLOCK_SIZE); while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) { out = walk->dst.virt.addr; in = walk->src.virt.addr; while (nbytes >= DES_BLOCK_SIZE) { - /* align to block size, max. PAGE_SIZE */ - n = (nbytes > PAGE_SIZE) ? 
PAGE_SIZE : - nbytes & ~(DES_BLOCK_SIZE - 1); - for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) { - memcpy(ctrblk + i, ctrblk + i - DES_BLOCK_SIZE, - DES_BLOCK_SIZE); - crypto_inc(ctrblk + i, DES_BLOCK_SIZE); - } - ret = crypt_s390_kmctr(func, ctx->key, out, in, n, ctrblk); - if (ret < 0 || ret != n) + if (ctrptr == ctrblk) + n = __ctrblk_init(ctrptr, nbytes); + else + n = DES_BLOCK_SIZE; + ret = crypt_s390_kmctr(func, ctx->key, out, in, + n, ctrptr); + if (ret < 0 || ret != n) { + if (ctrptr == ctrblk) + spin_unlock(&ctrblk_lock); return -EIO; + } if (n > DES_BLOCK_SIZE) - memcpy(ctrblk, ctrblk + n - DES_BLOCK_SIZE, + memcpy(ctrptr, ctrptr + n - DES_BLOCK_SIZE, DES_BLOCK_SIZE); - crypto_inc(ctrblk, DES_BLOCK_SIZE); + crypto_inc(ctrptr, DES_BLOCK_SIZE); out += n; in += n; nbytes -= n; } ret = blkcipher_walk_done(desc, walk, nbytes); } - + if (ctrptr == ctrblk) { + if (nbytes) + memcpy(ctrbuf, ctrptr, DES_BLOCK_SIZE); + else + memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE); + spin_unlock(&ctrblk_lock); + } /* final block may be < DES_BLOCK_SIZE, copy only nbytes */ if (nbytes) { out = walk->dst.virt.addr; in = walk->src.virt.addr; ret = crypt_s390_kmctr(func, ctx->key, buf, in, - DES_BLOCK_SIZE, ctrblk); + DES_BLOCK_SIZE, ctrbuf); if (ret < 0 || ret != DES_BLOCK_SIZE) return -EIO; memcpy(out, buf, nbytes); - crypto_inc(ctrblk, DES_BLOCK_SIZE); + crypto_inc(ctrbuf, DES_BLOCK_SIZE); ret = blkcipher_walk_done(desc, walk, 0); + memcpy(walk->iv, ctrbuf, DES_BLOCK_SIZE); } - memcpy(walk->iv, ctrblk, DES_BLOCK_SIZE); return ret; } From 39424e89d64661faa0a2e00c5ad1e6dbeebfa972 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 28 Jan 2014 08:22:11 -0500 Subject: [PATCH 040/171] x86, cpu hotplug: Fix stack frame warning in check_irq_vectors_for_cpu_disable() Further discussion here: http://marc.info/?l=linux-kernel&m=139073901101034&w=2 kbuild, 0day kernel build service, outputs the warning: arch/x86/kernel/irq.c:333:1: warning: the frame size of 2056 bytes is larger than 2048 bytes [-Wframe-larger-than=] because check_irq_vectors_for_cpu_disable() allocates two cpumasks on the stack. Fix this by moving the two cpumasks to a global file context. Reported-by: Fengguang Wu Tested-by: David Rientjes Signed-off-by: Prarit Bhargava Link: http://lkml.kernel.org/r/1390915331-27375-1-git-send-email-prarit@redhat.com Cc: Andi Kleen Cc: Michel Lespinasse Cc: Seiji Aguchi Cc: Yang Zhang Cc: Paul Gortmaker Cc: Janet Morgan Cc: Tony Luck Cc: Ruiv Wang Cc: Gong Chen Cc: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/irq.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index dbb60878b744..d99f31d9a750 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -266,6 +266,14 @@ __visible void smp_trace_x86_platform_ipi(struct pt_regs *regs) EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); #ifdef CONFIG_HOTPLUG_CPU + +/* These two declarations are only used in check_irq_vectors_for_cpu_disable() + * below, which is protected by stop_machine(). Putting them on the stack + * results in a stack frame overflow. Dynamically allocating could result in a + * failure so declare these two cpumasks as global. + */ +static struct cpumask affinity_new, online_new; + /* * This cpu is going to be removed and its vectors migrated to the remaining * online cpus. Check to see if there are enough vectors in the remaining cpus. 
@@ -277,7 +285,6 @@ int check_irq_vectors_for_cpu_disable(void) unsigned int this_cpu, vector, this_count, count; struct irq_desc *desc; struct irq_data *data; - struct cpumask affinity_new, online_new; this_cpu = smp_processor_id(); cpumask_copy(&online_new, cpu_online_mask); From 54820f5802295993b78a373e404e7561039b399d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 30 Jan 2014 14:51:19 +0100 Subject: [PATCH 041/171] regulator: s2mps11: Fix NULL pointer of_node value when using platform data When platform_data is used for regulator (of_node of sec-core MFD device is NULL) the config.of_node for regulator is not initialized. This NULL value of config.of_node is later stored during regulator_register(). Thus any call by regulator consumers to of_get_regulator() will fail on of_parse_phandle() returning NULL. In this case (using platform_data and parent's driver of_node is NULL) set the config.of_node to reg_node from platform_data. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Mark Brown --- drivers/regulator/s2mps11.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c index d9e557990577..cd0b9e35a56d 100644 --- a/drivers/regulator/s2mps11.c +++ b/drivers/regulator/s2mps11.c @@ -441,6 +441,7 @@ common_reg: for (i = 0; i < S2MPS11_REGULATOR_MAX; i++) { if (!reg_np) { config.init_data = pdata->regulators[i].initdata; + config.of_node = pdata->regulators[i].reg_node; } else { config.init_data = rdata[i].init_data; config.of_node = rdata[i].of_node; From 6a02652df511029127406cf8fa89cdf5e987f963 Mon Sep 17 00:00:00 2001 From: Francesco Fusco Date: Mon, 27 Jan 2014 14:39:13 +0100 Subject: [PATCH 042/171] perf tools: Fix include for non x86 architectures Commit 71ae8aac ("lib: introduce arch optimized hash library") added an include of <asm/hash.h> to <linux/hash.h> for setting up an architecture specific fast hash. Since perf includes the non-uapi kernel header directly, it cannot find <asm/hash.h> on non-x86 and thus prevents perf from being compiled on every architecture other than x86. The problem is the inclusion of <asm/hash.h> in hash.h that results in the following error originating from util/evlist.c: fatal error: asm/hash.h: No such file or directory This commit simply adds an empty stub/file to fix the compile issue on non-x86 architectures. As perf does not use any of these new functions, it fixes the compilation and therefore seems to be the most appropriate solution to go with. Signed-off-by: Francesco Fusco Link: http://lkml.kernel.org/r/2cf8143aad65a6aa6fe30325ef8a65847141afa2.1390829373.git.ffusco@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/include/asm/hash.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 tools/perf/util/include/asm/hash.h diff --git a/tools/perf/util/include/asm/hash.h b/tools/perf/util/include/asm/hash.h new file mode 100644 index 000000000000..d82b170bb216 --- /dev/null +++ b/tools/perf/util/include/asm/hash.h @@ -0,0 +1,6 @@ +#ifndef __ASM_GENERIC_HASH_H +#define __ASM_GENERIC_HASH_H + +/* Stub */ + +#endif /* __ASM_GENERIC_HASH_H */ From 9176753d1ed56951a6ee2a0f0a3f367904e35567 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:36 +0200 Subject: [PATCH 043/171] perf symbols: Fix symbol annotation for relocated kernel Kernel maps map memory addresses to file offsets. For symbol annotation, objdump needs the object VMA addresses. For an unrelocated kernel, that is the same as the memory address.
The addresses passed to objdump for symbol annotation did not take into account kernel relocation. This patch fixes that. Reported-by: Linus Torvalds Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 5 +++-- tools/perf/util/map.h | 1 + tools/perf/util/symbol-elf.c | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 3b97513f0e77..39cd2d0faff6 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -39,6 +39,7 @@ void map__init(struct map *map, enum map_type type, map->start = start; map->end = end; map->pgoff = pgoff; + map->reloc = 0; map->dso = dso; map->map_ip = map__map_ip; map->unmap_ip = map__unmap_ip; @@ -288,7 +289,7 @@ u64 map__rip_2objdump(struct map *map, u64 rip) if (map->dso->rel) return rip - map->pgoff; - return map->unmap_ip(map, rip); + return map->unmap_ip(map, rip) - map->reloc; } /** @@ -311,7 +312,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip) if (map->dso->rel) return map->unmap_ip(map, ip + map->pgoff); - return ip; + return ip + map->reloc; } void map_groups__init(struct map_groups *mg) diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 18068c6b71c1..257e513205ce 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -36,6 +36,7 @@ struct map { bool erange_warned; u32 priv; u64 pgoff; + u64 reloc; u32 maj, min; /* only valid for MMAP2 record */ u64 ino; /* only valid for MMAP2 record */ u64 ino_generation;/* only valid for MMAP2 record */ diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 8f12f0f5101d..3e9f336740fa 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -751,6 +751,8 @@ int dso__load_sym(struct dso *dso, struct map *map, if (strcmp(elf_name, kmap->ref_reloc_sym->name)) continue; kmap->ref_reloc_sym->unrelocated_addr = sym.st_value; + map->reloc = kmap->ref_reloc_sym->addr - + kmap->ref_reloc_sym->unrelocated_addr; break; } } From 29b596b57426831fce92cd0ebb01c77627616fdf Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:37 +0200 Subject: [PATCH 044/171] perf tools: Add kallsyms__get_function_start() Separate out the logic used to find the start address of the reference symbol used to track kernel relocation. kallsyms__get_function_start() is used in subsequent patches. 
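As a usage sketch only (the file name and symbol below are example inputs, not taken from this patch), the new helper is meant to be called like this, returning 0 when the symbol cannot be found:

	u64 start;

	/* address of the reference symbol as reported by kallsyms */
	start = kallsyms__get_function_start("/proc/kallsyms", "_text");
	if (!start)
		return -ENOENT;	/* reference symbol not present */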
Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 18 +++++++++++++++--- tools/perf/util/event.h | 3 +++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 1fc1c2f04772..17476df5c7b2 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -470,6 +470,17 @@ static int find_symbol_cb(void *arg, const char *name, char type, return 1; } +u64 kallsyms__get_function_start(const char *kallsyms_filename, + const char *symbol_name) +{ + struct process_symbol_args args = { .name = symbol_name, }; + + if (kallsyms__parse(kallsyms_filename, &args, find_symbol_cb) <= 0) + return 0; + + return args.start; +} + int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, @@ -480,13 +491,13 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, char path[PATH_MAX]; char name_buff[PATH_MAX]; struct map *map; + u64 start; int err; /* * We should get this from /sys/kernel/sections/.text, but till that is * available use this, and after it is use this as a fallback for older * kernels. */ - struct process_symbol_args args = { .name = symbol_name, }; union perf_event *event = zalloc((sizeof(event->mmap) + machine->id_hdr_size)); if (event == NULL) { @@ -513,7 +524,8 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, } } - if (kallsyms__parse(filename, &args, find_symbol_cb) <= 0) { + start = kallsyms__get_function_start(filename, symbol_name); + if (!start) { free(event); return -ENOENT; } @@ -525,7 +537,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, event->mmap.header.type = PERF_RECORD_MMAP; event->mmap.header.size = (sizeof(event->mmap) - (sizeof(event->mmap.filename) - size) + machine->id_hdr_size); - event->mmap.pgoff = args.start; + event->mmap.pgoff = start; event->mmap.start = map->start; event->mmap.len = map->end - event->mmap.start; event->mmap.pid = machine->pid; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index faf6e219be21..66a0c0392863 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -279,4 +279,7 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union perf_event *event, FILE *fp); +u64 kallsyms__get_function_start(const char *kallsyms_filename, + const char *symbol_name); + #endif /* __PERF_RECORD_H */ From 15a0a8706c32bd38bff9ebf7c6ef24f32d1ea921 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:38 +0200 Subject: [PATCH 045/171] perf machine: Add machine__get_kallsyms_filename() Separate out the logic used to make the kallsyms full path name for a machine. It will be reused in a subsequent patch. 
Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index ded74590b92f..290c2e6d4001 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -496,19 +496,22 @@ static int symbol__in_kernel(void *arg, const char *name, return 1; } +static void machine__get_kallsyms_filename(struct machine *machine, char *buf, + size_t bufsz) +{ + if (machine__is_default_guest(machine)) + scnprintf(buf, bufsz, "%s", symbol_conf.default_guest_kallsyms); + else + scnprintf(buf, bufsz, "%s/proc/kallsyms", machine->root_dir); +} + /* Figure out the start address of kernel map from /proc/kallsyms */ static u64 machine__get_kernel_start_addr(struct machine *machine) { - const char *filename; - char path[PATH_MAX]; + char filename[PATH_MAX]; struct process_args args; - if (machine__is_default_guest(machine)) - filename = (char *)symbol_conf.default_guest_kallsyms; - else { - sprintf(path, "%s/proc/kallsyms", machine->root_dir); - filename = path; - } + machine__get_kallsyms_filename(machine, filename, PATH_MAX); if (symbol__restricted_filename(filename, "/proc/kallsyms")) return 0; From 5512cf24bed2de56f1ef44b6cc9a0a9b15499cea Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:39 +0200 Subject: [PATCH 046/171] perf machine: Set up ref_reloc_sym in machine__create_kernel_maps() The ref_reloc_sym is always needed for the kernel map in order to check for relocation. Consequently set it up when the kernel map is created. Otherwise it was only being set up by 'perf record'. 
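A simplified sketch of why the kernel map carries ref_reloc_sym (the helper name below is hypothetical; it mirrors the delta computation used later in this series):

	/* Hypothetical helper: with ref_reloc_sym recorded on the kernel map,
	 * relocation is detected by re-reading the same symbol from the
	 * current kallsyms and comparing it with the recorded address. */
	static u64 kernel_reloc_delta(struct kmap *kmap, const char *kallsyms_filename)
	{
		u64 addr = kallsyms__get_function_start(kallsyms_filename,
							kmap->ref_reloc_sym->name);

		/* 0 means the kernel has not moved since the address was recorded */
		return addr ? addr - kmap->ref_reloc_sym->addr : 0;
	}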
Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 23 +++++++++++++++++++++++ tools/perf/util/machine.h | 2 ++ 2 files changed, 25 insertions(+) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 290c2e6d4001..c872991e0f65 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -832,9 +832,25 @@ static int machine__create_modules(struct machine *machine) return 0; } +const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL}; + int machine__create_kernel_maps(struct machine *machine) { struct dso *kernel = machine__get_kernel(machine); + char filename[PATH_MAX]; + const char *name; + u64 addr = 0; + int i; + + machine__get_kallsyms_filename(machine, filename, PATH_MAX); + + for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) { + addr = kallsyms__get_function_start(filename, name); + if (addr) + break; + } + if (!addr) + return -1; if (kernel == NULL || __machine__create_kernel_maps(machine, kernel) < 0) @@ -853,6 +869,13 @@ int machine__create_kernel_maps(struct machine *machine) * Now that we have all the maps created, just set the ->end of them: */ map_groups__fixup_end(&machine->kmaps); + + if (maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, + addr)) { + machine__destroy_kernel_maps(machine); + return -1; + } + return 0; } diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 477133015440..f77e91e483dc 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -18,6 +18,8 @@ union perf_event; #define HOST_KERNEL_ID (-1) #define DEFAULT_GUEST_KERNEL_ID (0) +extern const char *ref_reloc_sym_names[]; + struct machine { struct rb_node rb_node; pid_t pid; From 0ae617bedde062003fd70e566e9a2601e273ea0e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:40 +0200 Subject: [PATCH 047/171] perf record: Get ref_reloc_sym from kernel map Now that ref_reloc_sym is set up when the kernel map is created, 'perf record' does not need to pass the symbol names to perf_event__synthesize_kernel_mmap() which can read the values needed from ref_reloc_sym directly. Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 10 ++-------- tools/perf/util/event.c | 26 ++++++-------------------- tools/perf/util/event.h | 3 +-- 3 files changed, 9 insertions(+), 30 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3c394bf16fa8..af47531b82ec 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -287,10 +287,7 @@ static void perf_event__synthesize_guest_os(struct machine *machine, void *data) * have no _text sometimes. 
*/ err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine, "_text"); - if (err < 0) - err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine, "_stext"); + machine); if (err < 0) pr_err("Couldn't record guest kernel [%d]'s reference" " relocation symbol.\n", machine->pid); @@ -457,10 +454,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) } err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine, "_text"); - if (err < 0) - err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine, "_stext"); + machine); if (err < 0) pr_err("Couldn't record kernel reference relocation symbol\n" "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 17476df5c7b2..b0f3ca850e9e 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -483,15 +483,13 @@ u64 kallsyms__get_function_start(const char *kallsyms_filename, int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, - struct machine *machine, - const char *symbol_name) + struct machine *machine) { size_t size; - const char *filename, *mmap_name; - char path[PATH_MAX]; + const char *mmap_name; char name_buff[PATH_MAX]; struct map *map; - u64 start; + struct kmap *kmap; int err; /* * We should get this from /sys/kernel/sections/.text, but till that is @@ -513,31 +511,19 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, * see kernel/perf_event.c __perf_event_mmap */ event->header.misc = PERF_RECORD_MISC_KERNEL; - filename = "/proc/kallsyms"; } else { event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - if (machine__is_default_guest(machine)) - filename = (char *) symbol_conf.default_guest_kallsyms; - else { - sprintf(path, "%s/proc/kallsyms", machine->root_dir); - filename = path; - } - } - - start = kallsyms__get_function_start(filename, symbol_name); - if (!start) { - free(event); - return -ENOENT; } map = machine->vmlinux_maps[MAP__FUNCTION]; + kmap = map__kmap(map); size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), - "%s%s", mmap_name, symbol_name) + 1; + "%s%s", mmap_name, kmap->ref_reloc_sym->name) + 1; size = PERF_ALIGN(size, sizeof(u64)); event->mmap.header.type = PERF_RECORD_MMAP; event->mmap.header.size = (sizeof(event->mmap) - (sizeof(event->mmap.filename) - size) + machine->id_hdr_size); - event->mmap.pgoff = start; + event->mmap.pgoff = kmap->ref_reloc_sym->addr; event->mmap.start = map->start; event->mmap.len = map->end - event->mmap.start; event->mmap.pid = machine->pid; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 66a0c0392863..851fa06f4a42 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -214,8 +214,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, struct machine *machine, bool mmap_data); int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, - struct machine *machine, - const char *symbol_name); + struct machine *machine); int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, From a00d28cb72d3629c6481fe21ba6c6b4f96caed49 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:41 +0200 Subject: [PATCH 048/171] perf symbols: Prevent the use of kcore if the kernel has moved Use of kcore is predicated upon it matching the recorded data. If the kernel has been relocated at boot time (i.e. 
since the data was recorded) then do not use kcore. Note that it is possible to make a copy of kcore at the time the data is recorded using 'perf buildid-cache'. Then the perf tools will use the copy because it does match the data. Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-7-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 39ce9adbaaf0..4ac1f871ec27 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -976,6 +976,23 @@ static int validate_kcore_modules(const char *kallsyms_filename, return 0; } +static int validate_kcore_addresses(const char *kallsyms_filename, + struct map *map) +{ + struct kmap *kmap = map__kmap(map); + + if (kmap->ref_reloc_sym && kmap->ref_reloc_sym->name) { + u64 start; + + start = kallsyms__get_function_start(kallsyms_filename, + kmap->ref_reloc_sym->name); + if (start != kmap->ref_reloc_sym->addr) + return -EINVAL; + } + + return validate_kcore_modules(kallsyms_filename, map); +} + struct kcore_mapfn_data { struct dso *dso; enum map_type type; @@ -1019,8 +1036,8 @@ static int dso__load_kcore(struct dso *dso, struct map *map, kallsyms_filename)) return -EINVAL; - /* All modules must be present at their original addresses */ - if (validate_kcore_modules(kallsyms_filename, map)) + /* Modules and kernel must be present at their original addresses */ + if (validate_kcore_addresses(kallsyms_filename, map)) return -EINVAL; md.dso = dso; @@ -1424,7 +1441,7 @@ static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz) continue; scnprintf(kallsyms_filename, sizeof(kallsyms_filename), "%s/%s/kallsyms", dir, dent->d_name); - if (!validate_kcore_modules(kallsyms_filename, map)) { + if (!validate_kcore_addresses(kallsyms_filename, map)) { strlcpy(dir, kallsyms_filename, dir_sz); ret = 0; break; } @@ -1479,7 +1496,7 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) if (fd != -1) { close(fd); /* If module maps match go with /proc/kallsyms */ - if (!validate_kcore_modules("/proc/kallsyms", map)) + if (!validate_kcore_addresses("/proc/kallsyms", map)) goto proc_kallsyms; } From c080f72753def150993144d755379941f8b14683 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:42 +0200 Subject: [PATCH 049/171] perf tests: No need to set up ref_reloc_sym Now that ref_reloc_sym is set up by machine__create_kernel_maps(), the "vmlinux symtab matches kallsyms" test does not have to do it itself.
Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-8-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/vmlinux-kallsyms.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 2bd13edcbc17..3d9088003a5b 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -26,7 +26,6 @@ int test__vmlinux_matches_kallsyms(void) struct map *kallsyms_map, *vmlinux_map; struct machine kallsyms, vmlinux; enum map_type type = MAP__FUNCTION; - struct ref_reloc_sym ref_reloc_sym = { .name = "_stext", }; u64 mem_start, mem_end; /* @@ -70,14 +69,6 @@ int test__vmlinux_matches_kallsyms(void) */ kallsyms_map = machine__kernel_map(&kallsyms, type); - sym = map__find_symbol_by_name(kallsyms_map, ref_reloc_sym.name, NULL); - if (sym == NULL) { - pr_debug("dso__find_symbol_by_name "); - goto out; - } - - ref_reloc_sym.addr = UM(sym->start); - /* * Step 5: * @@ -89,7 +80,6 @@ int test__vmlinux_matches_kallsyms(void) } vmlinux_map = machine__kernel_map(&vmlinux, type); - map__kmap(vmlinux_map)->ref_reloc_sym = &ref_reloc_sym; /* * Step 6: From d9b62aba87a82939c73f451a166c7a21342350d6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:43 +0200 Subject: [PATCH 050/171] perf tools: Adjust kallsyms for relocated kernel If the kernel is relocated at boot time, kallsyms will not match data recorded previously. That does not matter for modules because they are corrected anyway. It also does not matter if vmlinux is being used for symbols. But if perf tools has only kallsyms then the symbols will not match. Fix by applying the delta gained by comparing the old and current addresses of the relocation reference symbol. Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-9-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 4ac1f871ec27..a9d758a3b371 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -627,7 +627,7 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, * kernel range is broken in several maps, named [kernel].N, as we don't have * the original ELF section names vmlinux have. 
*/ -static int dso__split_kallsyms(struct dso *dso, struct map *map, +static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta, symbol_filter_t filter) { struct map_groups *kmaps = map__kmap(map)->kmaps; @@ -692,6 +692,12 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, char dso_name[PATH_MAX]; struct dso *ndso; + if (delta) { + /* Kernel was relocated at boot time */ + pos->start -= delta; + pos->end -= delta; + } + if (count == 0) { curr_map = map; goto filter_symbol; @@ -721,6 +727,10 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, curr_map->map_ip = curr_map->unmap_ip = identity__map_ip; map_groups__insert(kmaps, curr_map); ++kernel_range; + } else if (delta) { + /* Kernel was relocated at boot time */ + pos->start -= delta; + pos->end -= delta; } filter_symbol: if (filter && filter(curr_map, pos)) { @@ -1130,15 +1140,41 @@ out_err: return -EINVAL; } +/* + * If the kernel is relocated at boot time, kallsyms won't match. Compute the + * delta based on the relocation reference symbol. + */ +static int kallsyms__delta(struct map *map, const char *filename, u64 *delta) +{ + struct kmap *kmap = map__kmap(map); + u64 addr; + + if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name) + return 0; + + addr = kallsyms__get_function_start(filename, + kmap->ref_reloc_sym->name); + if (!addr) + return -1; + + *delta = addr - kmap->ref_reloc_sym->addr; + return 0; +} + int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map, symbol_filter_t filter) { + u64 delta = 0; + if (symbol__restricted_filename(filename, "/proc/kallsyms")) return -1; if (dso__load_all_kallsyms(dso, filename, map) < 0) return -1; + if (kallsyms__delta(map, filename, &delta)) + return -1; + symbols__fixup_duplicate(&dso->symbols[map->type]); symbols__fixup_end(&dso->symbols[map->type]); @@ -1150,7 +1186,7 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, if (!dso__load_kcore(dso, map, filename)) return dso__split_kallsyms_for_kcore(dso, map, filter); else - return dso__split_kallsyms(dso, map, filter); + return dso__split_kallsyms(dso, map, delta, filter); } static int dso__load_perf_map(struct dso *dso, struct map *map, From d3b70220292c40d3b499797fd2f33f608fc35edb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:44 +0200 Subject: [PATCH 051/171] perf buildid-cache: Check relocation when checking for existing kcore perf buildid-cache does not make another copy of kcore if the buildid and modules match an existing copy. That does not take into account the possibility that the kernel has been relocated. Extend the check to check if the reference relocation symbol matches too, otherwise do make a copy. 
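A concrete illustration of the check being added (the paths and addresses below are made-up examples, not recorded data):

	/* Same buildid and modules, but the kernel moved (e.g. after kexec),
	 * so the cached kcore copy must not be reused. */
	u64 cached = kallsyms__get_function_start("<cached-copy>/kallsyms", "_text");
	u64 fresh  = kallsyms__get_function_start("<from_dir>/kallsyms", "_text");
	bool reuse = (cached == fresh);	/* false if e.g. 0xffffffff81000000 vs 0xffffffff9a200000 */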
Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-10-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-buildid-cache.c | 33 ++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index cfede86161d8..b22dbb16f877 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -63,11 +63,35 @@ static int build_id_cache__kcore_dir(char *dir, size_t sz) return 0; } +static bool same_kallsyms_reloc(const char *from_dir, char *to_dir) +{ + char from[PATH_MAX]; + char to[PATH_MAX]; + const char *name; + u64 addr1 = 0, addr2 = 0; + int i; + + scnprintf(from, sizeof(from), "%s/kallsyms", from_dir); + scnprintf(to, sizeof(to), "%s/kallsyms", to_dir); + + for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) { + addr1 = kallsyms__get_function_start(from, name); + if (addr1) + break; + } + + if (name) + addr2 = kallsyms__get_function_start(to, name); + + return addr1 == addr2; +} + static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir, size_t to_dir_sz) { char from[PATH_MAX]; char to[PATH_MAX]; + char to_subdir[PATH_MAX]; struct dirent *dent; int ret = -1; DIR *d; @@ -86,10 +110,11 @@ static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir, continue; scnprintf(to, sizeof(to), "%s/%s/modules", to_dir, dent->d_name); - if (!compare_proc_modules(from, to)) { - scnprintf(to, sizeof(to), "%s/%s", to_dir, - dent->d_name); - strlcpy(to_dir, to, to_dir_sz); + scnprintf(to_subdir, sizeof(to_subdir), "%s/%s", + to_dir, dent->d_name); + if (!compare_proc_modules(from, to) && + same_kallsyms_reloc(from_dir, to_subdir)) { + strlcpy(to_dir, to_subdir, to_dir_sz); ret = 0; break; } From 718360c59f34b80d9878429300c1c688f7c2031d Mon Sep 17 00:00:00 2001 From: Noah Massey Date: Thu, 30 Jan 2014 21:31:12 -0500 Subject: [PATCH 052/171] nfs: fix setting of ACLs on file creation. nfs3_get_acl() tries to skip posix equivalent ACLs, but misinterprets the return value of posix_acl_equiv_mode(). Fix it. This is a regression introduced by "nfs: use generic posix ACL infrastructure for v3 Posix ACLs" CC: Christoph Hellwig CC: linux-nfs@vger.kernel.org CC: linux-fsdevel@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs3acl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 9a5ca03fa539..0851f852568d 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -80,7 +80,7 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type) } if (res.acl_access != NULL) { - if (posix_acl_equiv_mode(res.acl_access, NULL) || + if ((posix_acl_equiv_mode(res.acl_access, NULL) == 0) || res.acl_access->a_count == 0) { posix_acl_release(res.acl_access); res.acl_access = NULL; From 17ead6c85c3d0ef57a14d1373f1f1cee2ce60ea8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 1 Feb 2014 14:53:23 -0500 Subject: [PATCH 053/171] NFSv4: Fix memory corruption in nfs4_proc_open_confirm nfs41_wake_and_assign_slot() relies on the task->tk_msg.rpc_argp and task->tk_msg.rpc_resp always pointing to the session sequence arguments. 
nfs4_proc_open_confirm tries to pull a fast one by reusing the open sequence structure, thus causing corruption of the NFSv4 slot table. Cc: stable@vger.kernel.org # 3.12+ Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 ++++---- include/linux/nfs_xdr.h | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 42da6af77587..2da6a698b8f7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1620,15 +1620,15 @@ static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata) { struct nfs4_opendata *data = calldata; - nfs40_setup_sequence(data->o_arg.server, &data->o_arg.seq_args, - &data->o_res.seq_res, task); + nfs40_setup_sequence(data->o_arg.server, &data->c_arg.seq_args, + &data->c_res.seq_res, task); } static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) { struct nfs4_opendata *data = calldata; - nfs40_sequence_done(task, &data->o_res.seq_res); + nfs40_sequence_done(task, &data->c_res.seq_res); data->rpc_status = task->tk_status; if (data->rpc_status == 0) { @@ -1686,7 +1686,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) }; int status; - nfs4_init_sequence(&data->o_arg.seq_args, &data->o_res.seq_res, 1); + nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1); kref_get(&data->kref); data->rpc_done = 0; data->rpc_status = 0; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 3ccfcecf8999..b2fb167b2e6d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -379,12 +379,14 @@ struct nfs_openres { * Arguments to the open_confirm call. */ struct nfs_open_confirmargs { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; nfs4_stateid * stateid; struct nfs_seqid * seqid; }; struct nfs_open_confirmres { + struct nfs4_sequence_res seq_res; nfs4_stateid stateid; struct nfs_seqid * seqid; }; From 20b9a9024540a775395d5d1f41eec0ec6ec41f9b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 1 Feb 2014 13:47:06 -0500 Subject: [PATCH 054/171] NFSv4.1: nfs4_destroy_session must call rpc_destroy_waitqueue There may still be timers active on the session waitqueues. Make sure that we kill them before freeing the memory. 
Cc: stable@vger.kernel.org # 3.12+ Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 2 +- fs/nfs/nfs4session.c | 25 ++++++++++++++++++++----- fs/nfs/nfs4session.h | 2 +- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index dbb3e1f30c68..860ad26a5590 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -170,7 +170,7 @@ void nfs41_shutdown_client(struct nfs_client *clp) void nfs40_shutdown_client(struct nfs_client *clp) { if (clp->cl_slot_tbl) { - nfs4_release_slot_table(clp->cl_slot_tbl); + nfs4_shutdown_slot_table(clp->cl_slot_tbl); kfree(clp->cl_slot_tbl); } } diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index cf883c7ae053..e799dc3c3b1d 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -231,14 +231,23 @@ out: return ret; } +/* + * nfs4_release_slot_table - release all slot table entries + */ +static void nfs4_release_slot_table(struct nfs4_slot_table *tbl) +{ + nfs4_shrink_slot_table(tbl, 0); +} + /** - * nfs4_release_slot_table - release resources attached to a slot table + * nfs4_shutdown_slot_table - release resources attached to a slot table * @tbl: slot table to shut down * */ -void nfs4_release_slot_table(struct nfs4_slot_table *tbl) +void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl) { - nfs4_shrink_slot_table(tbl, 0); + nfs4_release_slot_table(tbl); + rpc_destroy_wait_queue(&tbl->slot_tbl_waitq); } /** @@ -422,7 +431,7 @@ void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, spin_unlock(&tbl->slot_tbl_lock); } -static void nfs4_destroy_session_slot_tables(struct nfs4_session *session) +static void nfs4_release_session_slot_tables(struct nfs4_session *session) { nfs4_release_slot_table(&session->fc_slot_table); nfs4_release_slot_table(&session->bc_slot_table); @@ -450,7 +459,7 @@ int nfs4_setup_session_slot_tables(struct nfs4_session *ses) if (status && tbl->slots == NULL) /* Fore and back channel share a connection so get * both slot tables or neither */ - nfs4_destroy_session_slot_tables(ses); + nfs4_release_session_slot_tables(ses); return status; } @@ -470,6 +479,12 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) return session; } +static void nfs4_destroy_session_slot_tables(struct nfs4_session *session) +{ + nfs4_shutdown_slot_table(&session->fc_slot_table); + nfs4_shutdown_slot_table(&session->bc_slot_table); +} + void nfs4_destroy_session(struct nfs4_session *session) { struct rpc_xprt *xprt; diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 232306100651..b34ada9bc6a2 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -74,7 +74,7 @@ enum nfs4_session_state { extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl, unsigned int max_reqs, const char *queue); -extern void nfs4_release_slot_table(struct nfs4_slot_table *tbl); +extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl); extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl); extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl); From 85fc73a2cdf10cf42bc36fb3bca3896b2095a1c2 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Sat, 1 Feb 2014 13:30:19 +0100 Subject: [PATCH 055/171] x86: Fix the initialization of physnode_map With DISCONTIGMEM, the mapping between a pfn and its owning node is initialized using data provided by the BIOS. However, the initialization may fail if the extents are not aligned to section boundary (64M). 
The symptom of this bug is an early boot failure in pfn_to_page(), as it tries to access NODE_DATA(__nid) using an index from an uninitialized element of the physnode_map[] array. While the bug is always present, it is more likely to be hit in kdump kernels on large machines, because: 1. The memory map for a kdump kernel is specified as exactmap, and exactmap is more likely to be unaligned. 2. Large reservations are more likely to span across a 64M boundary. [ hpa: fixed incorrect use of "pfn" instead of "start" ] Signed-off-by: Petr Tesarik Link: http://lkml.kernel.org/r/20140201133019.32e56f86@hananiah.suse.cz Acked-by: David Rientjes Signed-off-by: H. Peter Anvin --- arch/x86/mm/numa_32.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 0342d27ca798..47b6436e41c2 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -52,6 +52,8 @@ void memory_present(int nid, unsigned long start, unsigned long end) nid, start, end); printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); printk(KERN_DEBUG " "); + start = round_down(start, PAGES_PER_SECTION); + end = round_up(end, PAGES_PER_SECTION); for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { physnode_map[pfn / PAGES_PER_SECTION] = nid; printk(KERN_CONT "%lx ", pfn); From 9ac27090f61ea6735a62b0a98c7669c833bcdc09 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 31 Jan 2014 16:53:39 -0700 Subject: [PATCH 056/171] NVMe: Namespace use after free on surprise removal An nvme block device may have open references when the device is removed. New commands may still be sent on the removed device, so we need to ref count the opens, return errors for new commands, and not free the namespace and nvme_dev until all references are closed.
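In outline this is the usual kref lifetime pattern; a condensed sketch of the reference flow (the actual open/release hooks are in the diff below):

	kref_init(&dev->kref);			/* probe: initial reference           */
	kref_get(&dev->kref);			/* nvme_open(): each opener pins dev  */
	kref_put(&dev->kref, nvme_free_dev);	/* nvme_release(): opener drops it    */
	kref_put(&dev->kref, nvme_free_dev);	/* remove: drop the probe reference;
						   the last put frees namespaces+dev */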
Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 55 ++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 13 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 23728099e36f..cd39390710a0 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1716,10 +1716,31 @@ static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, #define nvme_compat_ioctl NULL #endif +static int nvme_open(struct block_device *bdev, fmode_t mode) +{ + struct nvme_ns *ns = bdev->bd_disk->private_data; + struct nvme_dev *dev = ns->dev; + + kref_get(&dev->kref); + return 0; +} + +static void nvme_free_dev(struct kref *kref); + +static void nvme_release(struct gendisk *disk, fmode_t mode) +{ + struct nvme_ns *ns = disk->private_data; + struct nvme_dev *dev = ns->dev; + + kref_put(&dev->kref, nvme_free_dev); +} + static const struct block_device_operations nvme_fops = { .owner = THIS_MODULE, .ioctl = nvme_ioctl, .compat_ioctl = nvme_compat_ioctl, + .open = nvme_open, + .release = nvme_release, }; static void nvme_resubmit_bios(struct nvme_queue *nvmeq) @@ -1849,13 +1870,6 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid, return NULL; } -static void nvme_ns_free(struct nvme_ns *ns) -{ - put_disk(ns->disk); - blk_cleanup_queue(ns->queue); - kfree(ns); -} - static int set_queue_count(struct nvme_dev *dev, int count) { int status; @@ -2287,12 +2301,13 @@ static void nvme_dev_shutdown(struct nvme_dev *dev) static void nvme_dev_remove(struct nvme_dev *dev) { - struct nvme_ns *ns, *next; + struct nvme_ns *ns; - list_for_each_entry_safe(ns, next, &dev->namespaces, list) { - list_del(&ns->list); - del_gendisk(ns->disk); - nvme_ns_free(ns); + list_for_each_entry(ns, &dev->namespaces, list) { + if (ns->disk->flags & GENHD_FL_UP) + del_gendisk(ns->disk); + if (!blk_queue_dying(ns->queue)) + blk_cleanup_queue(ns->queue); } } @@ -2349,9 +2364,22 @@ static void nvme_release_instance(struct nvme_dev *dev) spin_unlock(&dev_list_lock); } +static void nvme_free_namespaces(struct nvme_dev *dev) +{ + struct nvme_ns *ns, *next; + + list_for_each_entry_safe(ns, next, &dev->namespaces, list) { + list_del(&ns->list); + put_disk(ns->disk); + kfree(ns); + } +} + static void nvme_free_dev(struct kref *kref) { struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); + + nvme_free_namespaces(dev); kfree(dev->queues); kfree(dev->entry); kfree(dev); @@ -2525,6 +2553,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto release_pools; } + kref_init(&dev->kref); result = nvme_dev_add(dev); if (result) goto shutdown; @@ -2540,11 +2569,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto remove; dev->initialized = 1; - kref_init(&dev->kref); return 0; remove: nvme_dev_remove(dev); + nvme_free_namespaces(dev); shutdown: nvme_dev_shutdown(dev); release_pools: From daa436e67cd2dcac7cbb505bb4425fdfdafaa5a7 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 30 Jan 2014 19:51:14 -0800 Subject: [PATCH 057/171] hwmon: (pmbus) Support per-page exponent in linear mode Some chips use different exponents for sensors on different pages or rails. Detect and store exponent per page to support this situation. This fixes a problem with wrong voltages seen on UCD90120. 
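For reference, in LINEAR16 mode the output voltage is mantissa * 2^exponent, with the 5-bit signed exponent taken from VOUT_MODE for the page being read. A worked example with assumed register values (not measured data) shows why a single stored exponent is not enough:

	page 0: VOUT_MODE = 0x17 -> exponent = -9, READ_VOUT = 0x199A (6554)
	        voltage = 6554 * 2^-9 V ~= 12.80 V
	page 1: VOUT_MODE = 0x1B -> exponent = -5, READ_VOUT = 0x0035 (53)
	        voltage = 53 * 2^-5 V   ~= 1.66 V

	Decoding page 1 with page 0's exponent (-9) would report ~0.10 V,
	which is the kind of wrong reading fixed by storing exponent[page].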
Reported-by: Soren Brinkmann Tested-by: Soren Brinkmann Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus_core.c | 68 +++++++++++++++++--------------- 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index 3cbf66e9d861..291d11fe93e7 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -90,7 +90,8 @@ struct pmbus_data { u32 flags; /* from platform data */ - int exponent; /* linear mode: exponent for output voltages */ + int exponent[PMBUS_PAGES]; + /* linear mode: exponent for output voltages */ const struct pmbus_driver_info *info; @@ -410,7 +411,7 @@ static long pmbus_reg2data_linear(struct pmbus_data *data, long val; if (sensor->class == PSC_VOLTAGE_OUT) { /* LINEAR16 */ - exponent = data->exponent; + exponent = data->exponent[sensor->page]; mantissa = (u16) sensor->data; } else { /* LINEAR11 */ exponent = ((s16)sensor->data) >> 11; @@ -516,7 +517,7 @@ static long pmbus_reg2data(struct pmbus_data *data, struct pmbus_sensor *sensor) #define MIN_MANTISSA (511 * 1000) static u16 pmbus_data2reg_linear(struct pmbus_data *data, - enum pmbus_sensor_classes class, long val) + struct pmbus_sensor *sensor, long val) { s16 exponent = 0, mantissa; bool negative = false; @@ -525,7 +526,7 @@ static u16 pmbus_data2reg_linear(struct pmbus_data *data, if (val == 0) return 0; - if (class == PSC_VOLTAGE_OUT) { + if (sensor->class == PSC_VOLTAGE_OUT) { /* LINEAR16 does not support negative voltages */ if (val < 0) return 0; @@ -534,10 +535,10 @@ static u16 pmbus_data2reg_linear(struct pmbus_data *data, * For a static exponents, we don't have a choice * but to adjust the value to it. */ - if (data->exponent < 0) - val <<= -data->exponent; + if (data->exponent[sensor->page] < 0) + val <<= -data->exponent[sensor->page]; else - val >>= data->exponent; + val >>= data->exponent[sensor->page]; val = DIV_ROUND_CLOSEST(val, 1000); return val & 0xffff; } @@ -548,14 +549,14 @@ static u16 pmbus_data2reg_linear(struct pmbus_data *data, } /* Power is in uW. Convert to mW before converting. */ - if (class == PSC_POWER) + if (sensor->class == PSC_POWER) val = DIV_ROUND_CLOSEST(val, 1000L); /* * For simplicity, convert fan data to milli-units * before calculating the exponent. */ - if (class == PSC_FAN) + if (sensor->class == PSC_FAN) val = val * 1000; /* Reduce large mantissa until it fits into 10 bit */ @@ -585,22 +586,22 @@ static u16 pmbus_data2reg_linear(struct pmbus_data *data, } static u16 pmbus_data2reg_direct(struct pmbus_data *data, - enum pmbus_sensor_classes class, long val) + struct pmbus_sensor *sensor, long val) { long m, b, R; - m = data->info->m[class]; - b = data->info->b[class]; - R = data->info->R[class]; + m = data->info->m[sensor->class]; + b = data->info->b[sensor->class]; + R = data->info->R[sensor->class]; /* Power is in uW. Adjust R and b. 
*/ - if (class == PSC_POWER) { + if (sensor->class == PSC_POWER) { R -= 3; b *= 1000; } /* Calculate Y = (m * X + b) * 10^R */ - if (class != PSC_FAN) { + if (sensor->class != PSC_FAN) { R -= 3; /* Adjust R and b for data in milli-units */ b *= 1000; } @@ -619,7 +620,7 @@ static u16 pmbus_data2reg_direct(struct pmbus_data *data, } static u16 pmbus_data2reg_vid(struct pmbus_data *data, - enum pmbus_sensor_classes class, long val) + struct pmbus_sensor *sensor, long val) { val = clamp_val(val, 500, 1600); @@ -627,20 +628,20 @@ static u16 pmbus_data2reg_vid(struct pmbus_data *data, } static u16 pmbus_data2reg(struct pmbus_data *data, - enum pmbus_sensor_classes class, long val) + struct pmbus_sensor *sensor, long val) { u16 regval; - switch (data->info->format[class]) { + switch (data->info->format[sensor->class]) { case direct: - regval = pmbus_data2reg_direct(data, class, val); + regval = pmbus_data2reg_direct(data, sensor, val); break; case vid: - regval = pmbus_data2reg_vid(data, class, val); + regval = pmbus_data2reg_vid(data, sensor, val); break; case linear: default: - regval = pmbus_data2reg_linear(data, class, val); + regval = pmbus_data2reg_linear(data, sensor, val); break; } return regval; @@ -746,7 +747,7 @@ static ssize_t pmbus_set_sensor(struct device *dev, return -EINVAL; mutex_lock(&data->update_lock); - regval = pmbus_data2reg(data, sensor->class, val); + regval = pmbus_data2reg(data, sensor, val); ret = _pmbus_write_word_data(client, sensor->page, sensor->reg, regval); if (ret < 0) rv = ret; @@ -1643,12 +1644,13 @@ static int pmbus_find_attributes(struct i2c_client *client, * This function is called for all chips. */ static int pmbus_identify_common(struct i2c_client *client, - struct pmbus_data *data) + struct pmbus_data *data, int page) { int vout_mode = -1; - if (pmbus_check_byte_register(client, 0, PMBUS_VOUT_MODE)) - vout_mode = _pmbus_read_byte_data(client, 0, PMBUS_VOUT_MODE); + if (pmbus_check_byte_register(client, page, PMBUS_VOUT_MODE)) + vout_mode = _pmbus_read_byte_data(client, page, + PMBUS_VOUT_MODE); if (vout_mode >= 0 && vout_mode != 0xff) { /* * Not all chips support the VOUT_MODE command, @@ -1659,7 +1661,7 @@ static int pmbus_identify_common(struct i2c_client *client, if (data->info->format[PSC_VOLTAGE_OUT] != linear) return -ENODEV; - data->exponent = ((s8)(vout_mode << 3)) >> 3; + data->exponent[page] = ((s8)(vout_mode << 3)) >> 3; break; case 1: /* VID mode */ if (data->info->format[PSC_VOLTAGE_OUT] != vid) @@ -1674,7 +1676,7 @@ static int pmbus_identify_common(struct i2c_client *client, } } - pmbus_clear_fault_page(client, 0); + pmbus_clear_fault_page(client, page); return 0; } @@ -1682,7 +1684,7 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data, struct pmbus_driver_info *info) { struct device *dev = &client->dev; - int ret; + int page, ret; /* * Some PMBus chips don't support PMBUS_STATUS_BYTE, so try @@ -1715,10 +1717,12 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data, return -ENODEV; } - ret = pmbus_identify_common(client, data); - if (ret < 0) { - dev_err(dev, "Failed to identify chip capabilities\n"); - return ret; + for (page = 0; page < info->pages; page++) { + ret = pmbus_identify_common(client, data, page); + if (ret < 0) { + dev_err(dev, "Failed to identify chip capabilities\n"); + return ret; + } } return 0; } From b0dcfd87323ea86501e93d0fa2a98d2fd3579bcf Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Tue, 21 Jan 2014 16:55:18 +0100 Subject: [PATCH 058/171] pinctrl: 
at91: use locked variant of irq_set_handler When setting the gpio irq type, use the __irq_set_handler_locked() variant instead of irq_set_handler() to prevent a false spinlock recursion warning. Signed-off-by: Nicolas Ferre Cc: stable # v3.12 Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-at91.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index 38c6f8b9790e..d990e33d8aa7 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -1286,22 +1286,22 @@ static int alt_gpio_irq_type(struct irq_data *d, unsigned type) switch (type) { case IRQ_TYPE_EDGE_RISING: - irq_set_handler(d->irq, handle_simple_irq); + __irq_set_handler_locked(d->irq, handle_simple_irq); writel_relaxed(mask, pio + PIO_ESR); writel_relaxed(mask, pio + PIO_REHLSR); break; case IRQ_TYPE_EDGE_FALLING: - irq_set_handler(d->irq, handle_simple_irq); + __irq_set_handler_locked(d->irq, handle_simple_irq); writel_relaxed(mask, pio + PIO_ESR); writel_relaxed(mask, pio + PIO_FELLSR); break; case IRQ_TYPE_LEVEL_LOW: - irq_set_handler(d->irq, handle_level_irq); + __irq_set_handler_locked(d->irq, handle_level_irq); writel_relaxed(mask, pio + PIO_LSR); writel_relaxed(mask, pio + PIO_FELLSR); break; case IRQ_TYPE_LEVEL_HIGH: - irq_set_handler(d->irq, handle_level_irq); + __irq_set_handler_locked(d->irq, handle_level_irq); writel_relaxed(mask, pio + PIO_LSR); writel_relaxed(mask, pio + PIO_REHLSR); break; @@ -1310,7 +1310,7 @@ static int alt_gpio_irq_type(struct irq_data *d, unsigned type) * disable additional interrupt modes: * fall back to default behavior */ - irq_set_handler(d->irq, handle_simple_irq); + __irq_set_handler_locked(d->irq, handle_simple_irq); writel_relaxed(mask, pio + PIO_AIMDR); return 0; case IRQ_TYPE_NONE: From 795779df22afc8bdee4e9fbe5c18c47e44974d75 Mon Sep 17 00:00:00 2001 From: Chris Ruehl Date: Wed, 22 Jan 2014 11:14:51 +0800 Subject: [PATCH 059/171] pinctrl: imx27: fix wrong offset to ICONFB The offset to ICONFB was incorrect; this patch sets the correct value, 0x14. The dev_dbg in imx1_write_2bit printed the wrong address and has been moved after the address calculation.
Cc: stable@vger.kernel.org Signed-off-by: Chris Ruehl Reviewed-by: Markus Pargmann Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-imx1-core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/pinctrl-imx1-core.c b/drivers/pinctrl/pinctrl-imx1-core.c index 17aecde1b51d..9e7893fe96e0 100644 --- a/drivers/pinctrl/pinctrl-imx1-core.c +++ b/drivers/pinctrl/pinctrl-imx1-core.c @@ -45,7 +45,7 @@ struct imx1_pinctrl { #define MX1_DDIR 0x00 #define MX1_OCR 0x04 #define MX1_ICONFA 0x0c -#define MX1_ICONFB 0x10 +#define MX1_ICONFB 0x14 #define MX1_GIUS 0x20 #define MX1_GPR 0x38 #define MX1_PUEN 0x40 @@ -97,13 +97,13 @@ static void imx1_write_2bit(struct imx1_pinctrl *ipctl, unsigned int pin_id, u32 old_val; u32 new_val; - dev_dbg(ipctl->dev, "write: register 0x%p offset %d value 0x%x\n", - reg, offset, value); - /* Use the next register if the pin's port pin number is >=16 */ if (pin_id % 32 >= 16) reg += 0x04; + dev_dbg(ipctl->dev, "write: register 0x%p offset %d value 0x%x\n", + reg, offset, value); + /* Get current state of pins */ old_val = readl(reg); old_val &= mask; From f17248ed868767567298e1cdf06faf8159a81f7c Mon Sep 17 00:00:00 2001 From: Tony Prisk Date: Thu, 23 Jan 2014 21:57:33 +1300 Subject: [PATCH 060/171] pinctrl: vt8500: Change devicetree data parsing Due to an assumption in the VT8500 pinctrl driver, the value passed from devicetree for 'wm,pull' was not explicitly translated before being passed to pinconf. Since v3.10, changes to 'enum pin_config_param', PIN_CONFIG_BIAS_PULL_(UP/DOWN) no longer map 1-to-1 with the expected values in devicetree. This patch adds a small translation between the devicetree values (0..2) and the enum pin_config_param equivalent values. Cc: # v3.10+ Signed-off-by: Tony Prisk Signed-off-by: Linus Walleij --- drivers/pinctrl/vt8500/pinctrl-wmt.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/vt8500/pinctrl-wmt.c b/drivers/pinctrl/vt8500/pinctrl-wmt.c index b28d1af9c232..9802b67040cc 100644 --- a/drivers/pinctrl/vt8500/pinctrl-wmt.c +++ b/drivers/pinctrl/vt8500/pinctrl-wmt.c @@ -276,7 +276,20 @@ static int wmt_pctl_dt_node_to_map_pull(struct wmt_pinctrl_data *data, if (!configs) return -ENOMEM; - configs[0] = pull; + switch (pull) { + case 0: + configs[0] = PIN_CONFIG_BIAS_DISABLE; + break; + case 1: + configs[0] = PIN_CONFIG_BIAS_PULL_DOWN; + break; + case 2: + configs[0] = PIN_CONFIG_BIAS_PULL_UP; + break; + default: + configs[0] = PIN_CONFIG_BIAS_DISABLE; + dev_err(data->dev, "invalid pull state %d - disabling\n", pull); + } map->type = PIN_MAP_TYPE_CONFIGS_PIN; map->data.configs.group_or_pin = data->groups[group]; From e3365d0974ed64157f5b5a576c611057dc40a595 Mon Sep 17 00:00:00 2001 From: Chris Ruehl Date: Wed, 22 Jan 2014 11:14:52 +0800 Subject: [PATCH 061/171] pinctrl: imx27: fix offset calculation in imx_read_2bit The offset into the 2-bit registers was calculated incorrectly; this patch fixes the problem. The debugfs printout for oconf, iconfa, iconfb now shows the real values.
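With two bits per pin and sixteen pins per 32-bit register, the bit offset inside a register is (pin_id % 16) * 2, and pins 16..31 of a port live in the following register (reg + 0x04). A short worked example with assumed pin numbers:

	pin_id 5  : stays in the first register, offset (5 % 16) * 2 = 10 -> bits 10..11
	pin_id 20 : 20 % 32 >= 16, so reg += 0x04; offset (20 % 16) * 2 = 8 -> bits 8..9

	The old code used (pin_id % 16) without the multiplication, so pin 5
	was read starting at bit 5 and debugfs showed values from the wrong pins.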
Cc: stable@vger.kernel.org Signed-off-by: Chris Ruehl Reviewed-by: Markus Pargmann Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-imx1-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-imx1-core.c b/drivers/pinctrl/pinctrl-imx1-core.c index 9e7893fe96e0..815384b377b5 100644 --- a/drivers/pinctrl/pinctrl-imx1-core.c +++ b/drivers/pinctrl/pinctrl-imx1-core.c @@ -139,7 +139,7 @@ static int imx1_read_2bit(struct imx1_pinctrl *ipctl, unsigned int pin_id, u32 reg_offset) { void __iomem *reg = imx1_mem(ipctl, pin_id) + reg_offset; - int offset = pin_id % 16; + int offset = (pin_id % 16) * 2; /* Use the next register if the pin's port pin number is >=16 */ if (pin_id % 32 >= 16) From fa74d0d3e30cf079e94db327ea6d4726cd7d5871 Mon Sep 17 00:00:00 2001 From: Qipan Li Date: Mon, 27 Jan 2014 14:01:29 +0800 Subject: [PATCH 062/171] pinctrl: sirf: correct the pin index of ac97_pins group according to datasheet and ac97_muxmask assignment, ac97_pins should be corrected. Signed-off-by: Qipan Li Signed-off-by: Barry Song Signed-off-by: Linus Walleij --- drivers/pinctrl/sirf/pinctrl-prima2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sirf/pinctrl-prima2.c b/drivers/pinctrl/sirf/pinctrl-prima2.c index 37b42651d76a..dde0285544d6 100644 --- a/drivers/pinctrl/sirf/pinctrl-prima2.c +++ b/drivers/pinctrl/sirf/pinctrl-prima2.c @@ -413,7 +413,7 @@ static const struct sirfsoc_padmux ac97_padmux = { .funcval = 0, }; -static const unsigned ac97_pins[] = { 33, 34, 35, 36 }; +static const unsigned ac97_pins[] = { 43, 44, 45, 46 }; static const struct sirfsoc_muxmask spi1_muxmask[] = { { From 4fa71c1550a857ff1dbfe9e99acc1f4cfec5f0d0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Feb 2014 09:37:59 +0100 Subject: [PATCH 063/171] ALSA: usb-audio: Add missing kconfig dependecy The commit 44dcbbb1cd61 introduced the usage of bitreverse helpers but forgot to add the dependency. This patch adds the selection for CONFIG_BITREVERSE. Fixes: 44dcbbb1cd61 ('ALSA: snd-usb: add support for bit-reversed byte formats') Reported-by: Fengguang Wu Cc: Signed-off-by: Takashi Iwai --- sound/usb/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/Kconfig b/sound/usb/Kconfig index de9408b83f75..e05a86b7c0da 100644 --- a/sound/usb/Kconfig +++ b/sound/usb/Kconfig @@ -14,6 +14,7 @@ config SND_USB_AUDIO select SND_HWDEP select SND_RAWMIDI select SND_PCM + select BITREVERSE help Say Y here to include support for USB audio and USB MIDI devices. From e85fc9805591a17ca8af50023ee8e2b61d9a123b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 3 Feb 2014 06:43:59 -0500 Subject: [PATCH 064/171] Revert "xen/grant-table: Avoid m2p_override during mapping" This reverts commit 08ece5bb2312b4510b161a6ef6682f37f4eac8a1. As it breaks ARM builds and needs more attention on the ARM side. 
Acked-by: David Vrabel Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/xen/page.h | 5 +- arch/x86/xen/p2m.c | 17 +++++- drivers/block/xen-blkback/blkback.c | 15 +++-- drivers/xen/gntdev.c | 13 ++--- drivers/xen/grant-table.c | 89 +++++------------------------ include/xen/grant_table.h | 8 +-- 6 files changed, 46 insertions(+), 101 deletions(-) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 787e1bb5aafc..3e276eb23d1b 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -52,8 +52,7 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s, extern int m2p_add_override(unsigned long mfn, struct page *page, struct gnttab_map_grant_ref *kmap_op); extern int m2p_remove_override(struct page *page, - struct gnttab_map_grant_ref *kmap_op, - unsigned long mfn); + struct gnttab_map_grant_ref *kmap_op); extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); @@ -122,7 +121,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) pfn = m2p_find_override_pfn(mfn, ~0); } - /* + /* * pfn is ~0 if there are no entries in the m2p for mfn or if the * entry doesn't map back to the mfn and m2p_override doesn't have a * valid entry for it. diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 8009acbe41e4..696c694986d0 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -899,6 +899,13 @@ int m2p_add_override(unsigned long mfn, struct page *page, "m2p_add_override: pfn %lx not mapped", pfn)) return -EINVAL; } + WARN_ON(PagePrivate(page)); + SetPagePrivate(page); + set_page_private(page, mfn); + page->index = pfn_to_mfn(pfn); + + if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) + return -ENOMEM; if (kmap_op != NULL) { if (!PageHighMem(page)) { @@ -937,16 +944,19 @@ int m2p_add_override(unsigned long mfn, struct page *page, } EXPORT_SYMBOL_GPL(m2p_add_override); int m2p_remove_override(struct page *page, - struct gnttab_map_grant_ref *kmap_op, - unsigned long mfn) + struct gnttab_map_grant_ref *kmap_op) { unsigned long flags; + unsigned long mfn; unsigned long pfn; unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; pfn = page_to_pfn(page); + mfn = get_phys_to_machine(pfn); + if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) + return -EINVAL; if (!PageHighMem(page)) { address = (unsigned long)__va(pfn << PAGE_SHIFT); @@ -960,7 +970,10 @@ int m2p_remove_override(struct page *page, spin_lock_irqsave(&m2p_override_lock, flags); list_del(&page->lru); spin_unlock_irqrestore(&m2p_override_lock, flags); + WARN_ON(!PagePrivate(page)); + ClearPagePrivate(page); + set_phys_to_machine(pfn, page->index); if (kmap_op != NULL) { if (!PageHighMem(page)) { struct multicall_space mcs; diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 875025f299b6..6620b73d0490 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -285,7 +285,8 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST || !rb_next(&persistent_gnt->node)) { - ret = gnttab_unmap_refs(unmap, pages, segs_to_unmap); + ret = gnttab_unmap_refs(unmap, NULL, pages, + segs_to_unmap); BUG_ON(ret); put_free_pages(blkif, pages, segs_to_unmap); segs_to_unmap = 0; @@ -320,7 +321,8 @@ static void unmap_purged_grants(struct work_struct *work) pages[segs_to_unmap] = 
persistent_gnt->page; if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) { - ret = gnttab_unmap_refs(unmap, pages, segs_to_unmap); + ret = gnttab_unmap_refs(unmap, NULL, pages, + segs_to_unmap); BUG_ON(ret); put_free_pages(blkif, pages, segs_to_unmap); segs_to_unmap = 0; @@ -328,7 +330,7 @@ static void unmap_purged_grants(struct work_struct *work) kfree(persistent_gnt); } if (segs_to_unmap > 0) { - ret = gnttab_unmap_refs(unmap, pages, segs_to_unmap); + ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap); BUG_ON(ret); put_free_pages(blkif, pages, segs_to_unmap); } @@ -668,14 +670,15 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif, GNTMAP_host_map, pages[i]->handle); pages[i]->handle = BLKBACK_INVALID_HANDLE; if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) { - ret = gnttab_unmap_refs(unmap, unmap_pages, invcount); + ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, + invcount); BUG_ON(ret); put_free_pages(blkif, unmap_pages, invcount); invcount = 0; } } if (invcount) { - ret = gnttab_unmap_refs(unmap, unmap_pages, invcount); + ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount); BUG_ON(ret); put_free_pages(blkif, unmap_pages, invcount); } @@ -737,7 +740,7 @@ again: } if (segs_to_map) { - ret = gnttab_map_refs(map, pages_to_gnt, segs_to_map); + ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map); BUG_ON(ret); } diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 34a2704fbc88..073b4a19a8b0 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -284,10 +284,8 @@ static int map_grant_pages(struct grant_map *map) } pr_debug("map %d+%d\n", map->index, map->count); - err = gnttab_map_refs_userspace(map->map_ops, - use_ptemod ? map->kmap_ops : NULL, - map->pages, - map->count); + err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL, + map->pages, map->count); if (err) return err; @@ -317,10 +315,9 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) } } - err = gnttab_unmap_refs_userspace(map->unmap_ops + offset, - use_ptemod ? map->kmap_ops + offset : NULL, - map->pages + offset, - pages); + err = gnttab_unmap_refs(map->unmap_ops + offset, + use_ptemod ? 
map->kmap_ops + offset : NULL, map->pages + offset, + pages); if (err) return err; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 8ee13e2e45e2..b84e3ab839aa 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -928,17 +928,15 @@ void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count) } EXPORT_SYMBOL_GPL(gnttab_batch_copy); -int __gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, +int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count, - bool m2p_override) + struct page **pages, unsigned int count) { int i, ret; bool lazy = false; pte_t *pte; - unsigned long mfn, pfn; + unsigned long mfn; - BUG_ON(kmap_ops && !m2p_override); ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count); if (ret) return ret; @@ -957,12 +955,10 @@ int __gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, set_phys_to_machine(map_ops[i].host_addr >> PAGE_SHIFT, map_ops[i].dev_bus_addr >> PAGE_SHIFT); } - return 0; + return ret; } - if (m2p_override && - !in_interrupt() && - paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { + if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { arch_enter_lazy_mmu_mode(); lazy = true; } @@ -979,20 +975,8 @@ int __gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, } else { mfn = PFN_DOWN(map_ops[i].dev_bus_addr); } - pfn = page_to_pfn(pages[i]); - - WARN_ON(PagePrivate(pages[i])); - SetPagePrivate(pages[i]); - set_page_private(pages[i], mfn); - - pages[i]->index = pfn_to_mfn(pfn); - if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { - ret = -ENOMEM; - goto out; - } - if (m2p_override) - ret = m2p_add_override(mfn, pages[i], kmap_ops ? - &kmap_ops[i] : NULL); + ret = m2p_add_override(mfn, pages[i], kmap_ops ? 
+ &kmap_ops[i] : NULL); if (ret) goto out; } @@ -1003,32 +987,15 @@ int __gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, return ret; } - -int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, - struct page **pages, unsigned int count) -{ - return __gnttab_map_refs(map_ops, NULL, pages, count, false); -} EXPORT_SYMBOL_GPL(gnttab_map_refs); -int gnttab_map_refs_userspace(struct gnttab_map_grant_ref *map_ops, - struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count) -{ - return __gnttab_map_refs(map_ops, kmap_ops, pages, count, true); -} -EXPORT_SYMBOL_GPL(gnttab_map_refs_userspace); - -int __gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, +int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count, - bool m2p_override) + struct page **pages, unsigned int count) { int i, ret; bool lazy = false; - unsigned long pfn, mfn; - BUG_ON(kmap_ops && !m2p_override); ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count); if (ret) return ret; @@ -1039,33 +1006,17 @@ int __gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, set_phys_to_machine(unmap_ops[i].host_addr >> PAGE_SHIFT, INVALID_P2M_ENTRY); } - return 0; + return ret; } - if (m2p_override && - !in_interrupt() && - paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { + if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { arch_enter_lazy_mmu_mode(); lazy = true; } for (i = 0; i < count; i++) { - pfn = page_to_pfn(pages[i]); - mfn = get_phys_to_machine(pfn); - if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { - ret = -EINVAL; - goto out; - } - - set_page_private(pages[i], INVALID_P2M_ENTRY); - WARN_ON(!PagePrivate(pages[i])); - ClearPagePrivate(pages[i]); - set_phys_to_machine(pfn, pages[i]->index); - if (m2p_override) - ret = m2p_remove_override(pages[i], - kmap_ops ? - &kmap_ops[i] : NULL, - mfn); + ret = m2p_remove_override(pages[i], kmap_ops ? 
+ &kmap_ops[i] : NULL); if (ret) goto out; } @@ -1076,22 +1027,8 @@ int __gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, return ret; } - -int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *map_ops, - struct page **pages, unsigned int count) -{ - return __gnttab_unmap_refs(map_ops, NULL, pages, count, false); -} EXPORT_SYMBOL_GPL(gnttab_unmap_refs); -int gnttab_unmap_refs_userspace(struct gnttab_unmap_grant_ref *map_ops, - struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count) -{ - return __gnttab_unmap_refs(map_ops, kmap_ops, pages, count, true); -} -EXPORT_SYMBOL_GPL(gnttab_unmap_refs_userspace); - static unsigned nr_status_frames(unsigned nr_grant_frames) { BUG_ON(grefs_per_grant_frame == 0); diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 7ad033dbc845..a5af2a26d94f 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -191,15 +191,11 @@ void gnttab_free_auto_xlat_frames(void); #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, + struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count); -int gnttab_map_refs_userspace(struct gnttab_map_grant_ref *map_ops, - struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count); int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, + struct gnttab_map_grant_ref *kunmap_ops, struct page **pages, unsigned int count); -int gnttab_unmap_refs_userspace(struct gnttab_unmap_grant_ref *unmap_ops, - struct gnttab_map_grant_ref *kunmap_ops, - struct page **pages, unsigned int count); /* Perform a batch of grant map/copy operations. Retry every batch slot * for which the hypervisor returns GNTST_eagain. This is typically due From 8101c8dbf6243ba517aab58d69bf1bc37d8b7b9c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 29 Jan 2014 16:05:30 -0500 Subject: [PATCH 065/171] Btrfs: disable snapshot aware defrag for now It's just broken and it's taking a lot of effort to fix it, so for now just disable it so people can defrag in peace. Thanks, Cc: stable@vger.kernel.org Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fb74a536add3..1af34d0c744b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2629,7 +2629,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) EXTENT_DEFRAG, 1, cached_state); if (ret) { u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item); - if (last_snapshot >= BTRFS_I(inode)->generation) + if (0 && last_snapshot >= BTRFS_I(inode)->generation) /* the inode is shared */ new = record_old_file_extents(inode, ordered_extent); From 0b947aff1599afbbd2ec07ada87b05af0f94cf10 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Wed, 29 Jan 2014 21:06:04 +0000 Subject: [PATCH 066/171] Btrfs: use btrfs_crc32c everywhere instead of libcrc32c After the commit titled "Btrfs: fix btrfs boot when compiled as built-in", LIBCRC32C requirement was removed from btrfs' Kconfig. This made it not possible to build a kernel with btrfs enabled (either as module or built-in) if libcrc32c is not enabled as well. So just replace all uses of libcrc32c with the equivalent function in btrfs hash.h - btrfs_crc32c. 
Signed-off-by: Filipe David Borba Manana Signed-off-by: Chris Mason --- fs/btrfs/check-integrity.c | 4 ++-- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/send.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 160fb509d720..39bfd56a1f26 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -92,11 +92,11 @@ #include #include #include -#include #include #include #include "ctree.h" #include "disk-io.h" +#include "hash.h" #include "transaction.h" #include "extent_io.h" #include "volumes.h" @@ -1823,7 +1823,7 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state, size_t sublen = i ? PAGE_CACHE_SIZE : (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE); - crc = crc32c(crc, data, sublen); + crc = btrfs_crc32c(crc, data, sublen); } btrfs_csum_final(crc, csum); if (memcmp(csum, h->csum, state->csum_size)) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7619147da382..3903bd3f8d2b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -35,6 +34,7 @@ #include #include "ctree.h" #include "disk-io.h" +#include "hash.h" #include "transaction.h" #include "btrfs_inode.h" #include "volumes.h" @@ -244,7 +244,7 @@ out: u32 btrfs_csum_data(char *data, u32 seed, size_t len) { - return crc32c(seed, data, len); + return btrfs_crc32c(seed, data, len); } void btrfs_csum_final(u32 crc, char *result) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 730dce395858..cf9107a64204 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -24,12 +24,12 @@ #include #include #include -#include #include #include #include "send.h" #include "backref.h" +#include "hash.h" #include "locking.h" #include "disk-io.h" #include "btrfs_inode.h" @@ -620,7 +620,7 @@ static int send_cmd(struct send_ctx *sctx) hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr)); hdr->crc = 0; - crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); + crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); hdr->crc = cpu_to_le32(crc); ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size, From 60efa5eb2e886852a0d5f9e1ffa7c896a1099da8 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Sat, 1 Feb 2014 21:27:56 +0000 Subject: [PATCH 067/171] Btrfs: use late_initcall instead of module_init It seems that when init_btrfs_fs() is called, crc32c/crc32c-intel might not always be already initialized, which results in the call to crypto_alloc_shash() returning -ENOENT, as experienced by Ahmet who reported this. Therefore make sure init_btrfs_fs() is called after crc32c is initialized (which is at initialization level 6, module_init), by using late_initcall (which is at initialization level 7) instead of module_init for btrfs. 
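The ordering argument can be made concrete with the built-in initcall levels: when btrfs is built into the kernel, module_init() becomes a level-6 (device) initcall, the same level at which the crc32c provider registers, so their relative order is not guaranteed; late_initcall() runs at level 7, strictly after every level-6 initcall has finished. A hedged sketch of the pattern (filesystem name and functions are hypothetical, the macros are the real ones from include/linux/init.h):

    #include <linux/init.h>
    #include <linux/module.h>

    static int __init myfs_init(void)
    {
            /* By level 7, all level-6 initcalls -- including crypto
             * providers registered with module_init() -- have run, so a
             * crypto_alloc_shash("crc32c", ...) done here can be expected
             * to find its algorithm instead of returning -ENOENT. */
            return 0;
    }

    static void __exit myfs_exit(void)
    {
    }

    /* module_init(myfs_init) would expand to device_initcall (level 6)
     * in a built-in configuration; late_initcall is level 7. */
    late_initcall(myfs_init);
    module_exit(myfs_exit);
    MODULE_LICENSE("GPL");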
Reported-and-Tested-by: Ahmet Inan Signed-off-by: Filipe David Borba Manana Signed-off-by: Chris Mason --- fs/btrfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c02f63356895..97cc24198554 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1996,7 +1996,7 @@ static void __exit exit_btrfs_fs(void) btrfs_hash_exit(); } -module_init(init_btrfs_fs) +late_initcall(init_btrfs_fs); module_exit(exit_btrfs_fs) MODULE_LICENSE("GPL"); From c172ec5c8dc8c09dd5958f4ae542fa321bb15a92 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 31 Jan 2014 17:49:22 +0200 Subject: [PATCH 068/171] libceph: fix error handling in ceph_osdc_init() msgpool_op_reply message pool isn't destroyed if workqueue construction fails. Fix it. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/osd_client.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 010ff3bd58ad..166d4c7ba065 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2504,9 +2504,12 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) err = -ENOMEM; osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify"); if (!osdc->notify_wq) - goto out_msgpool; + goto out_msgpool_reply; + return 0; +out_msgpool_reply: + ceph_msgpool_destroy(&osdc->msgpool_op_reply); out_msgpool: ceph_msgpool_destroy(&osdc->msgpool_op); out_mempool: From d4c42fb493e018e9240810bb6dc5334ae0505145 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Feb 2014 14:41:42 -0500 Subject: [PATCH 069/171] NFSv3: Remove unused function nfs3_proc_set_default_acl Cc: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/nfs3acl.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 0851f852568d..9271a6bb9a41 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -233,25 +233,6 @@ fail: return PTR_ERR(alloc); } -int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, - umode_t mode) -{ - struct posix_acl *default_acl, *acl; - int error; - - error = posix_acl_create(dir, &mode, &default_acl, &acl); - if (error) - return (error == -EOPNOTSUPP) ? 0 : error; - - error = nfs3_proc_setacls(inode, acl, default_acl); - - if (acl) - posix_acl_release(acl); - if (default_acl) - posix_acl_release(default_acl); - return error; -} - const struct xattr_handler *nfs3_xattr_handlers[] = { &posix_acl_access_xattr_handler, &posix_acl_default_xattr_handler, From 8f493b9cfcd8941c6b27d6ce8e3b4a78c094b3c1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Feb 2014 14:36:42 -0500 Subject: [PATCH 070/171] NFSv3: Fix return value of nfs3_proc_setacls nfs3_proc_setacls is used internally by the NFSv3 create operations to set the acl after the file has been created. If the operation fails because the server doesn't support acls, then it must return '0', not -EOPNOTSUPP. 
Reported-by: Russell King Link: http://lkml.kernel.org/r/20140201010328.GI15937@n2100.arm.linux.org.uk Cc: Christoph Hellwig Tested-by: Takashi Iwai Signed-off-by: Trond Myklebust --- fs/nfs/nfs3acl.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 9271a6bb9a41..871d6eda8dba 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -113,7 +113,7 @@ getout: return ERR_PTR(status); } -int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, +static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, struct posix_acl *dfacl) { struct nfs_server *server = NFS_SERVER(inode); @@ -198,6 +198,15 @@ out: return status; } +int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + int ret; + ret = __nfs3_proc_setacls(inode, acl, dfacl); + return (ret == -EOPNOTSUPP) ? 0 : ret; + +} + int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type) { struct posix_acl *alloc = NULL, *dfacl = NULL; @@ -225,7 +234,7 @@ int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type) if (IS_ERR(alloc)) goto fail; } - status = nfs3_proc_setacls(inode, acl, dfacl); + status = __nfs3_proc_setacls(inode, acl, dfacl); posix_acl_release(alloc); return status; From afca50132cfa7bfc5646676d8c67dc18454f38f8 Mon Sep 17 00:00:00 2001 From: Mukesh Rathor Date: Wed, 29 Jan 2014 16:15:18 -0800 Subject: [PATCH 071/171] xen/pvh: set CR4 flags for APs During bootup in the 'probe_page_size_mask' these CR4 flags are set in there. But for AP processors they are not set as we do not use 'secondary_startup_64' which the baremetal kernels uses. Instead do it in this function which we use in Xen PVH during our startup for AP processors. As such fix it up to make sure we have that flag set. Signed-off-by: Mukesh Rathor Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a4d7b647867f..201d09a7c46b 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1473,6 +1473,18 @@ static void xen_pvh_set_cr_flags(int cpu) * X86_CR0_TS, X86_CR0_PE, X86_CR0_ET are set by Xen for HVM guests * (which PVH shared codepaths), while X86_CR0_PG is for PVH. */ write_cr0(read_cr0() | X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM); + + if (!cpu) + return; + /* + * For BSP, PSE PGE are set in probe_page_size_mask(), for APs + * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu_init. + */ + if (cpu_has_pse) + set_in_cr4(X86_CR4_PSE); + + if (cpu_has_pge) + set_in_cr4(X86_CR4_PGE); } /* From 2d7c1b77dd59387070aab355532dd157f888325c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 3 Feb 2014 02:22:07 +0100 Subject: [PATCH 072/171] ACPI / hotplug / PCI: Remove entries from bus->devices in reverse order According to the changelog of commit 29ed1f29b68a (PCI: pciehp: Fix null pointer deref when hot-removing SR-IOV device) it is unsafe to walk the bus->devices list of a PCI bus and remove devices from it in direct order, because that may lead to NULL pointer dereferences related to virtual functions. For this reason, change all of the bus->devices list walks in acpiphp_glue.c during which devices may be removed to be carried out in reverse order. Signed-off-by: Rafael J. 
Wysocki Tested-by: Mika Westerberg --- drivers/pci/hotplug/acpiphp_glue.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index cd929aed3613..6a4b4b734fbd 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -742,7 +742,7 @@ static void trim_stale_devices(struct pci_dev *dev) /* The device is a bridge. so check the bus below it. */ pm_runtime_get_sync(&dev->dev); - list_for_each_entry_safe(child, tmp, &bus->devices, bus_list) + list_for_each_entry_safe_reverse(child, tmp, &bus->devices, bus_list) trim_stale_devices(child); pm_runtime_put(&dev->dev); @@ -773,8 +773,8 @@ static void acpiphp_check_bridge(struct acpiphp_bridge *bridge) ; /* do nothing */ } else if (get_slot_status(slot) == ACPI_STA_ALL) { /* remove stale devices if any */ - list_for_each_entry_safe(dev, tmp, &bus->devices, - bus_list) + list_for_each_entry_safe_reverse(dev, tmp, + &bus->devices, bus_list) if (PCI_SLOT(dev->devfn) == slot->device) trim_stale_devices(dev); @@ -805,7 +805,7 @@ static void acpiphp_sanitize_bus(struct pci_bus *bus) int i; unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM; - list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { + list_for_each_entry_safe_reverse(dev, tmp, &bus->devices, bus_list) { for (i=0; iresource[i]; if ((res->flags & type_mask) && !res->start && From f41b32613138ae05329a0f0e7170223b775d6b24 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 3 Feb 2014 02:22:17 +0100 Subject: [PATCH 073/171] ACPI / hotplug / PCI: Move PCI rescan-remove locking to hotplug_event() Commit 9217a984671e (ACPI / hotplug / PCI: Use global PCI rescan-remove locking) modified ACPIPHP to protect its PCI device removal and addition code paths from races against sysfs-driven rescan and remove operations with the help of PCI rescan-remove locking. However, it overlooked the fact that hotplug_event_work() is not the only caller of hotplug_event() which may also be called by dock_hotplug_event() and that code path is missing the PCI rescan-remove locking. This means that, although the PCI rescan-remove lock is held as appropriate during the handling of events originating from handle_hotplug_event(), the ACPIPHP's operations resulting from dock events may still suffer the race conditions that commit 9217a984671e was supposed to eliminate. To address that problem, move the PCI rescan-remove locking from hotplug_event_work() to hotplug_event() so that it is used regardless of the way that function is invoked. Revamps: 9217a984671e (ACPI / hotplug / PCI: Use global PCI rescan-remove locking) Signed-off-by: Rafael J. 
Wysocki Tested-by: Mika Westerberg --- drivers/pci/hotplug/acpiphp_glue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 6a4b4b734fbd..6e5bd79af810 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -852,6 +852,7 @@ static void hotplug_event(acpi_handle handle, u32 type, void *data) mutex_unlock(&acpiphp_context_lock); + pci_lock_rescan_remove(); acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer); switch (type) { @@ -905,6 +906,7 @@ static void hotplug_event(acpi_handle handle, u32 type, void *data) break; } + pci_unlock_rescan_remove(); if (bridge) put_bridge(bridge); } @@ -915,11 +917,9 @@ static void hotplug_event_work(void *data, u32 type) acpi_handle handle = context->handle; acpi_scan_lock_acquire(); - pci_lock_rescan_remove(); hotplug_event(handle, type, context); - pci_unlock_rescan_remove(); acpi_scan_lock_release(); acpi_evaluate_hotplug_ost(handle, type, ACPI_OST_SC_SUCCESS, NULL); put_bridge(context->func.parent); From d42f5da2340083301dd2c48ff2d75f6ce4b30767 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 3 Feb 2014 02:22:27 +0100 Subject: [PATCH 074/171] ACPI / hotplug / PCI: Scan root bus under the PCI rescan-remove lock Since acpiphp_check_bridge() called by acpiphp_check_host_bridge() does things that require PCI rescan-remove locking around it, make acpiphp_check_host_bridge() use that locking. Signed-off-by: Rafael J. Wysocki Tested-by: Mika Westerberg --- drivers/pci/hotplug/acpiphp_glue.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 6e5bd79af810..931d0b44eace 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -829,7 +829,11 @@ void acpiphp_check_host_bridge(acpi_handle handle) bridge = acpiphp_handle_to_bridge(handle); if (bridge) { + pci_lock_rescan_remove(); + acpiphp_check_bridge(bridge); + + pci_unlock_rescan_remove(); put_bridge(bridge); } } From 1b360f44d009059e446532f29c1a889951e72667 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 3 Feb 2014 22:30:06 +0100 Subject: [PATCH 075/171] ACPI / hotplug / PCI: Fix bridge removal race in handle_hotplug_event() If a PCI bridge with an ACPIPHP context attached is removed via sysfs, the code path executed as a result is the following: pci_stop_and_remove_bus_device_locked pci_remove_bus pcibios_remove_bus acpi_pci_remove_bus acpiphp_remove_slots cleanup_bridge put_bridge free_bridge acpiphp_put_context (for each child, under context lock) kfree (child context) Now, if a hotplug notify is dispatched for one of the bridge's children and the timing is such that handle_hotplug_event() for that notify is executed while free_bridge() above is running, the get_bridge(context->func.parent) in handle_hotplug_event() will not really help, because it is too late to prevent the bridge from going away and the child's context may be freed before hotplug_event_work() scheduled from handle_hotplug_event() dereferences the pointer to it passed via the data argument. That will cause a kernel crash to happpen in hotplug_event_work(). To prevent that from happening, make handle_hotplug_event() check the is_going_away flag of the function's parent bridge (under acpiphp_context_lock) and bail out if it's set. 
Also, make cleanup_bridge() set the bridge's is_going_away flag under acpiphp_context_lock so that it cannot be changed between the check and the subsequent get_bridge(context->func.parent) in handle_hotplug_event(). Then, in the above scenario, handle_hotplug_event() will notice that context->func.parent->is_going_away is already set and it will exit immediately preventing the crash from happening. Signed-off-by: Rafael J. Wysocki --- drivers/pci/hotplug/acpiphp_glue.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 931d0b44eace..91eceaf3131b 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -441,7 +441,9 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge) list_del(&bridge->list); mutex_unlock(&bridge_mutex); + mutex_lock(&acpiphp_context_lock); bridge->is_going_away = true; + mutex_unlock(&acpiphp_context_lock); } /** @@ -941,6 +943,7 @@ static void handle_hotplug_event(acpi_handle handle, u32 type, void *data) { struct acpiphp_context *context; u32 ost_code = ACPI_OST_SC_SUCCESS; + acpi_status status; switch (type) { case ACPI_NOTIFY_BUS_CHECK: @@ -976,13 +979,20 @@ static void handle_hotplug_event(acpi_handle handle, u32 type, void *data) mutex_lock(&acpiphp_context_lock); context = acpiphp_get_context(handle); - if (context && !WARN_ON(context->handle != handle)) { - get_bridge(context->func.parent); - acpiphp_put_context(context); - acpi_hotplug_execute(hotplug_event_work, context, type); + if (!context || WARN_ON(context->handle != handle) + || context->func.parent->is_going_away) + goto err_out; + + get_bridge(context->func.parent); + acpiphp_put_context(context); + status = acpi_hotplug_execute(hotplug_event_work, context, type); + if (ACPI_SUCCESS(status)) { mutex_unlock(&acpiphp_context_lock); return; } + put_bridge(context->func.parent); + + err_out: mutex_unlock(&acpiphp_context_lock); ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; From af9d8adc6b832003bbe3d83fde665ae6b4f072eb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 3 Feb 2014 22:30:15 +0100 Subject: [PATCH 076/171] ACPI / hotplug / PCI: Fix bridge removal race vs dock events If a PCI bridge with an ACPIPHP context attached is removed via sysfs, the code path executed as a result is the following: pci_stop_and_remove_bus_device_locked pci_remove_bus pcibios_remove_bus acpi_pci_remove_bus acpiphp_remove_slots cleanup_bridge unregister_hotplug_dock_device (drops dock references to the bridge) put_bridge free_bridge acpiphp_put_context (for each child, under context lock) kfree (context) Now, if a dock event affecting one of the bridge's child devices occurs (roughly at the same time), it will lead to the following code path: acpi_dock_deferred_cb dock_notify handle_eject_request hot_remove_dock_devices dock_hotplug_event hotplug_event (dereferences context) That may lead to a kernel crash in hotplug_event() if it is executed after the last kfree() in the bridge removal code path. To prevent that from happening, add a wrapper around hotplug_event() called dock_event() and point the .handler pointer in acpiphp_dock_ops to it. Make that wrapper retrieve the device's ACPIPHP context using acpiphp_get_context() (instead of taking it from the data argument) under acpiphp_context_lock and check if the parent bridge's is_going_away flag is set. 
If that flag is set, it will return immediately and if it is not set it will grab a reference to the device's parent bridge before executing hotplug_event(). Then, in the above scenario, the reference to the parent bridge held by dock_event() will prevent free_bridge() from being executed for it until hotplug_event() returns. Signed-off-by: Rafael J. Wysocki --- drivers/pci/hotplug/acpiphp_glue.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 91eceaf3131b..e2a783fdb98f 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -210,10 +210,29 @@ static void post_dock_fixups(acpi_handle not_used, u32 event, void *data) } } +static void dock_event(acpi_handle handle, u32 type, void *data) +{ + struct acpiphp_context *context; + + mutex_lock(&acpiphp_context_lock); + context = acpiphp_get_context(handle); + if (!context || WARN_ON(context->handle != handle) + || context->func.parent->is_going_away) { + mutex_unlock(&acpiphp_context_lock); + return; + } + get_bridge(context->func.parent); + acpiphp_put_context(context); + mutex_unlock(&acpiphp_context_lock); + + hotplug_event(handle, type, data); + + put_bridge(context->func.parent); +} static const struct acpi_dock_ops acpiphp_dock_ops = { .fixup = post_dock_fixups, - .handler = hotplug_event, + .handler = dock_event, }; /* Check whether the PCI device is managed by native PCIe hotplug driver */ From 789b663ae3d427ea9c50505339a13276e7228c9d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 31 Jan 2014 14:25:19 -0500 Subject: [PATCH 077/171] fs: get_acl() must be allowed to return EOPNOTSUPP posix_acl_xattr_get requires get_acl() to return EOPNOTSUPP if the filesystem cannot support acls. This is needed for NFS, which can't know whether or not the server supports acls until it tries to get/set one. This patch converts posix_acl_chmod and posix_acl_create to deal with EOPNOTSUPP return values from get_acl(). 
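The fix leans on the kernel's ERR_PTR convention: get_acl() encodes an errno in the returned pointer, so a caller can tell apart "no ACL set" (NULL), a hard error (e.g. ERR_PTR(-EIO)) and "the filesystem cannot do ACLs at all" (ERR_PTR(-EOPNOTSUPP)), and only the last case should silently fall back to the umask path. A user-space sketch of the same encoding (a simplified re-implementation for illustration, not the kernel headers):

    #include <errno.h>
    #include <stdio.h>

    #define MAX_ERRNO 4095

    static inline void *ERR_PTR(long error)   { return (void *)error; }
    static inline long PTR_ERR(const void *p) { return (long)p; }
    static inline int IS_ERR(const void *p)
    {
            return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
    }

    /* Stand-in for get_acl(): either "not supported" or "supported,
     * but no ACL present". */
    static void *get_acl_like(int fs_supports_acls)
    {
            if (!fs_supports_acls)
                    return ERR_PTR(-EOPNOTSUPP);
            return NULL;
    }

    int main(void)
    {
            void *acl = get_acl_like(0);

            if (IS_ERR(acl) && PTR_ERR(acl) == -EOPNOTSUPP)
                    puts("no ACL support: apply umask and succeed");
            else if (IS_ERR(acl))
                    puts("real error: propagate it");
            else
                    puts("ACL present, or none set");
            return 0;
    }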
Reported-by: Russell King Link: http://lkml.kernel.org/r/20140130140834.GW15937@n2100.arm.linux.org.uk Cc: Al Viro viro@zeniv.linux.org.uk> Reviewed-by: Christoph Hellwig Tested-by: Takashi Iwai Signed-off-by: Trond Myklebust --- fs/posix_acl.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 38bae5a0ea25..11c54fd51e16 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -521,8 +521,11 @@ posix_acl_chmod(struct inode *inode, umode_t mode) return -EOPNOTSUPP; acl = get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR_OR_NULL(acl)) + if (IS_ERR_OR_NULL(acl)) { + if (acl == ERR_PTR(-EOPNOTSUPP)) + return 0; return PTR_ERR(acl); + } ret = __posix_acl_chmod(&acl, GFP_KERNEL, mode); if (ret) @@ -544,14 +547,15 @@ posix_acl_create(struct inode *dir, umode_t *mode, goto no_acl; p = get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(p)) + if (IS_ERR(p)) { + if (p == ERR_PTR(-EOPNOTSUPP)) + goto apply_umask; return PTR_ERR(p); - - if (!p) { - *mode &= ~current_umask(); - goto no_acl; } + if (!p) + goto apply_umask; + *acl = posix_acl_clone(p, GFP_NOFS); if (!*acl) return -ENOMEM; @@ -575,6 +579,8 @@ posix_acl_create(struct inode *dir, umode_t *mode, } return 0; +apply_umask: + *mode &= ~current_umask(); no_acl: *default_acl = NULL; *acl = NULL; From 4528eb19b00d9ccd65ded6f8201eec704267edd8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 4 Feb 2014 07:39:06 +0100 Subject: [PATCH 078/171] ALSA: hda - Fix silent output on Toshiba Satellite L40 Toshiba Satellite L40 with AD1986A codec requires the EAPD of NID 0x1b to be constantly on, otherwise the output doesn't work. Unlike most of other AD1986A machines, EAPD is correctly implemented in HD-audio manner (that is, bit set = amp on), so we need to clear the inv_eapd flag in the fixup, too. 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=67481 Cc: [v3.11+] Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_analog.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 7a426ed491f2..50b2427f19ca 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -244,6 +244,19 @@ static void ad_fixup_inv_jack_detect(struct hda_codec *codec, } } +/* Toshiba Satellite L40 implements EAPD in a standard way unlike others */ +static void ad1986a_fixup_eapd(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct ad198x_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + codec->inv_eapd = 0; + spec->gen.keep_eapd_on = 1; + spec->eapd_nid = 0x1b; + } +} + enum { AD1986A_FIXUP_INV_JACK_DETECT, AD1986A_FIXUP_ULTRA, @@ -251,6 +264,7 @@ enum { AD1986A_FIXUP_3STACK, AD1986A_FIXUP_LAPTOP, AD1986A_FIXUP_LAPTOP_IMIC, + AD1986A_FIXUP_EAPD, }; static const struct hda_fixup ad1986a_fixups[] = { @@ -311,6 +325,10 @@ static const struct hda_fixup ad1986a_fixups[] = { .chained_before = 1, .chain_id = AD1986A_FIXUP_LAPTOP, }, + [AD1986A_FIXUP_EAPD] = { + .type = HDA_FIXUP_FUNC, + .v.func = ad1986a_fixup_eapd, + }, }; static const struct snd_pci_quirk ad1986a_fixup_tbl[] = { @@ -318,6 +336,7 @@ static const struct snd_pci_quirk ad1986a_fixup_tbl[] = { SND_PCI_QUIRK_MASK(0x1043, 0xff00, 0x8100, "ASUS P5", AD1986A_FIXUP_3STACK), SND_PCI_QUIRK_MASK(0x1043, 0xff00, 0x8200, "ASUS M2", AD1986A_FIXUP_3STACK), SND_PCI_QUIRK(0x10de, 0xcb84, "ASUS A8N-VM", AD1986A_FIXUP_3STACK), + SND_PCI_QUIRK(0x1179, 0xff40, "Toshiba Satellite L40", AD1986A_FIXUP_EAPD), SND_PCI_QUIRK(0x144d, 0xc01e, "FSC V2060", AD1986A_FIXUP_LAPTOP), SND_PCI_QUIRK_MASK(0x144d, 0xff00, 0xc000, "Samsung", AD1986A_FIXUP_SAMSUNG), SND_PCI_QUIRK(0x144d, 0xc027, "Samsung Q1", AD1986A_FIXUP_ULTRA), From 7e8f15c5aa9b8021ee933336f3e055f279482051 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Thu, 9 Jan 2014 09:55:43 -0300 Subject: [PATCH 079/171] [media] s5k5baf: allow to handle arbitrary long i2c sequences Using variable length array in s5k5baf_write_arr_seq caused an implicit assumption that i2c sequences should be short. The patch rewrites the function so it can handle sequences of any length and does not use variable length array. 
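The rewrite is the generic bounded-scratch-buffer pattern: rather than sizing a stack array by the caller-supplied count (the variable length array this patch removes), the sequence is sent in chunks that fit a fixed buffer. A user-space sketch of just the chunking arithmetic (chunk size and names are arbitrary, not the driver's exact limits):

    #include <stdio.h>
    #include <stddef.h>

    #define CHUNK 64   /* fixed scratch size instead of a VLA of "count" */

    static void send_chunk(const unsigned short *seq, size_t n)
    {
            printf("sending %zu words, first word 0x%04x\n", n, seq[0]);
    }

    static void write_seq(const unsigned short *seq, size_t count)
    {
            while (count > 0) {
                    size_t n = count < CHUNK ? count : CHUNK;

                    send_chunk(seq, n);
                    seq   += n;
                    count -= n;
            }
    }

    int main(void)
    {
            unsigned short seq[150] = { 0x1234 };

            write_seq(seq, sizeof(seq) / sizeof(seq[0]));  /* 64 + 64 + 22 */
            return 0;
    }

The same loop shape handles a one-word sequence and an arbitrarily long one without growing the stack.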
Reported-by: Dan Carpenter Signed-off-by: Andrzej Hajda Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/s5k5baf.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/media/i2c/s5k5baf.c b/drivers/media/i2c/s5k5baf.c index 4b8381111cbd..77e10e0fd8d6 100644 --- a/drivers/media/i2c/s5k5baf.c +++ b/drivers/media/i2c/s5k5baf.c @@ -478,25 +478,33 @@ static void s5k5baf_write_arr_seq(struct s5k5baf *state, u16 addr, u16 count, const u16 *seq) { struct i2c_client *c = v4l2_get_subdevdata(&state->sd); - __be16 buf[count + 1]; - int ret, n; + __be16 buf[65]; s5k5baf_i2c_write(state, REG_CMDWR_ADDR, addr); if (state->error) return; - buf[0] = __constant_cpu_to_be16(REG_CMD_BUF); - for (n = 1; n <= count; ++n) - buf[n] = cpu_to_be16(*seq++); - - n *= 2; - ret = i2c_master_send(c, (char *)buf, n); v4l2_dbg(3, debug, c, "i2c_write_seq(count=%d): %*ph\n", count, - min(2 * count, 64), seq - count); + min(2 * count, 64), seq); - if (ret != n) { - v4l2_err(c, "i2c_write_seq: error during transfer (%d)\n", ret); - state->error = ret; + buf[0] = __constant_cpu_to_be16(REG_CMD_BUF); + + while (count > 0) { + int n = min_t(int, count, ARRAY_SIZE(buf) - 1); + int ret, i; + + for (i = 1; i <= n; ++i) + buf[i] = cpu_to_be16(*seq++); + + i *= 2; + ret = i2c_master_send(c, (char *)buf, i); + if (ret != i) { + v4l2_err(c, "i2c_write_seq: error during transfer (%d)\n", ret); + state->error = ret; + break; + } + + count -= n; } } From a62cffefc9a327a5bc45ba9318ef601b525f4bd1 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Thu, 16 Jan 2014 08:26:33 -0300 Subject: [PATCH 080/171] [media] s5p-jpeg: Fix wrong NV12 format parameters NV12 format entries in the sjpeg_formats array had wrong colplanes, depth and v_align values. Signed-off-by: Jacek Anaszewski Signed-off-by: Kyungmin Park Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/s5p-jpeg/jpeg-core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/media/platform/s5p-jpeg/jpeg-core.c b/drivers/media/platform/s5p-jpeg/jpeg-core.c index a1c78c870b68..7d68d0b9966a 100644 --- a/drivers/media/platform/s5p-jpeg/jpeg-core.c +++ b/drivers/media/platform/s5p-jpeg/jpeg-core.c @@ -175,7 +175,7 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = { { .name = "YUV 4:2:0 planar, Y/CbCr", .fourcc = V4L2_PIX_FMT_NV12, - .depth = 16, + .depth = 12, .colplanes = 2, .h_align = 1, .v_align = 1, @@ -188,10 +188,10 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = { { .name = "YUV 4:2:0 planar, Y/CbCr", .fourcc = V4L2_PIX_FMT_NV12, - .depth = 16, - .colplanes = 4, + .depth = 12, + .colplanes = 2, .h_align = 4, - .v_align = 1, + .v_align = 4, .flags = SJPEG_FMT_FLAG_ENC_OUTPUT | SJPEG_FMT_FLAG_DEC_CAPTURE | SJPEG_FMT_FLAG_S5P | From a27a19d61535ce61a1e2f9274f99316c7532d5ef Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Tue, 7 Jan 2014 19:07:52 -0300 Subject: [PATCH 081/171] [media] exynos4-is: Fix error paths in probe() for !pm_runtime_enabled() Ensure clk_disable() is called on error paths only when clk_enable() was previously called. 
This fixes following build warning: .../media-git/drivers/media/platform/exynos4-is/fimc-lite.c: In function 'fimc_lite_probe': .../media-git/drivers/media/platform/exynos4-is/fimc-lite.c:1583:1: warning: label 'err_sd' defined but not used [-Wunused-label] Reported-by: Hans Verkuil Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/exynos4-is/fimc-core.c | 3 ++- drivers/media/platform/exynos4-is/fimc-lite.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/exynos4-is/fimc-core.c b/drivers/media/platform/exynos4-is/fimc-core.c index a7dfd07e8389..dcbea59c8c69 100644 --- a/drivers/media/platform/exynos4-is/fimc-core.c +++ b/drivers/media/platform/exynos4-is/fimc-core.c @@ -1027,7 +1027,8 @@ static int fimc_probe(struct platform_device *pdev) return 0; err_gclk: - clk_disable(fimc->clock[CLK_GATE]); + if (!pm_runtime_enabled(dev)) + clk_disable(fimc->clock[CLK_GATE]); err_sd: fimc_unregister_capture_subdev(fimc); err_sclk: diff --git a/drivers/media/platform/exynos4-is/fimc-lite.c b/drivers/media/platform/exynos4-is/fimc-lite.c index 1234734bccf4..5213ff03d28a 100644 --- a/drivers/media/platform/exynos4-is/fimc-lite.c +++ b/drivers/media/platform/exynos4-is/fimc-lite.c @@ -1563,7 +1563,7 @@ static int fimc_lite_probe(struct platform_device *pdev) if (!pm_runtime_enabled(dev)) { ret = clk_enable(fimc->clock); if (ret < 0) - goto err_clk_put; + goto err_sd; } fimc->alloc_ctx = vb2_dma_contig_init_ctx(dev); @@ -1579,7 +1579,8 @@ static int fimc_lite_probe(struct platform_device *pdev) return 0; err_clk_dis: - clk_disable(fimc->clock); + if (!pm_runtime_enabled(dev)) + clk_disable(fimc->clock); err_sd: fimc_lite_unregister_capture_subdev(fimc); err_clk_put: From d003a30dd7401f38c876bb97e3e51f4fd9fed15e Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Tue, 7 Jan 2014 19:08:48 -0300 Subject: [PATCH 082/171] [media] exynos4-is: Compile in fimc runtime PM callbacks conditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enclose the runtime PM helpers in #ifdef CONFIG_PM_RUNTIME/#endif to avoid following compile warning when CONFIG_PM_RUNTIME is disabled: CC drivers/media/platform/exynos4-is/fimc-core.o drivers/media/platform/exynos4-is/fimc-core.c:1040:12: warning: ‘fimc_runtime_resume’ defined but not used drivers/media/platform/exynos4-is/fimc-core.c:1057:12: warning: ‘fimc_runtime_suspend’ defined but not used Signed-off-by: Sylwester Nawrocki Reviewed-by: Jingoo Han Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/exynos4-is/fimc-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/platform/exynos4-is/fimc-core.c b/drivers/media/platform/exynos4-is/fimc-core.c index dcbea59c8c69..da2fc86cc524 100644 --- a/drivers/media/platform/exynos4-is/fimc-core.c +++ b/drivers/media/platform/exynos4-is/fimc-core.c @@ -1037,6 +1037,7 @@ err_sclk: return ret; } +#ifdef CONFIG_PM_RUNTIME static int fimc_runtime_resume(struct device *dev) { struct fimc_dev *fimc = dev_get_drvdata(dev); @@ -1069,6 +1070,7 @@ static int fimc_runtime_suspend(struct device *dev) dbg("fimc%d: state: 0x%lx", fimc->id, fimc->state); return ret; } +#endif #ifdef CONFIG_PM_SLEEP static int fimc_resume(struct device *dev) From 656e62dc84a38fa847d9501636acb098a32d6f89 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Thu, 16 Jan 2014 11:49:28 -0300 Subject: [PATCH 083/171] [media] exynos4-is: Compile in fimc-lite runtime PM callbacks conditionally 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enclose the runtime PM helpers in #ifdef CONFIG_PM_RUNTIME/#endif to avoid following compile warning when CONFIG_PM_RUNTIME is disabled: CC drivers/media/platform/exynos4-is/fimc-lite.o drivers/media/platform/exynos4-is/fimc-lite.c:1591:12: warning: ‘fimc_lite_runtime_resume’ defined but not used [-Wunused-function] drivers/media/platform/exynos4-is/fimc-lite.c:1599:12: warning: ‘fimc_lite_runtime_suspend’ defined but not used [-Wunused-function] Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/exynos4-is/fimc-lite.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/platform/exynos4-is/fimc-lite.c b/drivers/media/platform/exynos4-is/fimc-lite.c index 5213ff03d28a..779ec3cd259d 100644 --- a/drivers/media/platform/exynos4-is/fimc-lite.c +++ b/drivers/media/platform/exynos4-is/fimc-lite.c @@ -1588,6 +1588,7 @@ err_clk_put: return ret; } +#ifdef CONFIG_PM_RUNTIME static int fimc_lite_runtime_resume(struct device *dev) { struct fimc_lite *fimc = dev_get_drvdata(dev); @@ -1603,6 +1604,7 @@ static int fimc_lite_runtime_suspend(struct device *dev) clk_disable(fimc->clock); return 0; } +#endif #ifdef CONFIG_PM_SLEEP static int fimc_lite_resume(struct device *dev) From 38121b6ef314e556d3b59acc0c41a4d6f7e597fa Mon Sep 17 00:00:00 2001 From: Levente Kurusa Date: Thu, 19 Dec 2013 12:06:49 -0300 Subject: [PATCH 084/171] [media] media: bt8xx: add missing put_device call This is required so that we give up the last reference to the device. Remove the kfree() because the put_device() call will actually call release_sub_device which in turn kfrees the device. Signed-off-by: Levente Kurusa Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/pci/bt8xx/bttv-gpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/pci/bt8xx/bttv-gpio.c b/drivers/media/pci/bt8xx/bttv-gpio.c index 922e8233fd0b..3f364b7062b9 100644 --- a/drivers/media/pci/bt8xx/bttv-gpio.c +++ b/drivers/media/pci/bt8xx/bttv-gpio.c @@ -98,7 +98,7 @@ int bttv_sub_add_device(struct bttv_core *core, char *name) err = device_register(&sub->dev); if (0 != err) { - kfree(sub); + put_device(&sub->dev); return err; } pr_info("%d: add subdevice \"%s\"\n", core->nr, dev_name(&sub->dev)); From 50c88544d225baadd6de1c4365d4ed16cc942a37 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Fri, 20 Dec 2013 16:17:32 -0300 Subject: [PATCH 085/171] [media] go7007-loader: fix usb_dev leak There is usb_get_dev() in go7007_loader_probe(), but there is no usb_put_dev() anywhere. Found by Linux Driver Verification project (linuxtesting.org). 
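The underlying rule is reference-count symmetry: every usb_get_dev() taken in probe() needs a matching usb_put_dev() on every exit path, the early-error ones included, and once more in disconnect(). A skeleton of that shape (the probe/disconnect names and the -ENODEV policy are illustrative, not the go7007 code; the USB helpers are the real ones):

    #include <linux/usb.h>

    static int my_probe(struct usb_interface *intf,
                        const struct usb_device_id *id)
    {
            struct usb_device *usbdev = usb_get_dev(interface_to_usbdev(intf));

            if (usbdev->descriptor.bNumConfigurations != 1)
                    goto err;               /* early exit still drops the ref */

            usb_set_intfdata(intf, usbdev);
            return 0;
    err:
            usb_put_dev(usbdev);            /* balances usb_get_dev() above */
            return -ENODEV;
    }

    static void my_disconnect(struct usb_interface *intf)
    {
            usb_put_dev(interface_to_usbdev(intf)); /* balances probe() */
            usb_set_intfdata(intf, NULL);
    }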
Signed-off-by: Alexey Khoroshilov Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/go7007/go7007-loader.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/media/go7007/go7007-loader.c b/drivers/staging/media/go7007/go7007-loader.c index 10bb41c2fb6d..eecb1f2a5574 100644 --- a/drivers/staging/media/go7007/go7007-loader.c +++ b/drivers/staging/media/go7007/go7007-loader.c @@ -59,7 +59,7 @@ static int go7007_loader_probe(struct usb_interface *interface, if (usbdev->descriptor.bNumConfigurations != 1) { dev_err(&interface->dev, "can't handle multiple config\n"); - return -ENODEV; + goto failed2; } vendor = le16_to_cpu(usbdev->descriptor.idVendor); @@ -108,6 +108,7 @@ static int go7007_loader_probe(struct usb_interface *interface, return 0; failed2: + usb_put_dev(usbdev); dev_err(&interface->dev, "probe failed\n"); return -ENODEV; } @@ -115,6 +116,7 @@ failed2: static void go7007_loader_disconnect(struct usb_interface *interface) { dev_info(&interface->dev, "disconnect\n"); + usb_put_dev(interface_to_usbdev(interface)); usb_set_intfdata(interface, NULL); } From cca36e2eecec2b8fc869a50ffd3bd0adeed92b8b Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 3 Jan 2014 08:10:49 -0300 Subject: [PATCH 086/171] [media] Revert "[media] videobuf_vm_{open,close} race fixes" This reverts commit a242f426108c284049a69710f871cc9f11b13e61. That commit actually caused deadlocks, rather then fixing them. If ext_lock is set to NULL (otherwise videobuf_queue_lock doesn't do anything), then you get this deadlock: The driver's mmap function calls videobuf_mmap_mapper which calls videobuf_queue_lock on q. videobuf_mmap_mapper calls __videobuf_mmap_mapper, __videobuf_mmap_mapper calls videobuf_vm_open and videobuf_vm_open calls videobuf_queue_lock on q (introduced by above patch): deadlocked. This affects drivers using dma-contig and dma-vmalloc. Only dma-sg is not affected since it doesn't call videobuf_vm_open from __videobuf_mmap_mapper. Most drivers these days have a non-NULL ext_lock. Those that still use NULL there are all fairly obscure drivers, which is why this hasn't been seen earlier. Since everything worked perfectly fine for many years I prefer to just revert this patch rather than trying to fix it. videobuf is quite fragile and I rather not touch it too much. Work is (slowly) progressing to move everything over to vb2 or at the very least use non-NULL ext_lock in videobuf. 
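The deadlock being reverted is plain lock recursion: the mmap path takes the queue lock and then, through videobuf_vm_open(), tries to take the same non-recursive lock again in the same thread. A minimal user-space illustration of that failure mode with a default (non-recursive) pthread mutex -- not the videobuf code itself, just the shape of the bug:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    static void vm_open_like(void)
    {
            /* Second acquisition by the thread that already holds the
             * lock: with a default mutex this never returns. */
            pthread_mutex_lock(&lock);
            puts("never reached");
            pthread_mutex_unlock(&lock);
    }

    static void mmap_like(void)
    {
            pthread_mutex_lock(&lock);      /* outer "queue lock" */
            vm_open_like();                 /* nested path retakes it */
            pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
            mmap_like();                    /* hangs by design */
            return 0;
    }

dma-sg escaped only because its __videobuf_mmap_mapper() never calls videobuf_vm_open() on that path, exactly as the message above notes.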
Signed-off-by: Hans Verkuil Cc: # for v3.11 and up Cc: Al Viro Reported-by: Pete Eberlein Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/videobuf-dma-contig.c | 12 +++++------- drivers/media/v4l2-core/videobuf-dma-sg.c | 10 ++++------ drivers/media/v4l2-core/videobuf-vmalloc.c | 10 ++++------ 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/drivers/media/v4l2-core/videobuf-dma-contig.c b/drivers/media/v4l2-core/videobuf-dma-contig.c index 65411adcd0ea..7e6b209b7002 100644 --- a/drivers/media/v4l2-core/videobuf-dma-contig.c +++ b/drivers/media/v4l2-core/videobuf-dma-contig.c @@ -66,14 +66,11 @@ static void __videobuf_dc_free(struct device *dev, static void videobuf_vm_open(struct vm_area_struct *vma) { struct videobuf_mapping *map = vma->vm_private_data; - struct videobuf_queue *q = map->q; - dev_dbg(q->dev, "vm_open %p [count=%u,vma=%08lx-%08lx]\n", + dev_dbg(map->q->dev, "vm_open %p [count=%u,vma=%08lx-%08lx]\n", map, map->count, vma->vm_start, vma->vm_end); - videobuf_queue_lock(q); map->count++; - videobuf_queue_unlock(q); } static void videobuf_vm_close(struct vm_area_struct *vma) @@ -85,11 +82,12 @@ static void videobuf_vm_close(struct vm_area_struct *vma) dev_dbg(q->dev, "vm_close %p [count=%u,vma=%08lx-%08lx]\n", map, map->count, vma->vm_start, vma->vm_end); - videobuf_queue_lock(q); - if (!--map->count) { + map->count--; + if (0 == map->count) { struct videobuf_dma_contig_memory *mem; dev_dbg(q->dev, "munmap %p q=%p\n", map, q); + videobuf_queue_lock(q); /* We need first to cancel streams, before unmapping */ if (q->streaming) @@ -128,8 +126,8 @@ static void videobuf_vm_close(struct vm_area_struct *vma) kfree(map); + videobuf_queue_unlock(q); } - videobuf_queue_unlock(q); } static const struct vm_operations_struct videobuf_vm_ops = { diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index 9db674ccdc68..828e7c10bd70 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -338,14 +338,11 @@ EXPORT_SYMBOL_GPL(videobuf_dma_free); static void videobuf_vm_open(struct vm_area_struct *vma) { struct videobuf_mapping *map = vma->vm_private_data; - struct videobuf_queue *q = map->q; dprintk(2, "vm_open %p [count=%d,vma=%08lx-%08lx]\n", map, map->count, vma->vm_start, vma->vm_end); - videobuf_queue_lock(q); map->count++; - videobuf_queue_unlock(q); } static void videobuf_vm_close(struct vm_area_struct *vma) @@ -358,9 +355,10 @@ static void videobuf_vm_close(struct vm_area_struct *vma) dprintk(2, "vm_close %p [count=%d,vma=%08lx-%08lx]\n", map, map->count, vma->vm_start, vma->vm_end); - videobuf_queue_lock(q); - if (!--map->count) { + map->count--; + if (0 == map->count) { dprintk(1, "munmap %p q=%p\n", map, q); + videobuf_queue_lock(q); for (i = 0; i < VIDEO_MAX_FRAME; i++) { if (NULL == q->bufs[i]) continue; @@ -376,9 +374,9 @@ static void videobuf_vm_close(struct vm_area_struct *vma) q->bufs[i]->baddr = 0; q->ops->buf_release(q, q->bufs[i]); } + videobuf_queue_unlock(q); kfree(map); } - videobuf_queue_unlock(q); return; } diff --git a/drivers/media/v4l2-core/videobuf-vmalloc.c b/drivers/media/v4l2-core/videobuf-vmalloc.c index 1365c651c177..2ff7fcc77b11 100644 --- a/drivers/media/v4l2-core/videobuf-vmalloc.c +++ b/drivers/media/v4l2-core/videobuf-vmalloc.c @@ -54,14 +54,11 @@ MODULE_LICENSE("GPL"); static void videobuf_vm_open(struct vm_area_struct *vma) { struct videobuf_mapping *map = vma->vm_private_data; - struct videobuf_queue *q = map->q; dprintk(2, 
"vm_open %p [count=%u,vma=%08lx-%08lx]\n", map, map->count, vma->vm_start, vma->vm_end); - videobuf_queue_lock(q); map->count++; - videobuf_queue_unlock(q); } static void videobuf_vm_close(struct vm_area_struct *vma) @@ -73,11 +70,12 @@ static void videobuf_vm_close(struct vm_area_struct *vma) dprintk(2, "vm_close %p [count=%u,vma=%08lx-%08lx]\n", map, map->count, vma->vm_start, vma->vm_end); - videobuf_queue_lock(q); - if (!--map->count) { + map->count--; + if (0 == map->count) { struct videobuf_vmalloc_memory *mem; dprintk(1, "munmap %p q=%p\n", map, q); + videobuf_queue_lock(q); /* We need first to cancel streams, before unmapping */ if (q->streaming) @@ -116,8 +114,8 @@ static void videobuf_vm_close(struct vm_area_struct *vma) kfree(map); + videobuf_queue_unlock(q); } - videobuf_queue_unlock(q); return; } From 548df7831adc34eca3ccefa29f768cef84315d6e Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Wed, 8 Jan 2014 05:01:33 -0300 Subject: [PATCH 087/171] [media] vb2: Check if there are buffers before streamon This patch adds a test preventing streamon() if there is no buffer ready. Without this patch, a user could call streamon() before preparing any buffer. This leads to a situation where if he calls close() before calling streamoff() the device is kept streaming. Signed-off-by: Ricardo Ribalda Delgado Reviewed-by: Marek Szyprowski Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/videobuf2-core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index 5a5fb7f09b7b..a127925c9d61 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -1776,6 +1776,11 @@ static int vb2_internal_streamon(struct vb2_queue *q, enum v4l2_buf_type type) return 0; } + if (!q->num_buffers) { + dprintk(1, "streamon: no buffers have been allocated\n"); + return -EINVAL; + } + /* * If any buffers were queued before streamon, * we can now pass them to driver for processing. From 08e10972661db78d0e0a2d4a4c28fc3bf93617ba Mon Sep 17 00:00:00 2001 From: Michael Krufky Date: Wed, 29 Jan 2014 23:10:11 -0300 Subject: [PATCH 088/171] [media] update Michael Krufky's email address I am no longer available at the kernellabs.com or m1k.net email addresses. Update each instance of my email to my linuxtv.org account. 
Signed-off-by: Michael Krufky Signed-off-by: Mauro Carvalho Chehab --- Documentation/dvb/contributors.txt | 2 +- drivers/media/dvb-frontends/nxt200x.c | 2 +- drivers/media/pci/bt8xx/bttv-cards.c | 2 +- drivers/media/pci/saa7134/saa7134-cards.c | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c | 4 ++-- drivers/media/usb/dvb-usb-v2/mxl111sf-demod.h | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.c | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.h | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.c | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.h | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf-phy.c | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf-phy.h | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf-reg.h | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.c | 4 ++-- drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h | 2 +- drivers/media/usb/dvb-usb-v2/mxl111sf.c | 4 ++-- drivers/media/usb/dvb-usb-v2/mxl111sf.h | 2 +- 17 files changed, 20 insertions(+), 20 deletions(-) diff --git a/Documentation/dvb/contributors.txt b/Documentation/dvb/contributors.txt index 47c30098dab6..731a009723c7 100644 --- a/Documentation/dvb/contributors.txt +++ b/Documentation/dvb/contributors.txt @@ -78,7 +78,7 @@ Peter Beutner Wilson Michaels for the lgdt330x frontend driver, and various bugfixes -Michael Krufky +Michael Krufky for maintaining v4l/dvb inter-tree dependencies Taylor Jacob diff --git a/drivers/media/dvb-frontends/nxt200x.c b/drivers/media/dvb-frontends/nxt200x.c index 4bf057544607..8a8e1ecb762d 100644 --- a/drivers/media/dvb-frontends/nxt200x.c +++ b/drivers/media/dvb-frontends/nxt200x.c @@ -2,7 +2,7 @@ * Support for NXT2002 and NXT2004 - VSB/QAM * * Copyright (C) 2005 Kirk Lapray - * Copyright (C) 2006 Michael Krufky + * Copyright (C) 2006-2014 Michael Krufky * based on nxt2002 by Taylor Jacob * and nxt2004 by Jean-Francois Thibert * diff --git a/drivers/media/pci/bt8xx/bttv-cards.c b/drivers/media/pci/bt8xx/bttv-cards.c index d85cb0ace4dc..6662b495b22c 100644 --- a/drivers/media/pci/bt8xx/bttv-cards.c +++ b/drivers/media/pci/bt8xx/bttv-cards.c @@ -2426,7 +2426,7 @@ struct tvcard bttv_tvcards[] = { }, /* ---- card 0x87---------------------------------- */ [BTTV_BOARD_DVICO_FUSIONHDTV_5_LITE] = { - /* Michael Krufky */ + /* Michael Krufky */ .name = "DViCO FusionHDTV 5 Lite", .tuner_type = TUNER_LG_TDVS_H06XF, /* TDVS-H064F */ .tuner_addr = ADDR_UNSET, diff --git a/drivers/media/pci/saa7134/saa7134-cards.c b/drivers/media/pci/saa7134/saa7134-cards.c index d45e7f6ff332..c9b2350e92c8 100644 --- a/drivers/media/pci/saa7134/saa7134-cards.c +++ b/drivers/media/pci/saa7134/saa7134-cards.c @@ -2590,7 +2590,7 @@ struct saa7134_board saa7134_boards[] = { }}, }, [SAA7134_BOARD_AVERMEDIA_AVERTVHD_A180] = { - /* Michael Krufky + /* Michael Krufky * Uses Alps Electric TDHU2, containing NXT2004 ATSC Decoder * AFAIK, there is no analog demod, thus, * no support for analog television. 
diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c b/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c index d83df4bb72d3..0a98d04c53e4 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c @@ -1,7 +1,7 @@ /* * mxl111sf-demod.c - driver for the MaxLinear MXL111SF DVB-T demodulator * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -601,7 +601,7 @@ struct dvb_frontend *mxl111sf_demod_attach(struct mxl111sf_state *mxl_state, EXPORT_SYMBOL_GPL(mxl111sf_demod_attach); MODULE_DESCRIPTION("MaxLinear MxL111SF DVB-T demodulator driver"); -MODULE_AUTHOR("Michael Krufky "); +MODULE_AUTHOR("Michael Krufky "); MODULE_LICENSE("GPL"); MODULE_VERSION("0.1"); diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.h b/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.h index 3f3f8bfd190b..2d4530f5be54 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.h +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.h @@ -1,7 +1,7 @@ /* * mxl111sf-demod.h - driver for the MaxLinear MXL111SF DVB-T demodulator * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.c b/drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.c index e4121cb8f5ef..a619410adde4 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.c +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.c @@ -1,7 +1,7 @@ /* * mxl111sf-gpio.c - driver for the MaxLinear MXL111SF * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.h b/drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.h index 0220f54299a5..b85a5772d771 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.h +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.h @@ -1,7 +1,7 @@ /* * mxl111sf-gpio.h - driver for the MaxLinear MXL111SF * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.c b/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.c index 34434557ef65..a101d06eb143 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.c +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.c @@ -1,7 +1,7 @@ /* * mxl111sf-i2c.c - driver for the MaxLinear MXL111SF * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.h b/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.h index a57a45ffb9e4..465762145ad2 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.h +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-i2c.h @@ -1,7 +1,7 @@ /* * mxl111sf-i2c.h - driver for the MaxLinear MXL111SF * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it 
and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-phy.c b/drivers/media/usb/dvb-usb-v2/mxl111sf-phy.c index b741b3a7a325..f6b348024bec 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-phy.c +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-phy.c @@ -1,7 +1,7 @@ /* * mxl111sf-phy.c - driver for the MaxLinear MXL111SF * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-phy.h b/drivers/media/usb/dvb-usb-v2/mxl111sf-phy.h index f0756071d347..0643738de7de 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-phy.h +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-phy.h @@ -1,7 +1,7 @@ /* * mxl111sf-phy.h - driver for the MaxLinear MXL111SF * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-reg.h b/drivers/media/usb/dvb-usb-v2/mxl111sf-reg.h index 17831b0fb9db..89bf115e927e 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-reg.h +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-reg.h @@ -1,7 +1,7 @@ /* * mxl111sf-reg.h - driver for the MaxLinear MXL111SF * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.c b/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.c index 879c529640f7..a8d2c7053674 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.c +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.c @@ -1,7 +1,7 @@ /* * mxl111sf-tuner.c - driver for the MaxLinear MXL111SF CMOS tuner * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -512,7 +512,7 @@ struct dvb_frontend *mxl111sf_tuner_attach(struct dvb_frontend *fe, EXPORT_SYMBOL_GPL(mxl111sf_tuner_attach); MODULE_DESCRIPTION("MaxLinear MxL111SF CMOS tuner driver"); -MODULE_AUTHOR("Michael Krufky "); +MODULE_AUTHOR("Michael Krufky "); MODULE_LICENSE("GPL"); MODULE_VERSION("0.1"); diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h b/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h index 90f583e5d6a6..5fc0121736ff 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h @@ -1,7 +1,7 @@ /* * mxl111sf-tuner.h - driver for the MaxLinear MXL111SF CMOS tuner * - * Copyright (C) 2010 Michael Krufky + * Copyright (C) 2010-2014 Michael Krufky * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf.c b/drivers/media/usb/dvb-usb-v2/mxl111sf.c index 08240e498451..8ceff4290bf1 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf.c +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 Michael Krufky (mkrufky@kernellabs.com) + * Copyright (C) 2010-2014 Michael Krufky (mkrufky@linuxtv.org) * * This program is free software; 
you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -1421,7 +1421,7 @@ static struct usb_driver mxl111sf_usb_driver = { module_usb_driver(mxl111sf_usb_driver); -MODULE_AUTHOR("Michael Krufky "); +MODULE_AUTHOR("Michael Krufky "); MODULE_DESCRIPTION("Driver for MaxLinear MxL111SF"); MODULE_VERSION("1.0"); MODULE_LICENSE("GPL"); diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf.h b/drivers/media/usb/dvb-usb-v2/mxl111sf.h index 9816de86e48c..8516c011b7cc 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf.h +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 Michael Krufky (mkrufky@kernellabs.com) + * Copyright (C) 2010-2014 Michael Krufky (mkrufky@linuxtv.org) * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free From 62fd0d30e1fbd12d5a07ff722b2726bb8fa630c7 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Thu, 30 Jan 2014 00:05:01 -0300 Subject: [PATCH 089/171] [media] cx24117: remove dead code in always 'false' if statement At this point of the execution in the function cx24117_attach() demod cannot be '0'. In that case the function returns earlier with an error value ('NULL'). Remove the if statement. This error has been reported by scan.coverity.com Signed-off-by: Andi Shyti Signed-off-by: Michael Krufky Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-frontends/cx24117.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/media/dvb-frontends/cx24117.c b/drivers/media/dvb-frontends/cx24117.c index 68f768a5422d..782dcfe6ae69 100644 --- a/drivers/media/dvb-frontends/cx24117.c +++ b/drivers/media/dvb-frontends/cx24117.c @@ -1200,12 +1200,6 @@ struct dvb_frontend *cx24117_attach(const struct cx24117_config *config, state->demod = demod - 1; state->priv = priv; - /* test i2c bus for ack */ - if (demod == 0) { - if (cx24117_readreg(state, 0x00) < 0) - goto error3; - } - dev_info(&state->priv->i2c->dev, "%s: Attaching frontend %d\n", KBUILD_MODNAME, state->demod); @@ -1216,8 +1210,6 @@ struct dvb_frontend *cx24117_attach(const struct cx24117_config *config, state->frontend.demodulator_priv = state; return &state->frontend; -error3: - kfree(state); error2: cx24117_release_priv(priv); error1: From a33dd5171d141c378df498aba3fa3c9a573cacb6 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Thu, 30 Jan 2014 00:06:41 -0300 Subject: [PATCH 090/171] [media] cx24117: use a valid dev pointer for dev_err printout Don't use '&state->priv->i2c->dev' reference to device because state is still 'NULL'. Use '&i2c->dev' instead. 
This bug has been reported by scan.coverity.com Signed-off-by: Andi Shyti Signed-off-by: Michael Krufky Cc: vger@stable.kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-frontends/cx24117.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/cx24117.c b/drivers/media/dvb-frontends/cx24117.c index 782dcfe6ae69..a6c3c9e2e897 100644 --- a/drivers/media/dvb-frontends/cx24117.c +++ b/drivers/media/dvb-frontends/cx24117.c @@ -1176,7 +1176,7 @@ struct dvb_frontend *cx24117_attach(const struct cx24117_config *config, switch (demod) { case 0: - dev_err(&state->priv->i2c->dev, + dev_err(&i2c->dev, "%s: Error attaching frontend %d\n", KBUILD_MODNAME, demod); goto error1; From 866e8d8a9dc1ebb4f9e67197e264ac2df81f7d4b Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 30 Jan 2014 00:11:33 -0300 Subject: [PATCH 091/171] [media] mxl111sf: Fix unintentional garbage stack read mxl111sf_read_reg takes an address of a variable to write to as an argument. drivers/media/usb/dvb-usb-v2/mxl111sf-gpio.c:mxl111sf_config_pin_mux_modes passes several uninitialized stack variables to this routine, expecting them to be filled in. In the event that something unexpected happens when reading from the chip, we end up doing a pr_debug of the value passed in, revealing whatever garbage happened to be on the stack. Change the pr_debug to match what happens in the 'success' case, where we assign buf[1] to *data. Spotted with Coverity (Bugs 731910 through 731917) Signed-off-by: Dave Jones Signed-off-by: Michael Krufky Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb-v2/mxl111sf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf.c b/drivers/media/usb/dvb-usb-v2/mxl111sf.c index 8ceff4290bf1..c7304fa8ab73 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf.c +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf.c @@ -105,7 +105,7 @@ int mxl111sf_read_reg(struct mxl111sf_state *state, u8 addr, u8 *data) ret = -EINVAL; } - pr_debug("R: (0x%02x, 0x%02x)\n", addr, *data); + pr_debug("R: (0x%02x, 0x%02x)\n", addr, buf[1]); fail: return ret; } From 13e1b87c986100169b0695aeb26970943665eda9 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 30 Jan 2014 00:17:09 -0300 Subject: [PATCH 092/171] [media] mxl111sf: Fix compile when CONFIG_DVB_USB_MXL111SF is unset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following build error: drivers/media/usb/dvb-usb-v2/ mxl111sf-tuner.h:72:9: error: expected ‘;’, ‘,’ or ‘)’ before ‘struct’ struct mxl111sf_tuner_config *cfg) Signed-off-by: Dave Jones Signed-off-by: Michael Krufky Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h b/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h index 5fc0121736ff..2046db22519e 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h @@ -68,7 +68,7 @@ struct dvb_frontend *mxl111sf_tuner_attach(struct dvb_frontend *fe, #else static inline struct dvb_frontend *mxl111sf_tuner_attach(struct dvb_frontend *fe, - struct mxl111sf_state *mxl_state + struct mxl111sf_state *mxl_state, struct mxl111sf_tuner_config *cfg) { printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__); From f2e4c5e004691dfe37d0e4b363296f28abdb9bc7 Mon Sep 17 00:00:00 
2001 From: Antti Palosaari Date: Thu, 16 Jan 2014 08:59:30 -0300 Subject: [PATCH 093/171] [media] af9035: add ID [2040:f900] Hauppauge WinTV-MiniStick 2 Add USB ID [2040:f900] for Hauppauge WinTV-MiniStick 2. Device is built upon the IT9135 chipset. Tested-by: Stefan Becker Signed-off-by: Antti Palosaari Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb-v2/af9035.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/usb/dvb-usb-v2/af9035.c b/drivers/media/usb/dvb-usb-v2/af9035.c index 8f9b2cea88f0..8ede8ea762e6 100644 --- a/drivers/media/usb/dvb-usb-v2/af9035.c +++ b/drivers/media/usb/dvb-usb-v2/af9035.c @@ -1539,6 +1539,8 @@ static const struct usb_device_id af9035_id_table[] = { &af9035_props, "TerraTec Cinergy T Stick Dual RC (rev. 2)", NULL) }, { DVB_USB_DEVICE(USB_VID_LEADTEK, 0x6a05, &af9035_props, "Leadtek WinFast DTV Dongle Dual", NULL) }, + { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xf900, + &af9035_props, "Hauppauge WinTV-MiniStick 2", NULL) }, { } }; MODULE_DEVICE_TABLE(usb, af9035_id_table); From 1ba6c90161ac058f580e6ceb5ccc14dcd86365d1 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Fri, 17 Jan 2014 13:38:00 -0300 Subject: [PATCH 094/171] [media] hdpvr: Fix memory leak in debug cppcheck reported a memory leak in device_authorization() within hdpvr-core.c. When the debug option is specified and the code jumps to the "unlock:" label, print_buf was not freed. Confirmed that the module successfully compiles without error. Signed-off-by: Masanari Iida Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/hdpvr/hdpvr-core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/hdpvr/hdpvr-core.c b/drivers/media/usb/hdpvr/hdpvr-core.c index 2f0c89cbac76..c5638964c3f2 100644 --- a/drivers/media/usb/hdpvr/hdpvr-core.c +++ b/drivers/media/usb/hdpvr/hdpvr-core.c @@ -198,7 +198,6 @@ static int device_authorization(struct hdpvr_device *dev) hex_dump_to_buffer(response, 8, 16, 1, print_buf, 5*buf_size+1, 0); v4l2_dbg(MSG_INFO, hdpvr_debug, &dev->v4l2_dev, " response: %s\n", print_buf); - kfree(print_buf); #endif msleep(100); @@ -214,6 +213,9 @@ static int device_authorization(struct hdpvr_device *dev) retval = ret != 8; unlock: mutex_unlock(&dev->usbc_mutex); +#ifdef HDPVR_DEBUG + kfree(print_buf); +#endif return retval; } From 257cc4b5c57060a65e9c805fb1c225688ebbca80 Mon Sep 17 00:00:00 2001 From: Martin Bugge Date: Thu, 23 Jan 2014 06:40:00 -0300 Subject: [PATCH 095/171] [media] v4l2-dv-timings: fix GTF calculation Round off image width to nearest 8 (GTF_CELL_GRAN) A source sending a GTF (Generalized Timing Formula) format has no means of signalling image width. The assumed aspect ratio may result in an odd image width, but according to the standard the image width should be a multiple of 8. 
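A minimal sketch of the rounding this patch applies, assuming GTF_CELL_GRAN is 8 as stated above; the standalone helper name is hypothetical and only for illustration:

	#define GTF_CELL_GRAN	8	/* image width granularity required by GTF */

	/* Round a computed width to the nearest multiple of GTF_CELL_GRAN;
	 * the mask trick works because GTF_CELL_GRAN is a power of two. */
	static unsigned int gtf_round_width(unsigned int image_width)
	{
		return (image_width + GTF_CELL_GRAN / 2) & ~(GTF_CELL_GRAN - 1);
	}

	/* e.g. gtf_round_width(1363) == 1360, gtf_round_width(1364) == 1368 */
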
Cc: Mats Randgaard Signed-off-by: Martin Bugge Reviewed-by: Hans Verkuil Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-dv-timings.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/v4l2-core/v4l2-dv-timings.c b/drivers/media/v4l2-core/v4l2-dv-timings.c index ee52b9f4a944..f7902fe8a526 100644 --- a/drivers/media/v4l2-core/v4l2-dv-timings.c +++ b/drivers/media/v4l2-core/v4l2-dv-timings.c @@ -515,6 +515,7 @@ bool v4l2_detect_gtf(unsigned frame_height, aspect.denominator = 9; } image_width = ((image_height * aspect.numerator) / aspect.denominator); + image_width = (image_width + GTF_CELL_GRAN/2) & ~(GTF_CELL_GRAN - 1); /* Horizontal */ if (default_gtf) From 57f0547fbc1e925f5e58c76f311a6632c3f37740 Mon Sep 17 00:00:00 2001 From: Martin Bugge Date: Wed, 29 Jan 2014 06:50:20 -0300 Subject: [PATCH 096/171] [media] adv7842: Composite free-run platform-data fix Incorrect setting of free-run for Composite. Copy/paste regression fix. Signed-off-by: Martin Bugge Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/adv7842.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/i2c/adv7842.c b/drivers/media/i2c/adv7842.c index 1effc21e1cdd..9bbd6656fb8f 100644 --- a/drivers/media/i2c/adv7842.c +++ b/drivers/media/i2c/adv7842.c @@ -2554,7 +2554,7 @@ static int adv7842_core_init(struct v4l2_subdev *sd) sdp_write_and_or(sd, 0xdd, 0xf0, pdata->sdp_free_run_force | (pdata->sdp_free_run_cbar_en << 1) | (pdata->sdp_free_run_man_col_en << 2) | - (pdata->sdp_free_run_force << 3)); + (pdata->sdp_free_run_auto << 3)); /* TODO from platform data */ cp_write(sd, 0x69, 0x14); /* Enable CP CSC */ From 4294ad1c52caad3239404cea9e0cbfeb435e3a25 Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Wed, 29 Jan 2014 19:11:46 +0100 Subject: [PATCH 097/171] MIPS: Alchemy: Fix DB1100 GPIO registration With CONFIG_GPIOLIB=y gpios need to be requested before they can be modified. Request the SD card-detect pins, and drop the SPI direction setup, as the driver does that for us anyway. This gets rid of a lot of WARN_ON()s triggered by GPIO core, and restores functionality of the touchscreen controller. 
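As a brief aside, a hedged sketch of the gpiolib rule this fix relies on: with CONFIG_GPIOLIB=y a pin should be requested before its direction is changed, otherwise the core emits WARN_ON()s. Pin numbers follow the DB1100 board code in the diff; the init helper name is hypothetical:

	#include <linux/gpio.h>

	static int __init db1100_sd_cd_setup(void)
	{
		int ret;

		ret = gpio_request(19, "sd0_cd");	/* claim the line before configuring it */
		if (ret)
			return ret;
		ret = gpio_request(20, "sd1_cd");
		if (ret) {
			gpio_free(19);
			return ret;
		}
		gpio_direction_input(19);		/* sd0 card-detect */
		gpio_direction_input(20);		/* sd1 card-detect */
		return 0;
	}
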
Signed-off-by: Manuel Lauss Cc: Linux-MIPS Patchwork: https://patchwork.linux-mips.org/patch/6497/ Signed-off-by: Ralf Baechle --- arch/mips/alchemy/devboards/db1000.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/mips/alchemy/devboards/db1000.c b/arch/mips/alchemy/devboards/db1000.c index 11f3ad20321c..5483906e0f86 100644 --- a/arch/mips/alchemy/devboards/db1000.c +++ b/arch/mips/alchemy/devboards/db1000.c @@ -534,13 +534,10 @@ static int __init db1000_dev_init(void) s0 = AU1100_GPIO1_INT; s1 = AU1100_GPIO4_INT; + gpio_request(19, "sd0_cd"); + gpio_request(20, "sd1_cd"); gpio_direction_input(19); /* sd0 cd# */ gpio_direction_input(20); /* sd1 cd# */ - gpio_direction_input(21); /* touch pendown# */ - gpio_direction_input(207); /* SPI MISO */ - gpio_direction_output(208, 0); /* SPI MOSI */ - gpio_direction_output(209, 1); /* SPI SCK */ - gpio_direction_output(210, 1); /* SPI CS# */ /* spi_gpio on SSI0 pins */ pfc = __raw_readl((void __iomem *)SYS_PINFUNC); From 6776254b1c28fece9535d42baf2db88756121fe7 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 4 Feb 2014 12:29:01 +0000 Subject: [PATCH 098/171] MIPS: Wire up sched_setattr/sched_getattr syscalls Wire up for MIPS the new sched_setattr and sched_getattr system calls added in commit d50dde5a10f3 (sched: Add new scheduler syscalls to support an extended scheduling parameters ABI) merged in v3.14-rc1. Signed-off-by: James Hogan Reviewed-by: Markos Chandras Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/6502/ Signed-off-by: Ralf Baechle --- arch/mips/include/uapi/asm/unistd.h | 18 ++++++++++++------ arch/mips/kernel/scall32-o32.S | 2 ++ arch/mips/kernel/scall64-64.S | 2 ++ arch/mips/kernel/scall64-n32.S | 2 ++ arch/mips/kernel/scall64-o32.S | 2 ++ 5 files changed, 20 insertions(+), 6 deletions(-) diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h index 1dee279f9665..d6e154a9e6a5 100644 --- a/arch/mips/include/uapi/asm/unistd.h +++ b/arch/mips/include/uapi/asm/unistd.h @@ -369,16 +369,18 @@ #define __NR_process_vm_writev (__NR_Linux + 346) #define __NR_kcmp (__NR_Linux + 347) #define __NR_finit_module (__NR_Linux + 348) +#define __NR_sched_setattr (__NR_Linux + 349) +#define __NR_sched_getattr (__NR_Linux + 350) /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 348 +#define __NR_Linux_syscalls 350 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #define __NR_O32_Linux 4000 -#define __NR_O32_Linux_syscalls 348 +#define __NR_O32_Linux_syscalls 350 #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -695,16 +697,18 @@ #define __NR_kcmp (__NR_Linux + 306) #define __NR_finit_module (__NR_Linux + 307) #define __NR_getdents64 (__NR_Linux + 308) +#define __NR_sched_setattr (__NR_Linux + 309) +#define __NR_sched_getattr (__NR_Linux + 310) /* * Offset of the last Linux 64-bit flavoured syscall */ -#define __NR_Linux_syscalls 308 +#define __NR_Linux_syscalls 310 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #define __NR_64_Linux 5000 -#define __NR_64_Linux_syscalls 308 +#define __NR_64_Linux_syscalls 310 #if _MIPS_SIM == _MIPS_SIM_NABI32 @@ -1025,15 +1029,17 @@ #define __NR_process_vm_writev (__NR_Linux + 310) #define __NR_kcmp (__NR_Linux + 311) #define __NR_finit_module (__NR_Linux + 312) +#define __NR_sched_setattr (__NR_Linux + 313) +#define __NR_sched_getattr (__NR_Linux + 314) /* * Offset of the last N32 flavoured syscall */ -#define __NR_Linux_syscalls 312 +#define __NR_Linux_syscalls 314 #endif /* _MIPS_SIM == 
_MIPS_SIM_NABI32 */ #define __NR_N32_Linux 6000 -#define __NR_N32_Linux_syscalls 312 +#define __NR_N32_Linux_syscalls 314 #endif /* _UAPI_ASM_UNISTD_H */ diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index e8e541b40d86..a5b14f48e1af 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -563,3 +563,5 @@ EXPORT(sys_call_table) PTR sys_process_vm_writev PTR sys_kcmp PTR sys_finit_module + PTR sys_sched_setattr + PTR sys_sched_getattr /* 4350 */ diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 57e3742fec59..b56e254beb15 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -425,4 +425,6 @@ EXPORT(sys_call_table) PTR sys_kcmp PTR sys_finit_module PTR sys_getdents64 + PTR sys_sched_setattr + PTR sys_sched_getattr /* 5310 */ .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 2f48f5934399..f7e5b72cf481 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -418,4 +418,6 @@ EXPORT(sysn32_call_table) PTR compat_sys_process_vm_writev /* 6310 */ PTR sys_kcmp PTR sys_finit_module + PTR sys_sched_setattr + PTR sys_sched_getattr .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index f1acdb429f4f..6788727d91af 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -541,4 +541,6 @@ EXPORT(sys32_call_table) PTR compat_sys_process_vm_writev PTR sys_kcmp PTR sys_finit_module + PTR sys_sched_setattr + PTR sys_sched_getattr /* 4350 */ .size sys32_call_table,.-sys32_call_table From 40507403485fcb56b83d6ddfc954e9b08305054c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 4 Feb 2014 14:41:26 +0000 Subject: [PATCH 099/171] arm64: vdso: prevent ld from aligning PT_LOAD segments to 64k Whilst the text segment for our VDSO is marked as PT_LOAD in the ELF headers, it is mapped by the kernel and not actually subject to demand-paging. ld doesn't realise this, and emits a p_align field of 64k (the maximum supported page size), which conflicts with the load address picked by the kernel on 4k systems, which will be 4k aligned. This causes GDB to fail with "Failed to read a valid object file image from memory" when attempting to load the VDSO. This patch passes the -n option to ld, which prevents it from aligning PT_LOAD segments to the maximum page size. 
Cc: Reported-by: Kyle McMartin Acked-by: Kyle McMartin Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index d8064af42e62..6d20b7d162d8 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -48,7 +48,7 @@ $(obj-vdso): %.o: %.S # Actual build commands quiet_cmd_vdsold = VDSOL $@ - cmd_vdsold = $(CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@ quiet_cmd_vdsoas = VDSOA $@ cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< From 12b13835a0a8bfabea68741e1ab4d4a4cb77d037 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 4 Feb 2014 12:20:01 -0800 Subject: [PATCH 100/171] kbuild: don't enable DEBUG_INFO when building for COMPILE_TEST It really isn't very interesting to have DEBUG_INFO when doing compile coverage stuff (you wouldn't want to run the result anyway, that's kind of the whole point of COMPILE_TEST), and it currently makes the build take longer and use much more disk space for "all{yes,mod}config". There's somewhat active discussion about this still, and we might end up with some new config option for things like this (Andi points out that the silly X86_DECODER_SELFTEST option also slows down the normal coverage tests hugely), but I'm starting the ball rolling with this simple one-liner. DEBUG_INFO isn't that noticeable if you have tons of memory and a good IO subsystem, but it hurts you a lot if you don't - for very little upside for the common use. Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index dbf94a7d25a8..a48abeac753f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -119,7 +119,7 @@ menu "Compile-time checks and compiler options" config DEBUG_INFO bool "Compile the kernel with debug info" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && !COMPILE_TEST help If you say Y here the resulting kernel image will include debugging info resulting in a larger kernel image. From fcb6a15c2e7e76d493e6f91ea889ab40e1c643a4 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Mon, 3 Feb 2014 08:55:31 -0800 Subject: [PATCH 101/171] intel_pstate: Take core C0 time into account for core busy calculation Take non-idle time into account when calculating core busy time. This ensures that intel_pstate will notice a decrease in load. References: https://bugzilla.kernel.org/show_bug.cgi?id=66581 Cc: 3.10+ # 3.10+ Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/intel_pstate.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7e257b233602..79606f473f48 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -57,6 +57,7 @@ struct sample { int32_t core_pct_busy; u64 aperf; u64 mperf; + unsigned long long tsc; int freq; }; @@ -96,6 +97,7 @@ struct cpudata { u64 prev_aperf; u64 prev_mperf; + unsigned long long prev_tsc; int sample_ptr; struct sample samples[SAMPLE_COUNT]; }; @@ -548,30 +550,41 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu, struct sample *sample) { u64 core_pct; - core_pct = div64_u64(int_tofp(sample->aperf * 100), - sample->mperf); - sample->freq = fp_toint(cpu->pstate.max_pstate * core_pct * 1000); + u64 c0_pct; - sample->core_pct_busy = core_pct; + core_pct = div64_u64(sample->aperf * 100, sample->mperf); + + c0_pct = div64_u64(sample->mperf * 100, sample->tsc); + sample->freq = fp_toint( + mul_fp(int_tofp(cpu->pstate.max_pstate), + int_tofp(core_pct * 1000))); + + sample->core_pct_busy = mul_fp(int_tofp(core_pct), + div_fp(int_tofp(c0_pct + 1), int_tofp(100))); } static inline void intel_pstate_sample(struct cpudata *cpu) { u64 aperf, mperf; + unsigned long long tsc; rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); + tsc = native_read_tsc(); cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT; cpu->samples[cpu->sample_ptr].aperf = aperf; cpu->samples[cpu->sample_ptr].mperf = mperf; + cpu->samples[cpu->sample_ptr].tsc = tsc; cpu->samples[cpu->sample_ptr].aperf -= cpu->prev_aperf; cpu->samples[cpu->sample_ptr].mperf -= cpu->prev_mperf; + cpu->samples[cpu->sample_ptr].tsc -= cpu->prev_tsc; intel_pstate_calc_busy(cpu, &cpu->samples[cpu->sample_ptr]); cpu->prev_aperf = aperf; cpu->prev_mperf = mperf; + cpu->prev_tsc = tsc; } static inline void intel_pstate_set_sample_time(struct cpudata *cpu) From 7b320cb1ed2dbd2c5f2a778197baf76fd6bf545a Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 4 Feb 2014 09:07:09 +0100 Subject: [PATCH 102/171] pinctrl: protect pinctrl_list add We have few fedora bug reports about list corruption on pinctrl, for example: https://bugzilla.redhat.com/show_bug.cgi?id=1051918 Most likely corruption happen due lack of protection of pinctrl_list when adding new nodes to it. Patch corrects that. Fixes: 42fed7ba44e ("pinctrl: move subsystem mutex to pinctrl_dev struct") Cc: stable@vger.kernel.org Signed-off-by: Stanislaw Gruszka Acked-by: Stephen Warren Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 5ee61a470016..cab020a58bf5 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -851,7 +851,9 @@ static struct pinctrl *create_pinctrl(struct device *dev) kref_init(&p->users); /* Add the pinctrl handle to the global list */ + mutex_lock(&pinctrl_list_mutex); list_add_tail(&p->node, &pinctrl_list); + mutex_unlock(&pinctrl_list_mutex); return p; } From e18ac62fa4b3f16234bab0d5a6627c57dbae9e7e Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 3 Feb 2014 12:59:16 +0200 Subject: [PATCH 103/171] ACPI / video: Add HP EliteBook Revolve 810 to the blacklist On HP EliteBook Revolve 810 the ACPI backlight device doesn't work as expected. For example when resuming from system sleep, it seems to lose backlight settings. 
Forcing Intel driver fixes the problem so add this machine the ACPI video detect blacklist. Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video_detect.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index f0447d3daf2c..a697b77b8865 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -170,6 +170,14 @@ static struct dmi_system_id video_detect_dmi_table[] = { }, { .callback = video_detect_force_vendor, + .ident = "HP EliteBook Revolve 810", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook Revolve 810 G1"), + }, + }, + { + .callback = video_detect_force_vendor, .ident = "Lenovo Yoga 13", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), From eb5ed9a3221a30edc7b48b87af550dbcc0e82c80 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 30 Jan 2014 14:56:56 +0300 Subject: [PATCH 104/171] ACPI / utils: remove a pointless NULL check "element" can't be NULL because it is the address of a struct member. Signed-off-by: Dan Carpenter Signed-off-by: Rafael J. Wysocki --- drivers/acpi/utils.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 0347a37eb438..85e3b612bdc0 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -99,10 +99,6 @@ acpi_extract_package(union acpi_object *package, union acpi_object *element = &(package->package.elements[i]); - if (!element) { - return AE_BAD_DATA; - } - switch (element->type) { case ACPI_TYPE_INTEGER: From 085ca1175cdccc8e400f6d06cf64e209b46021a2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 30 Jan 2014 14:55:15 +0300 Subject: [PATCH 105/171] ACPI / proc: remove unneeded NULL check We already verified that "ldev" was non-NULL earlier and also we dereference again without checking a three lines later. Signed-off-by: Dan Carpenter Signed-off-by: Rafael J. Wysocki --- drivers/acpi/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/proc.c b/drivers/acpi/proc.c index 50fe34ffe932..75c28eae8860 100644 --- a/drivers/acpi/proc.c +++ b/drivers/acpi/proc.c @@ -60,7 +60,7 @@ acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset) seq_printf(seq, "%c%-8s %s:%s\n", dev->wakeup.flags.run_wake ? '*' : ' ', (device_may_wakeup(&dev->dev) || - (ldev && device_may_wakeup(ldev))) ? + device_may_wakeup(ldev)) ? "enabled" : "disabled", ldev->bus ? ldev->bus->name : "no-bus", dev_name(ldev)); From 47a08c85f70ebdbaaf42b444af57dd6f83e04485 Mon Sep 17 00:00:00 2001 From: "Luis G.F" Date: Tue, 21 Jan 2014 15:40:43 +0100 Subject: [PATCH 106/171] ACPI / battery: Fix incorrect sscanf() string in acpi_battery_init_alarm() Fix incorrect sscanf() string in acpi_battery_init_alarm(). Change from %ld to %lu, because 'x' is unsigned long. Signed-off-by: Luis G.F Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/battery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 470e7542bf31..018a42883706 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -549,7 +549,7 @@ static ssize_t acpi_battery_alarm_store(struct device *dev, { unsigned long x; struct acpi_battery *battery = to_acpi_battery(dev_get_drvdata(dev)); - if (sscanf(buf, "%ld\n", &x) == 1) + if (sscanf(buf, "%lu\n", &x) == 1) battery->alarm = x/1000; if (acpi_battery_present(battery)) acpi_battery_set_alarm(battery); From ec22b4aa993abbd18f5bbbcb20a1c56be3b1d38b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 5 Feb 2014 14:13:56 +1000 Subject: [PATCH 107/171] drm/mgag200: fix typo causing bw limits to be ignored on some chips mode->mdev otherwise the bw limits never kick in. Reported in RHEL testing. Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/mgag200/mgag200_mode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index b8583f275e80..968374776db9 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -1519,11 +1519,11 @@ static int mga_vga_mode_valid(struct drm_connector *connector, (mga_vga_calculate_mode_bandwidth(mode, bpp) > (32700 * 1024))) { return MODE_BANDWIDTH; - } else if (mode->type == G200_EH && + } else if (mdev->type == G200_EH && (mga_vga_calculate_mode_bandwidth(mode, bpp) > (37500 * 1024))) { return MODE_BANDWIDTH; - } else if (mode->type == G200_ER && + } else if (mdev->type == G200_ER && (mga_vga_calculate_mode_bandwidth(mode, bpp) > (55000 * 1024))) { return MODE_BANDWIDTH; From d3c56568f43807135f2c2a09582a69f809f0d8b7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Feb 2014 09:56:13 +0100 Subject: [PATCH 108/171] ALSA: hda/realtek - Avoid invalid COEFs for ALC271X We've seen often problems after suspend/resume on Acer Aspire One AO725 with ALC271X codec as reported in kernel bugzilla, and it turned out that some COEFs doesn't work and triggers the codec communication stall. Since these magic COEF setups are specific to ALC269VB for some PLL configurations, the machine works even without these manual adjustment. So, let's simply avoid applying them for ALC271X. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=52181 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 56a8f1876603..e16a71e586f0 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4427,6 +4427,9 @@ static void alc269_fill_coef(struct hda_codec *codec) if (spec->codec_variant != ALC269_TYPE_ALC269VB) return; + /* ALC271X doesn't seem to support these COEFs (bko#52181) */ + if (!strcmp(codec->chip_name, "ALC271X")) + return; if ((alc_get_coef0(codec) & 0x00ff) < 0x015) { alc_write_coef_idx(codec, 0xf, 0x960b); From c7579fed1f1b2567529aea64ef19871337403ab3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 5 Feb 2014 07:28:10 +0100 Subject: [PATCH 109/171] ALSA: hda - Add missing mixer widget for AD1983 The mixer widget on AD1983 at NID 0x0e was missing in the commit [f2f8be43c5c9: ALSA: hda - Add aamix NID to AD codecs]. 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=70011 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_analog.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 50b2427f19ca..195cd62cdce5 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -498,6 +498,7 @@ static int patch_ad1983(struct hda_codec *codec) return err; spec = codec->spec; + spec->gen.mixer_nid = 0x0e; spec->gen.beep_nid = 0x10; set_beep_amp(spec, 0x10, 0, HDA_OUTPUT); err = ad198x_parse_auto_config(codec, false); From c20f31ec421ea4fabea5e95a6afd46c5f41e5599 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Feb 2014 11:02:10 +0100 Subject: [PATCH 110/171] ALSA: hda - Fix missing VREF setup for Mac Pro 1,1 Mac Pro 1,1 with ALC889A codec needs the VREF setup on NID 0x18 to VREF50, in order to make the speaker working. The same fixup was already needed for MacBook Air 1,1, so we can reuse it. Reported-by: Nicolai Beuermann Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e16a71e586f0..d9693ca9546f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1821,6 +1821,7 @@ enum { ALC889_FIXUP_IMAC91_VREF, ALC889_FIXUP_MBA11_VREF, ALC889_FIXUP_MBA21_VREF, + ALC889_FIXUP_MP11_VREF, ALC882_FIXUP_INV_DMIC, ALC882_FIXUP_NO_PRIMARY_HP, ALC887_FIXUP_ASUS_BASS, @@ -2190,6 +2191,12 @@ static const struct hda_fixup alc882_fixups[] = { .chained = true, .chain_id = ALC889_FIXUP_MBP_VREF, }, + [ALC889_FIXUP_MP11_VREF] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc889_fixup_mba11_vref, + .chained = true, + .chain_id = ALC885_FIXUP_MACPRO_GPIO, + }, [ALC882_FIXUP_INV_DMIC] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_inv_dmic_0x12, @@ -2253,7 +2260,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x106b, 0x00a0, "MacBookPro 3,1", ALC889_FIXUP_MBP_VREF), SND_PCI_QUIRK(0x106b, 0x00a1, "Macbook", ALC889_FIXUP_MBP_VREF), SND_PCI_QUIRK(0x106b, 0x00a4, "MacbookPro 4,1", ALC889_FIXUP_MBP_VREF), - SND_PCI_QUIRK(0x106b, 0x0c00, "Mac Pro", ALC885_FIXUP_MACPRO_GPIO), + SND_PCI_QUIRK(0x106b, 0x0c00, "Mac Pro", ALC889_FIXUP_MP11_VREF), SND_PCI_QUIRK(0x106b, 0x1000, "iMac 24", ALC885_FIXUP_MACPRO_GPIO), SND_PCI_QUIRK(0x106b, 0x2800, "AppleTV", ALC885_FIXUP_MACPRO_GPIO), SND_PCI_QUIRK(0x106b, 0x2c00, "MacbookPro rev3", ALC889_FIXUP_MBP_VREF), From 76c7d18bcddc9794f898ebdee44a3160c636da9c Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 30 Jan 2014 10:46:12 +0100 Subject: [PATCH 111/171] drm/vmwgfx: Don't commit staged bindings if execbuf fails If execbuf fails and binding commands are never sent to the device, don't commit the staged context bindings to the tracker. Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 7a5f1eb55c5a..3f0b4d1450ff 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -114,8 +114,10 @@ static void vmw_resource_list_unreserve(struct list_head *list, * persistent context binding tracker. 
*/ if (unlikely(val->staged_bindings)) { - vmw_context_binding_state_transfer - (val->res, val->staged_bindings); + if (!backoff) { + vmw_context_binding_state_transfer + (val->res, val->staged_bindings); + } kfree(val->staged_bindings); val->staged_bindings = NULL; } From cf5e3413337309050c05e13dcebe85b7194a21e5 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 30 Jan 2014 10:58:19 +0100 Subject: [PATCH 112/171] drm/vmwgfx: Fix regression caused by "drm/ttm: make ttm reservation calls behave like reservation calls" The call to ttm_eu_backoff_reservation() as part of an error path would cause a lock imbalance if the reservation ticket was not initialized. This error is easily triggered from user-space by submitting a bogus command stream. Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Cc: Jerome Glisse Cc: Dave Airlie --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 3f0b4d1450ff..dafa139c0ca7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -2195,11 +2195,11 @@ int vmw_execbuf_process(struct drm_file *file_priv, ret = vmw_cmd_check_all(dev_priv, sw_context, kernel_commands, command_size); if (unlikely(ret != 0)) - goto out_err; + goto out_err_nores; ret = vmw_resources_reserve(sw_context); if (unlikely(ret != 0)) - goto out_err; + goto out_err_nores; ret = ttm_eu_reserve_buffers(&ticket, &sw_context->validate_nodes); if (unlikely(ret != 0)) @@ -2291,10 +2291,11 @@ int vmw_execbuf_process(struct drm_file *file_priv, out_unlock_binding: mutex_unlock(&dev_priv->binding_mutex); out_err: + ttm_eu_backoff_reservation(&ticket, &sw_context->validate_nodes); +out_err_nores: + vmw_resource_list_unreserve(&sw_context->resource_list, true); vmw_resource_relocations_free(&sw_context->res_relocations); vmw_free_relocations(sw_context); - ttm_eu_backoff_reservation(&ticket, &sw_context->validate_nodes); - vmw_resource_list_unreserve(&sw_context->resource_list, true); vmw_clear_validations(sw_context); if (unlikely(dev_priv->pinned_bo != NULL && !dev_priv->query_cid_valid)) From 0ccbbae43c2dfe45ded1d7ed59b8fc7ac8214fb0 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 30 Jan 2014 11:13:43 +0100 Subject: [PATCH 113/171] drm/vmwgfx: Fix SET_SHADER_CONST emulation on guest-backed devices Emulate the SET_SHADER_CONST legacy command on guest-backed devices by issuing a SET_GB_SHADERCONSTS_INLINE command. Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 37 +++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index dafa139c0ca7..9441825c7860 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -1528,6 +1528,39 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv, return 0; } +/** + * vmw_cmd_set_shader_const - Validate an SVGA_3D_CMD_SET_SHADER_CONST + * command + * + * @dev_priv: Pointer to a device private struct. + * @sw_context: The software context being used for this batch. + * @header: Pointer to the command header in the command stream. 
+ */ +static int vmw_cmd_set_shader_const(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + struct vmw_set_shader_const_cmd { + SVGA3dCmdHeader header; + SVGA3dCmdSetShaderConst body; + } *cmd; + int ret; + + cmd = container_of(header, struct vmw_set_shader_const_cmd, + header); + + ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_context, + user_context_converter, &cmd->body.cid, + NULL); + if (unlikely(ret != 0)) + return ret; + + if (dev_priv->has_mob) + header->id = SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE; + + return 0; +} + /** * vmw_cmd_bind_gb_shader - Validate an SVGA_3D_CMD_BIND_GB_SHADER * command @@ -1642,8 +1675,8 @@ static const struct vmw_cmd_entry const vmw_cmd_entries[SVGA_3D_CMD_MAX] = { true, true, false), VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER, &vmw_cmd_set_shader, true, false, false), - VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER_CONST, &vmw_cmd_cid_check, - true, true, false), + VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER_CONST, &vmw_cmd_set_shader_const, + true, false, false), VMW_CMD_DEF(SVGA_3D_CMD_DRAW_PRIMITIVES, &vmw_cmd_draw, true, false, false), VMW_CMD_DEF(SVGA_3D_CMD_SETSCISSORRECT, &vmw_cmd_cid_check, From c1a21373d2cb94a7808161a8c237b249cd799ce7 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 30 Jan 2014 11:18:38 +0100 Subject: [PATCH 114/171] drm/vmwgfx: Fix legacy surface reference size copyback Surfaces created using the guest-backed surface interface only keeps the base mip size, so only copy that if the legacy surface reference ioctl requests the size information. Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz --- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 979da1c246a5..ec2b99833fce 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -908,8 +908,8 @@ int vmw_surface_reference_ioctl(struct drm_device *dev, void *data, rep->size_addr; if (user_sizes) - ret = copy_to_user(user_sizes, srf->sizes, - srf->num_sizes * sizeof(*srf->sizes)); + ret = copy_to_user(user_sizes, &srf->base_size, + sizeof(srf->base_size)); if (unlikely(ret != 0)) { DRM_ERROR("copy_to_user failed %p %u\n", user_sizes, srf->num_sizes); From d5bde956630b86462ee22055f5816a04290aed57 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 31 Jan 2014 10:12:10 +0100 Subject: [PATCH 115/171] drm/vmwgfx: Emulate legacy shaders on guest-backed devices v2 Command stream legacy shader creation and destruction is replaced by NOPs in the command stream, and instead guest-backed shaders are created and destroyed as part of the command validation process. v2: Removed some stray debug messages. 
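A minimal sketch of the NOP replacement mechanism, mirroring the vmw_resource_relocations_apply() change in the diff below: a relocation recorded without a backing resource overwrites the command header id with SVGA_3D_CMD_NOP instead of patching in a resource id. Variable names follow the driver code:

	list_for_each_entry(rel, list, head) {
		if (likely(rel->res != NULL))
			cb[rel->offset] = rel->res->id;		/* normal case: patch in the device resource id */
		else
			cb[rel->offset] = SVGA_3D_CMD_NOP;	/* legacy shader define/destroy becomes a no-op */
	}
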
Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 7 + drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 29 +- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 197 ++++++++-- drivers/gpu/drm/vmwgfx/vmwgfx_shader.c | 489 +++++++++++++++++++++--- 4 files changed, 632 insertions(+), 90 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 9893328f8fdc..3bdc0adc656d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -941,6 +941,7 @@ static void vmw_postclose(struct drm_device *dev, drm_master_put(&vmw_fp->locked_master); } + vmw_compat_shader_man_destroy(vmw_fp->shman); ttm_object_file_release(&vmw_fp->tfile); kfree(vmw_fp); } @@ -960,11 +961,17 @@ static int vmw_driver_open(struct drm_device *dev, struct drm_file *file_priv) if (unlikely(vmw_fp->tfile == NULL)) goto out_no_tfile; + vmw_fp->shman = vmw_compat_shader_man_create(dev_priv); + if (IS_ERR(vmw_fp->shman)) + goto out_no_shman; + file_priv->driver_priv = vmw_fp; dev_priv->bdev.dev_mapping = dev->dev_mapping; return 0; +out_no_shman: + ttm_object_file_release(&vmw_fp->tfile); out_no_tfile: kfree(vmw_fp); return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 554e7fa33082..cef0ff7ac738 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -75,10 +75,14 @@ #define VMW_RES_FENCE ttm_driver_type3 #define VMW_RES_SHADER ttm_driver_type4 +struct vmw_compat_shader_manager; + struct vmw_fpriv { struct drm_master *locked_master; struct ttm_object_file *tfile; struct list_head fence_events; + bool gb_aware; + struct vmw_compat_shader_manager *shman; }; struct vmw_dma_buffer { @@ -318,7 +322,7 @@ struct vmw_sw_context{ struct drm_open_hash res_ht; bool res_ht_initialized; bool kernel; /**< is the called made from the kernel */ - struct ttm_object_file *tfile; + struct vmw_fpriv *fp; struct list_head validate_nodes; struct vmw_relocation relocs[VMWGFX_MAX_RELOCATIONS]; uint32_t cur_reloc; @@ -336,6 +340,7 @@ struct vmw_sw_context{ bool needs_post_query_barrier; struct vmw_resource *error_resource; struct vmw_ctx_binding_state staged_bindings; + struct list_head staged_shaders; }; struct vmw_legacy_display; @@ -991,6 +996,28 @@ extern int vmw_shader_define_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int vmw_shader_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern int vmw_compat_shader_lookup(struct vmw_compat_shader_manager *man, + SVGA3dShaderType shader_type, + u32 *user_key); +extern void vmw_compat_shaders_commit(struct vmw_compat_shader_manager *man, + struct list_head *list); +extern void vmw_compat_shaders_revert(struct vmw_compat_shader_manager *man, + struct list_head *list); +extern int vmw_compat_shader_remove(struct vmw_compat_shader_manager *man, + u32 user_key, + SVGA3dShaderType shader_type, + struct list_head *list); +extern int vmw_compat_shader_add(struct vmw_compat_shader_manager *man, + u32 user_key, const void *bytecode, + SVGA3dShaderType shader_type, + size_t size, + struct ttm_object_file *tfile, + struct list_head *list); +extern struct vmw_compat_shader_manager * +vmw_compat_shader_man_create(struct vmw_private *dev_priv); +extern void +vmw_compat_shader_man_destroy(struct vmw_compat_shader_manager *man); + /** * Inline helper functions diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 
9441825c7860..352224b9d667 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -235,8 +235,12 @@ static void vmw_resource_relocations_apply(uint32_t *cb, { struct vmw_resource_relocation *rel; - list_for_each_entry(rel, list, head) - cb[rel->offset] = rel->res->id; + list_for_each_entry(rel, list, head) { + if (likely(rel->res != NULL)) + cb[rel->offset] = rel->res->id; + else + cb[rel->offset] = SVGA_3D_CMD_NOP; + } } static int vmw_cmd_invalid(struct vmw_private *dev_priv, @@ -381,22 +385,27 @@ static int vmw_resources_validate(struct vmw_sw_context *sw_context) } /** - * vmw_cmd_res_check - Check that a resource is present and if so, put it + * vmw_cmd_compat_res_check - Check that a resource is present and if so, put it * on the resource validate list unless it's already there. * * @dev_priv: Pointer to a device private structure. * @sw_context: Pointer to the software context. * @res_type: Resource type. * @converter: User-space visisble type specific information. - * @id: Pointer to the location in the command buffer currently being + * @id: user-space resource id handle. + * @id_loc: Pointer to the location in the command buffer currently being * parsed from where the user-space resource id handle is located. + * @p_val: Pointer to pointer to resource validalidation node. Populated + * on exit. */ -static int vmw_cmd_res_check(struct vmw_private *dev_priv, - struct vmw_sw_context *sw_context, - enum vmw_res_type res_type, - const struct vmw_user_resource_conv *converter, - uint32_t *id, - struct vmw_resource_val_node **p_val) +static int +vmw_cmd_compat_res_check(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + enum vmw_res_type res_type, + const struct vmw_user_resource_conv *converter, + uint32_t id, + uint32_t *id_loc, + struct vmw_resource_val_node **p_val) { struct vmw_res_cache_entry *rcache = &sw_context->res_cache[res_type]; @@ -404,7 +413,7 @@ static int vmw_cmd_res_check(struct vmw_private *dev_priv, struct vmw_resource_val_node *node; int ret; - if (*id == SVGA3D_INVALID_ID) { + if (id == SVGA3D_INVALID_ID) { if (p_val) *p_val = NULL; if (res_type == vmw_res_context) { @@ -419,7 +428,7 @@ static int vmw_cmd_res_check(struct vmw_private *dev_priv, * resource */ - if (likely(rcache->valid && *id == rcache->handle)) { + if (likely(rcache->valid && id == rcache->handle)) { const struct vmw_resource *res = rcache->res; rcache->node->first_usage = false; @@ -428,28 +437,28 @@ static int vmw_cmd_res_check(struct vmw_private *dev_priv, return vmw_resource_relocation_add (&sw_context->res_relocations, res, - id - sw_context->buf_start); + id_loc - sw_context->buf_start); } ret = vmw_user_resource_lookup_handle(dev_priv, - sw_context->tfile, - *id, + sw_context->fp->tfile, + id, converter, &res); if (unlikely(ret != 0)) { DRM_ERROR("Could not find or use resource 0x%08x.\n", - (unsigned) *id); + (unsigned) id); dump_stack(); return ret; } rcache->valid = true; rcache->res = res; - rcache->handle = *id; + rcache->handle = id; ret = vmw_resource_relocation_add(&sw_context->res_relocations, res, - id - sw_context->buf_start); + id_loc - sw_context->buf_start); if (unlikely(ret != 0)) goto out_no_reloc; @@ -482,6 +491,31 @@ out_no_reloc: return ret; } +/** + * vmw_cmd_res_check - Check that a resource is present and if so, put it + * on the resource validate list unless it's already there. + * + * @dev_priv: Pointer to a device private structure. + * @sw_context: Pointer to the software context. 
+ * @res_type: Resource type. + * @converter: User-space visisble type specific information. + * @id_loc: Pointer to the location in the command buffer currently being + * parsed from where the user-space resource id handle is located. + * @p_val: Pointer to pointer to resource validalidation node. Populated + * on exit. + */ +static int +vmw_cmd_res_check(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + enum vmw_res_type res_type, + const struct vmw_user_resource_conv *converter, + uint32_t *id_loc, + struct vmw_resource_val_node **p_val) +{ + return vmw_cmd_compat_res_check(dev_priv, sw_context, res_type, + converter, *id_loc, id_loc, p_val); +} + /** * vmw_cmd_cid_check - Check a command header for valid context information. * @@ -769,7 +803,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, struct vmw_relocation *reloc; int ret; - ret = vmw_user_dmabuf_lookup(sw_context->tfile, handle, &vmw_bo); + ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo); if (unlikely(ret != 0)) { DRM_ERROR("Could not find or use MOB buffer.\n"); return -EINVAL; @@ -830,7 +864,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, struct vmw_relocation *reloc; int ret; - ret = vmw_user_dmabuf_lookup(sw_context->tfile, handle, &vmw_bo); + ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo); if (unlikely(ret != 0)) { DRM_ERROR("Could not find or use GMR region.\n"); return -EINVAL; @@ -1129,7 +1163,8 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv, srf = vmw_res_to_srf(sw_context->res_cache[vmw_res_surface].res); - vmw_kms_cursor_snoop(srf, sw_context->tfile, &vmw_bo->base, header); + vmw_kms_cursor_snoop(srf, sw_context->fp->tfile, &vmw_bo->base, + header); out_no_surface: vmw_dmabuf_unreference(&vmw_bo); @@ -1480,6 +1515,98 @@ static int vmw_cmd_invalidate_gb_surface(struct vmw_private *dev_priv, &cmd->body.sid, NULL); } + +/** + * vmw_cmd_shader_define - Validate an SVGA_3D_CMD_SHADER_DEFINE + * command + * + * @dev_priv: Pointer to a device private struct. + * @sw_context: The software context being used for this batch. + * @header: Pointer to the command header in the command stream. + */ +static int vmw_cmd_shader_define(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + struct vmw_shader_define_cmd { + SVGA3dCmdHeader header; + SVGA3dCmdDefineShader body; + } *cmd; + int ret; + size_t size; + + cmd = container_of(header, struct vmw_shader_define_cmd, + header); + + ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_context, + user_context_converter, &cmd->body.cid, + NULL); + if (unlikely(ret != 0)) + return ret; + + if (unlikely(!dev_priv->has_mob)) + return 0; + + size = cmd->header.size - sizeof(cmd->body); + ret = vmw_compat_shader_add(sw_context->fp->shman, + cmd->body.shid, cmd + 1, + cmd->body.type, size, + sw_context->fp->tfile, + &sw_context->staged_shaders); + if (unlikely(ret != 0)) + return ret; + + return vmw_resource_relocation_add(&sw_context->res_relocations, + NULL, &cmd->header.id - + sw_context->buf_start); + + return 0; +} + +/** + * vmw_cmd_shader_destroy - Validate an SVGA_3D_CMD_SHADER_DESTROY + * command + * + * @dev_priv: Pointer to a device private struct. + * @sw_context: The software context being used for this batch. + * @header: Pointer to the command header in the command stream. 
+ */ +static int vmw_cmd_shader_destroy(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + struct vmw_shader_destroy_cmd { + SVGA3dCmdHeader header; + SVGA3dCmdDestroyShader body; + } *cmd; + int ret; + + cmd = container_of(header, struct vmw_shader_destroy_cmd, + header); + + ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_context, + user_context_converter, &cmd->body.cid, + NULL); + if (unlikely(ret != 0)) + return ret; + + if (unlikely(!dev_priv->has_mob)) + return 0; + + ret = vmw_compat_shader_remove(sw_context->fp->shman, + cmd->body.shid, + cmd->body.type, + &sw_context->staged_shaders); + if (unlikely(ret != 0)) + return ret; + + return vmw_resource_relocation_add(&sw_context->res_relocations, + NULL, &cmd->header.id - + sw_context->buf_start); + + return 0; +} + /** * vmw_cmd_set_shader - Validate an SVGA_3D_CMD_SET_SHADER * command @@ -1511,10 +1638,17 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv, if (dev_priv->has_mob) { struct vmw_ctx_bindinfo bi; struct vmw_resource_val_node *res_node; + u32 shid = cmd->body.shid; - ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_shader, - user_shader_converter, - &cmd->body.shid, &res_node); + (void) vmw_compat_shader_lookup(sw_context->fp->shman, + cmd->body.type, + &shid); + + ret = vmw_cmd_compat_res_check(dev_priv, sw_context, + vmw_res_shader, + user_shader_converter, + shid, + &cmd->body.shid, &res_node); if (unlikely(ret != 0)) return ret; @@ -1669,10 +1803,10 @@ static const struct vmw_cmd_entry const vmw_cmd_entries[SVGA_3D_CMD_MAX] = { true, false, false), VMW_CMD_DEF(SVGA_3D_CMD_PRESENT, &vmw_cmd_present_check, false, false, false), - VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DEFINE, &vmw_cmd_cid_check, - true, true, false), - VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DESTROY, &vmw_cmd_cid_check, - true, true, false), + VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DEFINE, &vmw_cmd_shader_define, + true, false, false), + VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DESTROY, &vmw_cmd_shader_destroy, + true, false, false), VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER, &vmw_cmd_set_shader, true, false, false), VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER_CONST, &vmw_cmd_set_shader_const, @@ -2206,7 +2340,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, } else sw_context->kernel = true; - sw_context->tfile = vmw_fpriv(file_priv)->tfile; + sw_context->fp = vmw_fpriv(file_priv); sw_context->cur_reloc = 0; sw_context->cur_val_buf = 0; sw_context->fence_flags = 0; @@ -2223,6 +2357,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, goto out_unlock; sw_context->res_ht_initialized = true; } + INIT_LIST_HEAD(&sw_context->staged_shaders); INIT_LIST_HEAD(&resource_list); ret = vmw_cmd_check_all(dev_priv, sw_context, kernel_commands, @@ -2311,6 +2446,8 @@ int vmw_execbuf_process(struct drm_file *file_priv, } list_splice_init(&sw_context->resource_list, &resource_list); + vmw_compat_shaders_commit(sw_context->fp->shman, + &sw_context->staged_shaders); mutex_unlock(&dev_priv->cmdbuf_mutex); /* @@ -2337,6 +2474,8 @@ out_unlock: list_splice_init(&sw_context->resource_list, &resource_list); error_resource = sw_context->error_resource; sw_context->error_resource = NULL; + vmw_compat_shaders_revert(sw_context->fp->shman, + &sw_context->staged_shaders); mutex_unlock(&dev_priv->cmdbuf_mutex); /* diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index 1457ec4b7125..be85d7fdfb5b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -29,6 +29,8 @@ 
#include "vmwgfx_resource_priv.h" #include "ttm/ttm_placement.h" +#define VMW_COMPAT_SHADER_HT_ORDER 12 + struct vmw_shader { struct vmw_resource res; SVGA3dShaderType type; @@ -40,6 +42,50 @@ struct vmw_user_shader { struct vmw_shader shader; }; +/** + * enum vmw_compat_shader_state - Staging state for compat shaders + */ +enum vmw_compat_shader_state { + VMW_COMPAT_COMMITED, + VMW_COMPAT_ADD, + VMW_COMPAT_DEL +}; + +/** + * struct vmw_compat_shader - Metadata for compat shaders. + * + * @handle: The TTM handle of the guest backed shader. + * @tfile: The struct ttm_object_file the guest backed shader is registered + * with. + * @hash: Hash item for lookup. + * @head: List head for staging lists or the compat shader manager list. + * @state: Staging state. + * + * The structure is protected by the cmdbuf lock. + */ +struct vmw_compat_shader { + u32 handle; + struct ttm_object_file *tfile; + struct drm_hash_item hash; + struct list_head head; + enum vmw_compat_shader_state state; +}; + +/** + * struct vmw_compat_shader_manager - Compat shader manager. + * + * @shaders: Hash table containing staged and commited compat shaders + * @list: List of commited shaders. + * @dev_priv: Pointer to a device private structure. + * + * @shaders and @list are protected by the cmdbuf mutex for now. + */ +struct vmw_compat_shader_manager { + struct drm_open_hash shaders; + struct list_head list; + struct vmw_private *dev_priv; +}; + static void vmw_user_shader_free(struct vmw_resource *res); static struct vmw_resource * vmw_user_shader_base_to_res(struct ttm_base_object *base); @@ -325,13 +371,81 @@ int vmw_shader_destroy_ioctl(struct drm_device *dev, void *data, TTM_REF_USAGE); } +int vmw_shader_alloc(struct vmw_private *dev_priv, + struct vmw_dma_buffer *buffer, + size_t shader_size, + size_t offset, + SVGA3dShaderType shader_type, + struct ttm_object_file *tfile, + u32 *handle) +{ + struct vmw_user_shader *ushader; + struct vmw_resource *res, *tmp; + int ret; + + /* + * Approximate idr memory usage with 128 bytes. It will be limited + * by maximum number_of shaders anyway. + */ + if (unlikely(vmw_user_shader_size == 0)) + vmw_user_shader_size = + ttm_round_pot(sizeof(struct vmw_user_shader)) + 128; + + ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), + vmw_user_shader_size, + false, true); + if (unlikely(ret != 0)) { + if (ret != -ERESTARTSYS) + DRM_ERROR("Out of graphics memory for shader " + "creation.\n"); + goto out; + } + + ushader = kzalloc(sizeof(*ushader), GFP_KERNEL); + if (unlikely(ushader == NULL)) { + ttm_mem_global_free(vmw_mem_glob(dev_priv), + vmw_user_shader_size); + ret = -ENOMEM; + goto out; + } + + res = &ushader->shader.res; + ushader->base.shareable = false; + ushader->base.tfile = NULL; + + /* + * From here on, the destructor takes over resource freeing. 
+ */ + + ret = vmw_gb_shader_init(dev_priv, res, shader_size, + offset, shader_type, buffer, + vmw_user_shader_free); + if (unlikely(ret != 0)) + goto out; + + tmp = vmw_resource_reference(res); + ret = ttm_base_object_init(tfile, &ushader->base, false, + VMW_RES_SHADER, + &vmw_user_shader_base_release, NULL); + + if (unlikely(ret != 0)) { + vmw_resource_unreference(&tmp); + goto out_err; + } + + if (handle) + *handle = ushader->base.hash.key; +out_err: + vmw_resource_unreference(&res); +out: + return ret; +} + + int vmw_shader_define_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct vmw_private *dev_priv = vmw_priv(dev); - struct vmw_user_shader *ushader; - struct vmw_resource *res; - struct vmw_resource *tmp; struct drm_vmw_shader_create_arg *arg = (struct drm_vmw_shader_create_arg *)data; struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; @@ -373,69 +487,324 @@ int vmw_shader_define_ioctl(struct drm_device *dev, void *data, goto out_bad_arg; } - /* - * Approximate idr memory usage with 128 bytes. It will be limited - * by maximum number_of shaders anyway. - */ - - if (unlikely(vmw_user_shader_size == 0)) - vmw_user_shader_size = ttm_round_pot(sizeof(*ushader)) - + 128; - ret = ttm_read_lock(&vmaster->lock, true); if (unlikely(ret != 0)) - return ret; + goto out_bad_arg; - ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv), - vmw_user_shader_size, - false, true); - if (unlikely(ret != 0)) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Out of graphics memory for shader" - " creation.\n"); - goto out_unlock; - } + ret = vmw_shader_alloc(dev_priv, buffer, arg->size, arg->offset, + shader_type, tfile, &arg->shader_handle); - ushader = kzalloc(sizeof(*ushader), GFP_KERNEL); - if (unlikely(ushader == NULL)) { - ttm_mem_global_free(vmw_mem_glob(dev_priv), - vmw_user_shader_size); - ret = -ENOMEM; - goto out_unlock; - } - - res = &ushader->shader.res; - ushader->base.shareable = false; - ushader->base.tfile = NULL; - - /* - * From here on, the destructor takes over resource freeing. - */ - - ret = vmw_gb_shader_init(dev_priv, res, arg->size, - arg->offset, shader_type, buffer, - vmw_user_shader_free); - if (unlikely(ret != 0)) - goto out_unlock; - - tmp = vmw_resource_reference(res); - ret = ttm_base_object_init(tfile, &ushader->base, false, - VMW_RES_SHADER, - &vmw_user_shader_base_release, NULL); - - if (unlikely(ret != 0)) { - vmw_resource_unreference(&tmp); - goto out_err; - } - - arg->shader_handle = ushader->base.hash.key; -out_err: - vmw_resource_unreference(&res); -out_unlock: ttm_read_unlock(&vmaster->lock); out_bad_arg: vmw_dmabuf_unreference(&buffer); - return ret; - +} + +/** + * vmw_compat_shader_lookup - Look up a compat shader + * + * @man: Pointer to the compat shader manager. + * @shader_type: The shader type, that combined with the user_key identifies + * the shader. + * @user_key: On entry, this should be a pointer to the user_key. + * On successful exit, it will contain the guest-backed shader's TTM handle. + * + * Returns 0 on success. Non-zero on failure, in which case the value pointed + * to by @user_key is unmodified. 
+ */ +int vmw_compat_shader_lookup(struct vmw_compat_shader_manager *man, + SVGA3dShaderType shader_type, + u32 *user_key) +{ + struct drm_hash_item *hash; + int ret; + unsigned long key = *user_key | (shader_type << 24); + + ret = drm_ht_find_item(&man->shaders, key, &hash); + if (unlikely(ret != 0)) + return ret; + + *user_key = drm_hash_entry(hash, struct vmw_compat_shader, + hash)->handle; + + return 0; +} + +/** + * vmw_compat_shader_free - Free a compat shader. + * + * @man: Pointer to the compat shader manager. + * @entry: Pointer to a struct vmw_compat_shader. + * + * Frees a struct vmw_compat_shder entry and drops its reference to the + * guest backed shader. + */ +static void vmw_compat_shader_free(struct vmw_compat_shader_manager *man, + struct vmw_compat_shader *entry) +{ + list_del(&entry->head); + WARN_ON(drm_ht_remove_item(&man->shaders, &entry->hash)); + WARN_ON(ttm_ref_object_base_unref(entry->tfile, entry->handle, + TTM_REF_USAGE)); + kfree(entry); +} + +/** + * vmw_compat_shaders_commit - Commit a list of compat shader actions. + * + * @man: Pointer to the compat shader manager. + * @list: Caller's list of compat shader actions. + * + * This function commits a list of compat shader additions or removals. + * It is typically called when the execbuf ioctl call triggering these + * actions has commited the fifo contents to the device. + */ +void vmw_compat_shaders_commit(struct vmw_compat_shader_manager *man, + struct list_head *list) +{ + struct vmw_compat_shader *entry, *next; + + list_for_each_entry_safe(entry, next, list, head) { + list_del(&entry->head); + switch (entry->state) { + case VMW_COMPAT_ADD: + entry->state = VMW_COMPAT_COMMITED; + list_add_tail(&entry->head, &man->list); + break; + case VMW_COMPAT_DEL: + ttm_ref_object_base_unref(entry->tfile, entry->handle, + TTM_REF_USAGE); + kfree(entry); + break; + default: + BUG(); + break; + } + } +} + +/** + * vmw_compat_shaders_revert - Revert a list of compat shader actions + * + * @man: Pointer to the compat shader manager. + * @list: Caller's list of compat shader actions. + * + * This function reverts a list of compat shader additions or removals. + * It is typically called when the execbuf ioctl call triggering these + * actions failed for some reason, and the command stream was never + * submitted. + */ +void vmw_compat_shaders_revert(struct vmw_compat_shader_manager *man, + struct list_head *list) +{ + struct vmw_compat_shader *entry, *next; + int ret; + + list_for_each_entry_safe(entry, next, list, head) { + switch (entry->state) { + case VMW_COMPAT_ADD: + vmw_compat_shader_free(man, entry); + break; + case VMW_COMPAT_DEL: + ret = drm_ht_insert_item(&man->shaders, &entry->hash); + list_del(&entry->head); + list_add_tail(&entry->head, &man->list); + entry->state = VMW_COMPAT_COMMITED; + break; + default: + BUG(); + break; + } + } +} + +/** + * vmw_compat_shader_remove - Stage a compat shader for removal. + * + * @man: Pointer to the compat shader manager + * @user_key: The key that is used to identify the shader. The key is + * unique to the shader type. + * @shader_type: Shader type. + * @list: Caller's list of staged shader actions. + * + * This function stages a compat shader for removal and removes the key from + * the shader manager's hash table. If the shader was previously only staged + * for addition it is completely removed (But the execbuf code may keep a + * reference if it was bound to a context between addition and removal). 
If + * it was previously commited to the manager, it is staged for removal. + */ +int vmw_compat_shader_remove(struct vmw_compat_shader_manager *man, + u32 user_key, SVGA3dShaderType shader_type, + struct list_head *list) +{ + struct vmw_compat_shader *entry; + struct drm_hash_item *hash; + int ret; + + ret = drm_ht_find_item(&man->shaders, user_key | (shader_type << 24), + &hash); + if (likely(ret != 0)) + return -EINVAL; + + entry = drm_hash_entry(hash, struct vmw_compat_shader, hash); + + switch (entry->state) { + case VMW_COMPAT_ADD: + vmw_compat_shader_free(man, entry); + break; + case VMW_COMPAT_COMMITED: + (void) drm_ht_remove_item(&man->shaders, &entry->hash); + list_del(&entry->head); + entry->state = VMW_COMPAT_DEL; + list_add_tail(&entry->head, list); + break; + default: + BUG(); + break; + } + + return 0; +} + +/** + * vmw_compat_shader_add - Create a compat shader and add the + * key to the manager + * + * @man: Pointer to the compat shader manager + * @user_key: The key that is used to identify the shader. The key is + * unique to the shader type. + * @bytecode: Pointer to the bytecode of the shader. + * @shader_type: Shader type. + * @tfile: Pointer to a struct ttm_object_file that the guest-backed shader is + * to be created with. + * @list: Caller's list of staged shader actions. + * + * Note that only the key is added to the shader manager's hash table. + * The shader is not yet added to the shader manager's list of shaders. + */ +int vmw_compat_shader_add(struct vmw_compat_shader_manager *man, + u32 user_key, const void *bytecode, + SVGA3dShaderType shader_type, + size_t size, + struct ttm_object_file *tfile, + struct list_head *list) +{ + struct vmw_dma_buffer *buf; + struct ttm_bo_kmap_obj map; + bool is_iomem; + struct vmw_compat_shader *compat; + u32 handle; + int ret; + + if (user_key > ((1 << 24) - 1) || (unsigned) shader_type > 16) + return -EINVAL; + + /* Allocate and pin a DMA buffer */ + buf = kzalloc(sizeof(*buf), GFP_KERNEL); + if (unlikely(buf == NULL)) + return -ENOMEM; + + ret = vmw_dmabuf_init(man->dev_priv, buf, size, &vmw_sys_ne_placement, + true, vmw_dmabuf_bo_free); + if (unlikely(ret != 0)) + goto out; + + ret = ttm_bo_reserve(&buf->base, false, true, false, NULL); + if (unlikely(ret != 0)) + goto no_reserve; + + /* Map and copy shader bytecode. 
*/ + ret = ttm_bo_kmap(&buf->base, 0, PAGE_ALIGN(size) >> PAGE_SHIFT, + &map); + if (unlikely(ret != 0)) { + ttm_bo_unreserve(&buf->base); + goto no_reserve; + } + + memcpy(ttm_kmap_obj_virtual(&map, &is_iomem), bytecode, size); + WARN_ON(is_iomem); + + ttm_bo_kunmap(&map); + ret = ttm_bo_validate(&buf->base, &vmw_sys_placement, false, true); + WARN_ON(ret != 0); + ttm_bo_unreserve(&buf->base); + + /* Create a guest-backed shader container backed by the dma buffer */ + ret = vmw_shader_alloc(man->dev_priv, buf, size, 0, shader_type, + tfile, &handle); + vmw_dmabuf_unreference(&buf); + if (unlikely(ret != 0)) + goto no_reserve; + /* + * Create a compat shader structure and stage it for insertion + * in the manager + */ + compat = kzalloc(sizeof(*compat), GFP_KERNEL); + if (compat == NULL) + goto no_compat; + + compat->hash.key = user_key | (shader_type << 24); + ret = drm_ht_insert_item(&man->shaders, &compat->hash); + if (unlikely(ret != 0)) + goto out_invalid_key; + + compat->state = VMW_COMPAT_ADD; + compat->handle = handle; + compat->tfile = tfile; + list_add_tail(&compat->head, list); + + return 0; + +out_invalid_key: + kfree(compat); +no_compat: + ttm_ref_object_base_unref(tfile, handle, TTM_REF_USAGE); +no_reserve: +out: + return ret; +} + +/** + * vmw_compat_shader_man_create - Create a compat shader manager + * + * @dev_priv: Pointer to a device private structure. + * + * Typically done at file open time. If successful returns a pointer to a + * compat shader manager. Otherwise returns an error pointer. + */ +struct vmw_compat_shader_manager * +vmw_compat_shader_man_create(struct vmw_private *dev_priv) +{ + struct vmw_compat_shader_manager *man; + int ret; + + man = kzalloc(sizeof(*man), GFP_KERNEL); + + man->dev_priv = dev_priv; + INIT_LIST_HEAD(&man->list); + ret = drm_ht_create(&man->shaders, VMW_COMPAT_SHADER_HT_ORDER); + if (ret == 0) + return man; + + kfree(man); + return ERR_PTR(ret); +} + +/** + * vmw_compat_shader_man_destroy - Destroy a compat shader manager + * + * @man: Pointer to the shader manager to destroy. + * + * Typically done at file close time. + */ +void vmw_compat_shader_man_destroy(struct vmw_compat_shader_manager *man) +{ + struct vmw_compat_shader *entry, *next; + + mutex_lock(&man->dev_priv->cmdbuf_mutex); + list_for_each_entry_safe(entry, next, &man->list, head) + vmw_compat_shader_free(man, entry); + + mutex_unlock(&man->dev_priv->cmdbuf_mutex); + kfree(man); } From a6fc955ff9fc60dcffc80003410a85d874e4f1e3 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 31 Jan 2014 10:21:10 +0100 Subject: [PATCH 116/171] drm/vmwgfx: Detect old user-space drivers and set up legacy emulation v2 GB aware mesa userspace drivers are detected by the fact that they are calling the vmw getparam ioctl querying DRM_VMW_PARAM_HW_CAPS to detect whether the device is Guest-backed object capable. For other drivers, lie about hardware version and send the 3D capabilities in a format they expect. v2: Use DRM_VMW_PARAM_MAX_MOB_MEMORY to detect gb awareness, Make sure we don't ovwerwrite bounce buffer or write past user-space buffer indicated size. 
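For reference, the probe a GB-aware user-space driver is expected to issue looks
roughly like the sketch below. This is purely illustrative (it is not mesa code):
it assumes libdrm's drmCommandWriteRead() wrapper and the vmwgfx_drm.h UAPI
header, and error handling is trimmed.

  #include <stdint.h>
  #include <xf86drm.h>
  #include "vmwgfx_drm.h"

  /*
   * Probe for guest-backed object support (illustrative only). Asking for
   * DRM_VMW_PARAM_MAX_MOB_MEMORY is also what marks this file descriptor
   * as gb_aware in the kernel, so a driver should only issue this query
   * if it really understands guest-backed objects.
   */
  static int vmw_probe_gb(int fd, uint64_t *mob_bytes)
  {
          struct drm_vmw_getparam_arg arg = {
                  .param = DRM_VMW_PARAM_MAX_MOB_MEMORY,
          };
          int ret = drmCommandWriteRead(fd, DRM_VMW_GET_PARAM,
                                        &arg, sizeof(arg));

          if (ret)
                  return ret;     /* old kernel: stay on the legacy path */

          *mob_bytes = arg.value;
          return 0;
  }

Drivers that never issue this query keep seeing the pre-GB hardware version and
the 3D capability layout they expect.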
Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz --- drivers/gpu/drm/vmwgfx/svga3d_reg.h | 24 +++++++ drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 93 ++++++++++++++++++++++----- 2 files changed, 101 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/svga3d_reg.h b/drivers/gpu/drm/vmwgfx/svga3d_reg.h index d95335cb90bd..b645647b7776 100644 --- a/drivers/gpu/drm/vmwgfx/svga3d_reg.h +++ b/drivers/gpu/drm/vmwgfx/svga3d_reg.h @@ -2583,4 +2583,28 @@ typedef union { float f; } SVGA3dDevCapResult; +typedef enum { + SVGA3DCAPS_RECORD_UNKNOWN = 0, + SVGA3DCAPS_RECORD_DEVCAPS_MIN = 0x100, + SVGA3DCAPS_RECORD_DEVCAPS = 0x100, + SVGA3DCAPS_RECORD_DEVCAPS_MAX = 0x1ff, +} SVGA3dCapsRecordType; + +typedef +struct SVGA3dCapsRecordHeader { + uint32 length; + SVGA3dCapsRecordType type; +} +SVGA3dCapsRecordHeader; + +typedef +struct SVGA3dCapsRecord { + SVGA3dCapsRecordHeader header; + uint32 data[1]; +} +SVGA3dCapsRecord; + + +typedef uint32 SVGA3dCapPair[2]; + #endif /* _SVGA3D_REG_H_ */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c index 116c49736763..f9881f9e62bd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c @@ -29,12 +29,18 @@ #include #include "vmwgfx_kms.h" +struct svga_3d_compat_cap { + SVGA3dCapsRecordHeader header; + SVGA3dCapPair pairs[SVGA3D_DEVCAP_MAX]; +}; + int vmw_getparam_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct vmw_private *dev_priv = vmw_priv(dev); struct drm_vmw_getparam_arg *param = (struct drm_vmw_getparam_arg *)data; + struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); switch (param->param) { case DRM_VMW_PARAM_NUM_STREAMS: @@ -60,6 +66,11 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, __le32 __iomem *fifo_mem = dev_priv->mmio_virt; const struct vmw_fifo_state *fifo = &dev_priv->fifo; + if ((dev_priv->capabilities & SVGA_CAP_GBOBJECTS)) { + param->value = SVGA3D_HWVERSION_WS8_B1; + break; + } + param->value = ioread32(fifo_mem + ((fifo->capabilities & @@ -69,17 +80,26 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, break; } case DRM_VMW_PARAM_MAX_SURF_MEMORY: - param->value = dev_priv->memory_size; + if ((dev_priv->capabilities & SVGA_CAP_GBOBJECTS) && + !vmw_fp->gb_aware) + param->value = dev_priv->max_mob_pages * PAGE_SIZE / 2; + else + param->value = dev_priv->memory_size; break; case DRM_VMW_PARAM_3D_CAPS_SIZE: - if (dev_priv->capabilities & SVGA_CAP_GBOBJECTS) - param->value = SVGA3D_DEVCAP_MAX; + if ((dev_priv->capabilities & SVGA_CAP_GBOBJECTS) && + vmw_fp->gb_aware) + param->value = SVGA3D_DEVCAP_MAX * sizeof(uint32_t); + else if (dev_priv->capabilities & SVGA_CAP_GBOBJECTS) + param->value = sizeof(struct svga_3d_compat_cap) + + sizeof(uint32_t); else param->value = (SVGA_FIFO_3D_CAPS_LAST - - SVGA_FIFO_3D_CAPS + 1); - param->value *= sizeof(uint32_t); + SVGA_FIFO_3D_CAPS + 1) * + sizeof(uint32_t); break; case DRM_VMW_PARAM_MAX_MOB_MEMORY: + vmw_fp->gb_aware = true; param->value = dev_priv->max_mob_pages * PAGE_SIZE; break; default: @@ -91,6 +111,38 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, return 0; } +static int vmw_fill_compat_cap(struct vmw_private *dev_priv, void *bounce, + size_t size) +{ + struct svga_3d_compat_cap *compat_cap = + (struct svga_3d_compat_cap *) bounce; + unsigned int i; + size_t pair_offset = offsetof(struct svga_3d_compat_cap, pairs); + unsigned int max_size; + + if (size < pair_offset) + return -EINVAL; + + max_size = (size - pair_offset) / 
sizeof(SVGA3dCapPair); + + if (max_size > SVGA3D_DEVCAP_MAX) + max_size = SVGA3D_DEVCAP_MAX; + + compat_cap->header.length = + (pair_offset + max_size * sizeof(SVGA3dCapPair)) / sizeof(u32); + compat_cap->header.type = SVGA3DCAPS_RECORD_DEVCAPS; + + mutex_lock(&dev_priv->hw_mutex); + for (i = 0; i < max_size; ++i) { + vmw_write(dev_priv, SVGA_REG_DEV_CAP, i); + compat_cap->pairs[i][0] = i; + compat_cap->pairs[i][1] = vmw_read(dev_priv, SVGA_REG_DEV_CAP); + } + mutex_unlock(&dev_priv->hw_mutex); + + return 0; +} + int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -104,41 +156,49 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, void *bounce; int ret; bool gb_objects = !!(dev_priv->capabilities & SVGA_CAP_GBOBJECTS); + struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); if (unlikely(arg->pad64 != 0)) { DRM_ERROR("Illegal GET_3D_CAP argument.\n"); return -EINVAL; } - if (gb_objects) - size = SVGA3D_DEVCAP_MAX; + if (gb_objects && vmw_fp->gb_aware) + size = SVGA3D_DEVCAP_MAX * sizeof(uint32_t); + else if (gb_objects) + size = sizeof(struct svga_3d_compat_cap) + sizeof(uint32_t); else - size = (SVGA_FIFO_3D_CAPS_LAST - SVGA_FIFO_3D_CAPS + 1); - - size *= sizeof(uint32_t); + size = (SVGA_FIFO_3D_CAPS_LAST - SVGA_FIFO_3D_CAPS + 1) * + sizeof(uint32_t); if (arg->max_size < size) size = arg->max_size; - bounce = vmalloc(size); + bounce = vzalloc(size); if (unlikely(bounce == NULL)) { DRM_ERROR("Failed to allocate bounce buffer for 3D caps.\n"); return -ENOMEM; } - if (gb_objects) { - int i; + if (gb_objects && vmw_fp->gb_aware) { + int i, num; uint32_t *bounce32 = (uint32_t *) bounce; + num = size / sizeof(uint32_t); + if (num > SVGA3D_DEVCAP_MAX) + num = SVGA3D_DEVCAP_MAX; + mutex_lock(&dev_priv->hw_mutex); - for (i = 0; i < SVGA3D_DEVCAP_MAX; ++i) { + for (i = 0; i < num; ++i) { vmw_write(dev_priv, SVGA_REG_DEV_CAP, i); *bounce32++ = vmw_read(dev_priv, SVGA_REG_DEV_CAP); } mutex_unlock(&dev_priv->hw_mutex); - + } else if (gb_objects) { + ret = vmw_fill_compat_cap(dev_priv, bounce, size); + if (unlikely(ret != 0)) + goto out_err; } else { - fifo_mem = dev_priv->mmio_virt; memcpy_fromio(bounce, &fifo_mem[SVGA_FIFO_3D_CAPS], size); } @@ -146,6 +206,7 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, ret = copy_to_user(buffer, bounce, size); if (ret) ret = -EFAULT; +out_err: vfree(bounce); if (unlikely(ret != 0)) From 30f82d816d2dccfdc2063ac8cca994904c9b612c Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 5 Feb 2014 08:13:56 +0100 Subject: [PATCH 117/171] drm/vmwgfx: Reemit context bindings when necessary v2 When a context is first referenced in the command stream, make sure that all scrubbed (as a result of eviction) bindings are re-emitted. Also make sure that all bound resources are put on the resource validate list. This is needed for legacy emulation, since legacy user-space drivers will typically not re-emit shader bindings. It also removes the requirement for user-space drivers to re-emit render-target- and texture bindings. Makes suspend and hibernate now also work with legacy user-space drivers on guest-backed devices. v2: Don't rebind on legacy devices. 
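In condensed form, the flow added below is (an illustrative sketch using the
helpers introduced by this patch, not additional code to apply):

  /*
   * In vmw_cmd_compat_res_check(): the first time a context is referenced
   * by the command stream, put every resource currently bound to it on
   * the resource validation list.
   */
  if (dev_priv->has_mob && node->first_usage && res_type == vmw_res_context)
          vmw_resource_context_res_add(dev_priv, sw_context, res);

  /*
   * In vmw_execbuf_process(): once validation has given evicted resources
   * new ids, re-emit every binding that was scrubbed on eviction.
   */
  if (dev_priv->has_mob)
          ret = vmw_rebind_contexts(sw_context);

The rebind itself walks the context binding list and calls the per-binding
scrub function with rebind == true, clearing the scrubbed flag on success.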
Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz --- drivers/gpu/drm/vmwgfx/vmwgfx_context.c | 144 +++++++++++++++++++---- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 6 + drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 85 ++++++++++++- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 11 +- drivers/gpu/drm/vmwgfx/vmwgfx_shader.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 2 +- 6 files changed, 222 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c index 82c41daebc0e..9426c53fb483 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c @@ -37,7 +37,7 @@ struct vmw_user_context { -typedef int (*vmw_scrub_func)(struct vmw_ctx_bindinfo *); +typedef int (*vmw_scrub_func)(struct vmw_ctx_bindinfo *, bool); static void vmw_user_context_free(struct vmw_resource *res); static struct vmw_resource * @@ -50,9 +50,11 @@ static int vmw_gb_context_unbind(struct vmw_resource *res, bool readback, struct ttm_validate_buffer *val_buf); static int vmw_gb_context_destroy(struct vmw_resource *res); -static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi); -static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi); -static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi); +static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi, bool rebind); +static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi, + bool rebind); +static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi, bool rebind); +static void vmw_context_binding_state_scrub(struct vmw_ctx_binding_state *cbs); static void vmw_context_binding_state_kill(struct vmw_ctx_binding_state *cbs); static uint64_t vmw_user_context_size; @@ -111,10 +113,14 @@ static void vmw_hw_context_destroy(struct vmw_resource *res) if (res->func->destroy == vmw_gb_context_destroy) { mutex_lock(&dev_priv->cmdbuf_mutex); + mutex_lock(&dev_priv->binding_mutex); + (void) vmw_context_binding_state_kill + (&container_of(res, struct vmw_user_context, res)->cbs); (void) vmw_gb_context_destroy(res); if (dev_priv->pinned_bo != NULL && !dev_priv->query_cid_valid) __vmw_execbuf_release_pinned_bo(dev_priv, NULL); + mutex_unlock(&dev_priv->binding_mutex); mutex_unlock(&dev_priv->cmdbuf_mutex); return; } @@ -328,7 +334,7 @@ static int vmw_gb_context_unbind(struct vmw_resource *res, BUG_ON(bo->mem.mem_type != VMW_PL_MOB); mutex_lock(&dev_priv->binding_mutex); - vmw_context_binding_state_kill(&uctx->cbs); + vmw_context_binding_state_scrub(&uctx->cbs); submit_size = sizeof(*cmd2) + (readback ? sizeof(*cmd1) : 0); @@ -378,10 +384,6 @@ static int vmw_gb_context_destroy(struct vmw_resource *res) SVGA3dCmdHeader header; SVGA3dCmdDestroyGBContext body; } *cmd; - struct vmw_user_context *uctx = - container_of(res, struct vmw_user_context, res); - - BUG_ON(!list_empty(&uctx->cbs.list)); if (likely(res->id == -1)) return 0; @@ -528,8 +530,9 @@ out_unlock: * vmw_context_scrub_shader - scrub a shader binding from a context. * * @bi: single binding information. + * @rebind: Whether to issue a bind instead of scrub command. 
*/ -static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi) +static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi, bool rebind) { struct vmw_private *dev_priv = bi->ctx->dev_priv; struct { @@ -548,7 +551,8 @@ static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi) cmd->header.size = sizeof(cmd->body); cmd->body.cid = bi->ctx->id; cmd->body.type = bi->i1.shader_type; - cmd->body.shid = SVGA3D_INVALID_ID; + cmd->body.shid = + cpu_to_le32((rebind) ? bi->res->id : SVGA3D_INVALID_ID); vmw_fifo_commit(dev_priv, sizeof(*cmd)); return 0; @@ -559,8 +563,10 @@ static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi) * from a context. * * @bi: single binding information. + * @rebind: Whether to issue a bind instead of scrub command. */ -static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi) +static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi, + bool rebind) { struct vmw_private *dev_priv = bi->ctx->dev_priv; struct { @@ -579,7 +585,8 @@ static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi) cmd->header.size = sizeof(cmd->body); cmd->body.cid = bi->ctx->id; cmd->body.type = bi->i1.rt_type; - cmd->body.target.sid = SVGA3D_INVALID_ID; + cmd->body.target.sid = + cpu_to_le32((rebind) ? bi->res->id : SVGA3D_INVALID_ID); cmd->body.target.face = 0; cmd->body.target.mipmap = 0; vmw_fifo_commit(dev_priv, sizeof(*cmd)); @@ -591,11 +598,13 @@ static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi) * vmw_context_scrub_texture - scrub a texture binding from a context. * * @bi: single binding information. + * @rebind: Whether to issue a bind instead of scrub command. * * TODO: Possibly complement this function with a function that takes * a list of texture bindings and combines them to a single command. */ -static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi) +static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi, + bool rebind) { struct vmw_private *dev_priv = bi->ctx->dev_priv; struct { @@ -619,7 +628,8 @@ static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi) cmd->body.c.cid = bi->ctx->id; cmd->body.s1.stage = bi->i1.texture_stage; cmd->body.s1.name = SVGA3D_TS_BIND_TEXTURE; - cmd->body.s1.value = (uint32) SVGA3D_INVALID_ID; + cmd->body.s1.value = + cpu_to_le32((rebind) ? 
bi->res->id : SVGA3D_INVALID_ID); vmw_fifo_commit(dev_priv, sizeof(*cmd)); return 0; @@ -692,6 +702,7 @@ int vmw_context_binding_add(struct vmw_ctx_binding_state *cbs, vmw_context_binding_drop(loc); loc->bi = *bi; + loc->bi.scrubbed = false; list_add_tail(&loc->ctx_list, &cbs->list); INIT_LIST_HEAD(&loc->res_list); @@ -727,12 +738,11 @@ static void vmw_context_binding_transfer(struct vmw_ctx_binding_state *cbs, if (loc->bi.ctx != NULL) vmw_context_binding_drop(loc); - loc->bi = *bi; - list_add_tail(&loc->ctx_list, &cbs->list); - if (bi->res != NULL) + if (bi->res != NULL) { + loc->bi = *bi; + list_add_tail(&loc->ctx_list, &cbs->list); list_add_tail(&loc->res_list, &bi->res->binding_head); - else - INIT_LIST_HEAD(&loc->res_list); + } } /** @@ -746,7 +756,10 @@ static void vmw_context_binding_transfer(struct vmw_ctx_binding_state *cbs, */ static void vmw_context_binding_kill(struct vmw_ctx_binding *cb) { - (void) vmw_scrub_funcs[cb->bi.bt](&cb->bi); + if (!cb->bi.scrubbed) { + (void) vmw_scrub_funcs[cb->bi.bt](&cb->bi, false); + cb->bi.scrubbed = true; + } vmw_context_binding_drop(cb); } @@ -767,6 +780,27 @@ static void vmw_context_binding_state_kill(struct vmw_ctx_binding_state *cbs) vmw_context_binding_kill(entry); } +/** + * vmw_context_binding_state_scrub - Scrub all bindings associated with a + * struct vmw_ctx_binding state structure. + * + * @cbs: Pointer to the context binding state tracker. + * + * Emits commands to scrub all bindings associated with the + * context binding state tracker. + */ +static void vmw_context_binding_state_scrub(struct vmw_ctx_binding_state *cbs) +{ + struct vmw_ctx_binding *entry; + + list_for_each_entry(entry, &cbs->list, ctx_list) { + if (!entry->bi.scrubbed) { + (void) vmw_scrub_funcs[entry->bi.bt](&entry->bi, false); + entry->bi.scrubbed = true; + } + } +} + /** * vmw_context_binding_res_list_kill - Kill all bindings on a * resource binding list @@ -784,6 +818,27 @@ void vmw_context_binding_res_list_kill(struct list_head *head) vmw_context_binding_kill(entry); } +/** + * vmw_context_binding_res_list_scrub - Scrub all bindings on a + * resource binding list + * + * @head: list head of resource binding list + * + * Scrub all bindings associated with a specific resource. Typically + * called before the resource is evicted. + */ +void vmw_context_binding_res_list_scrub(struct list_head *head) +{ + struct vmw_ctx_binding *entry; + + list_for_each_entry(entry, head, res_list) { + if (!entry->bi.scrubbed) { + (void) vmw_scrub_funcs[entry->bi.bt](&entry->bi, false); + entry->bi.scrubbed = true; + } + } +} + /** * vmw_context_binding_state_transfer - Commit staged binding info * @@ -803,3 +858,50 @@ void vmw_context_binding_state_transfer(struct vmw_resource *ctx, list_for_each_entry_safe(entry, next, &from->list, ctx_list) vmw_context_binding_transfer(&uctx->cbs, &entry->bi); } + +/** + * vmw_context_rebind_all - Rebind all scrubbed bindings of a context + * + * @ctx: The context resource + * + * Walks through the context binding list and rebinds all scrubbed + * resources. 
+ */ +int vmw_context_rebind_all(struct vmw_resource *ctx) +{ + struct vmw_ctx_binding *entry; + struct vmw_user_context *uctx = + container_of(ctx, struct vmw_user_context, res); + struct vmw_ctx_binding_state *cbs = &uctx->cbs; + int ret; + + list_for_each_entry(entry, &cbs->list, ctx_list) { + if (likely(!entry->bi.scrubbed)) + continue; + + if (WARN_ON(entry->bi.res == NULL || entry->bi.res->id == + SVGA3D_INVALID_ID)) + continue; + + ret = vmw_scrub_funcs[entry->bi.bt](&entry->bi, true); + if (unlikely(ret != 0)) + return ret; + + entry->bi.scrubbed = false; + } + + return 0; +} + +/** + * vmw_context_binding_list - Return a list of context bindings + * + * @ctx: The context resource + * + * Returns the current list of bindings of the given context. Note that + * this list becomes stale as soon as the dev_priv::binding_mutex is unlocked. + */ +struct list_head *vmw_context_binding_list(struct vmw_resource *ctx) +{ + return &(container_of(ctx, struct vmw_user_context, res)->cbs.list); +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index cef0ff7ac738..ecaa302a6154 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -276,6 +276,7 @@ struct vmw_ctx_bindinfo { struct vmw_resource *ctx; struct vmw_resource *res; enum vmw_ctx_binding_type bt; + bool scrubbed; union { SVGA3dShaderType shader_type; SVGA3dRenderTargetType rt_type; @@ -574,6 +575,8 @@ struct vmw_user_resource_conv; extern void vmw_resource_unreference(struct vmw_resource **p_res); extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res); +extern struct vmw_resource * +vmw_resource_reference_unless_doomed(struct vmw_resource *res); extern int vmw_resource_validate(struct vmw_resource *res); extern int vmw_resource_reserve(struct vmw_resource *res, bool no_backup); extern bool vmw_resource_needs_backup(const struct vmw_resource *res); @@ -962,6 +965,9 @@ extern void vmw_context_binding_state_transfer(struct vmw_resource *res, struct vmw_ctx_binding_state *cbs); extern void vmw_context_binding_res_list_kill(struct list_head *head); +extern void vmw_context_binding_res_list_scrub(struct list_head *head); +extern int vmw_context_rebind_all(struct vmw_resource *ctx); +extern struct list_head *vmw_context_binding_list(struct vmw_resource *ctx); /* * Surface management - vmwgfx_surface.c diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 352224b9d667..269b85cc875a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -179,6 +179,44 @@ static int vmw_resource_val_add(struct vmw_sw_context *sw_context, return 0; } +/** + * vmw_resource_context_res_add - Put resources previously bound to a context on + * the validation list + * + * @dev_priv: Pointer to a device private structure + * @sw_context: Pointer to a software context used for this command submission + * @ctx: Pointer to the context resource + * + * This function puts all resources that were previously bound to @ctx on + * the resource validation list. 
This is part of the context state reemission + */ +static int vmw_resource_context_res_add(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + struct vmw_resource *ctx) +{ + struct list_head *binding_list; + struct vmw_ctx_binding *entry; + int ret = 0; + struct vmw_resource *res; + + mutex_lock(&dev_priv->binding_mutex); + binding_list = vmw_context_binding_list(ctx); + + list_for_each_entry(entry, binding_list, ctx_list) { + res = vmw_resource_reference_unless_doomed(entry->bi.res); + if (unlikely(res == NULL)) + continue; + + ret = vmw_resource_val_add(sw_context, entry->bi.res, NULL); + vmw_resource_unreference(&res); + if (unlikely(ret != 0)) + break; + } + + mutex_unlock(&dev_priv->binding_mutex); + return ret; +} + /** * vmw_resource_relocation_add - Add a relocation to the relocation list * @@ -470,7 +508,11 @@ vmw_cmd_compat_res_check(struct vmw_private *dev_priv, if (p_val) *p_val = node; - if (node->first_usage && res_type == vmw_res_context) { + if (dev_priv->has_mob && node->first_usage && + res_type == vmw_res_context) { + ret = vmw_resource_context_res_add(dev_priv, sw_context, res); + if (unlikely(ret != 0)) + goto out_no_reloc; node->staged_bindings = kzalloc(sizeof(*node->staged_bindings), GFP_KERNEL); if (node->staged_bindings == NULL) { @@ -516,6 +558,34 @@ vmw_cmd_res_check(struct vmw_private *dev_priv, converter, *id_loc, id_loc, p_val); } +/** + * vmw_rebind_contexts - Rebind all resources previously bound to + * referenced contexts. + * + * @sw_context: Pointer to the software context. + * + * Rebind context binding points that have been scrubbed because of eviction. + */ +static int vmw_rebind_contexts(struct vmw_sw_context *sw_context) +{ + struct vmw_resource_val_node *val; + int ret; + + list_for_each_entry(val, &sw_context->resource_list, head) { + if (likely(!val->staged_bindings)) + continue; + + ret = vmw_context_rebind_all(val->res); + if (unlikely(ret != 0)) { + if (ret != -ERESTARTSYS) + DRM_ERROR("Failed to rebind context.\n"); + return ret; + } + } + + return 0; +} + /** * vmw_cmd_cid_check - Check a command header for valid context information. * @@ -1640,9 +1710,10 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv, struct vmw_resource_val_node *res_node; u32 shid = cmd->body.shid; - (void) vmw_compat_shader_lookup(sw_context->fp->shman, - cmd->body.type, - &shid); + if (shid != SVGA3D_INVALID_ID) + (void) vmw_compat_shader_lookup(sw_context->fp->shman, + cmd->body.type, + &shid); ret = vmw_cmd_compat_res_check(dev_priv, sw_context, vmw_res_shader, @@ -2395,6 +2466,12 @@ int vmw_execbuf_process(struct drm_file *file_priv, goto out_err; } + if (dev_priv->has_mob) { + ret = vmw_rebind_contexts(sw_context); + if (unlikely(ret != 0)) + goto out_err; + } + cmd = vmw_fifo_reserve(dev_priv, command_size); if (unlikely(cmd == NULL)) { DRM_ERROR("Failed reserving fifo space for commands.\n"); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 6fdd82d42f65..2aa4bc6a4d60 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -88,6 +88,11 @@ struct vmw_resource *vmw_resource_reference(struct vmw_resource *res) return res; } +struct vmw_resource * +vmw_resource_reference_unless_doomed(struct vmw_resource *res) +{ + return kref_get_unless_zero(&res->kref) ? res : NULL; +} /** * vmw_resource_release_id - release a resource id to the id manager. 
@@ -136,8 +141,12 @@ static void vmw_resource_release(struct kref *kref) vmw_dmabuf_unreference(&res->backup); } - if (likely(res->hw_destroy != NULL)) + if (likely(res->hw_destroy != NULL)) { res->hw_destroy(res); + mutex_lock(&dev_priv->binding_mutex); + vmw_context_binding_res_list_kill(&res->binding_head); + mutex_unlock(&dev_priv->binding_mutex); + } id = res->id; if (res->res_free != NULL) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index be85d7fdfb5b..217d941b8176 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -304,7 +304,7 @@ static int vmw_gb_shader_destroy(struct vmw_resource *res) return 0; mutex_lock(&dev_priv->binding_mutex); - vmw_context_binding_res_list_kill(&res->binding_head); + vmw_context_binding_res_list_scrub(&res->binding_head); cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd)); if (unlikely(cmd == NULL)) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index ec2b99833fce..82468d902915 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -1111,7 +1111,7 @@ static int vmw_gb_surface_destroy(struct vmw_resource *res) return 0; mutex_lock(&dev_priv->binding_mutex); - vmw_context_binding_res_list_kill(&res->binding_head); + vmw_context_binding_res_list_scrub(&res->binding_head); cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd)); if (unlikely(cmd == NULL)) { From 276ab336b4c6e483d12fd46cbf24f97f71867710 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 5 Feb 2014 08:49:41 +0100 Subject: [PATCH 118/171] ALSA: hda - Improve loopback path lookups for AD1983 AD1983 has flexible loopback routes and the generic parser would take wrong path confusingly instead of taking individual paths via NID 0x0c and 0x0d. For avoiding it, limit the connections at these widgets so that the parser can think more straightforwardly. This fixes the regression of the missing line-in loopback on Dell machine. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=70011 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_analog.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 195cd62cdce5..df3652ad15ef 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -491,6 +491,8 @@ static int ad1983_add_spdif_mux_ctl(struct hda_codec *codec) static int patch_ad1983(struct hda_codec *codec) { struct ad198x_spec *spec; + static hda_nid_t conn_0c[] = { 0x08 }; + static hda_nid_t conn_0d[] = { 0x09 }; int err; err = alloc_ad_spec(codec); @@ -501,6 +503,11 @@ static int patch_ad1983(struct hda_codec *codec) spec->gen.mixer_nid = 0x0e; spec->gen.beep_nid = 0x10; set_beep_amp(spec, 0x10, 0, HDA_OUTPUT); + + /* limit the loopback routes not to confuse the parser */ + snd_hda_override_conn_list(codec, 0x0c, ARRAY_SIZE(conn_0c), conn_0c); + snd_hda_override_conn_list(codec, 0x0d, ARRAY_SIZE(conn_0d), conn_0d); + err = ad198x_parse_auto_config(codec, false); if (err < 0) goto error; From cd9a21a831af0af7539a0e37e4455da03df7cf82 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 30 Jan 2014 21:27:25 -0500 Subject: [PATCH 119/171] vmwgfx: Fix unitialized stack read in vmw_setup_otable_base One of the error paths in vmw_setup_otable_base causes us to return with 'ret' having never been set to anything causing us to return whatever was on the stack. 
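Reduced to a sketch (not the literal function body), the problem looks like
this:

  static int vmw_setup_otable_base(...)
  {
          int ret;                /* never assigned on this error path */
          ...
          cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
          if (unlikely(cmd == NULL)) {
                  DRM_ERROR("Failed reserving FIFO space for OTable setup.\n");
                  ret = -ENOMEM;  /* <-- the line this patch adds */
                  goto out_no_fifo;
          }
          ...
  out_no_fifo:
          ...
          return ret;             /* stack garbage without the fix */
  }

Without the added assignment the caller sees whatever happened to be in ret's
stack slot, which can even look like success.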
Found with Coverity Signed-off-by: Dave Jones Reviewed-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_mob.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c index 4910e7b81811..d4a5a19cb8c3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c @@ -134,6 +134,7 @@ static int vmw_setup_otable_base(struct vmw_private *dev_priv, cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd)); if (unlikely(cmd == NULL)) { DRM_ERROR("Failed reserving FIFO space for OTable setup.\n"); + ret = -ENOMEM; goto out_no_fifo; } From c66f854338253e603a4fb6817069ee23eacf0ae3 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 24 Jan 2014 08:49:45 +0100 Subject: [PATCH 120/171] drm/ttm: Fix TTM object open regression Commit drm/ttm: ttm object security fixes for render nodes introduced a regression where, if a TTM object was opened multiple times from the same open file, the caller would spin uninterruptibly in the kernel. Fix this. Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz --- drivers/gpu/drm/ttm/ttm_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c index 37079859afc8..53b51c4e671a 100644 --- a/drivers/gpu/drm/ttm/ttm_object.c +++ b/drivers/gpu/drm/ttm/ttm_object.c @@ -292,7 +292,7 @@ int ttm_ref_object_add(struct ttm_object_file *tfile, if (ret == 0) { ref = drm_hash_entry(hash, struct ttm_ref_object, hash); - if (!kref_get_unless_zero(&ref->kref)) { + if (kref_get_unless_zero(&ref->kref)) { rcu_read_unlock(); break; } From 923fa4ea382f592dee2ba3b205befb90cbddf3af Mon Sep 17 00:00:00 2001 From: Nitin A Kamble Date: Thu, 30 Jan 2014 16:50:10 -0800 Subject: [PATCH 121/171] genirq: Generic irq chip requires IRQ_DOMAIN The generic_chip.c uses interfaces from irq_domain.c which is controlled by the IRQ_DOMAIN config option, but there is no Kconfig dependency so the build can fail: linux/kernel/irq/generic-chip.c:400:11: error: 'irq_domain_xlate_onetwocell' undeclared here (not in a function) Select IRQ_DOMAIN when GENERIC_IRQ_CHIP is selected. Signed-off-by: Nitin A Kamble Link: http://lkml.kernel.org/r/1391129410-54548-2-git-send-email-nitin.a.kamble@intel.com Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org # 3.11+ --- kernel/irq/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 4a1fef09f658..07cbdfea9ae2 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -40,6 +40,7 @@ config IRQ_EDGE_EOI_HANDLER # Generic configurable interrupt chip implementation config GENERIC_IRQ_CHIP bool + select IRQ_DOMAIN # Generic irq_domain hw <--> linux irq number translation config IRQ_DOMAIN From 5044bad43ee573d0b6d90e3ccb7a40c2c7d25eb4 Mon Sep 17 00:00:00 2001 From: Vinayak Kale Date: Wed, 5 Feb 2014 09:34:36 +0000 Subject: [PATCH 122/171] arm64: add DSB after icache flush in __flush_icache_all() Add DSB after icache flush to complete the cache maintenance operation. The function __flush_icache_all() is used only for user space mappings and an ISB is not required because of an exception return before executing user instructions. An exception return would behave like an ISB. 
Signed-off-by: Vinayak Kale Acked-by: Will Deacon Cc: Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cacheflush.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index fea9ee327206..889324981aa4 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -116,6 +116,7 @@ extern void flush_dcache_page(struct page *); static inline void __flush_icache_all(void) { asm("ic ialluis"); + dsb(); } #define flush_dcache_mmap_lock(mapping) \ From ccc9e244eb1b9654915634827322932cbafd8244 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 4 Feb 2014 23:08:57 +0000 Subject: [PATCH 123/171] arm64: Align CMA sizes to PAGE_SIZE dma_alloc_from_contiguous takes number of pages for a size. Align up the dma size passed in to page size to avoid truncation and allocation failures on sizes less than PAGE_SIZE. Cc: Will Deacon Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas --- arch/arm64/mm/dma-mapping.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 45b5ab54c9ee..fbd76785c5db 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -45,6 +45,7 @@ static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size, if (IS_ENABLED(CONFIG_DMA_CMA)) { struct page *page; + size = PAGE_ALIGN(size); page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, get_order(size)); if (!page) From a55f9929a9b257f84b6cc7b2397379cabd744a22 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 4 Feb 2014 16:01:31 +0000 Subject: [PATCH 124/171] arm64: Invalidate the TLB when replacing pmd entries during boot With the 64K page size configuration, __create_page_tables in head.S maps enough memory to get started but using 64K pages rather than 512M sections with a single pgd/pud/pmd entry pointing to a pte table. create_mapping() may override the pgd/pud/pmd table entry with a block (section) one if the RAM size is more than 512MB and aligned correctly. For the end of this block to be accessible, the old TLB entry must be invalidated. Cc: Reported-by: Mark Salter Tested-by: Mark Salter Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index f557ebbe7013..f8dc7e8fce6f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -203,10 +203,18 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, do { next = pmd_addr_end(addr, end); /* try section mapping first */ - if (((addr | next | phys) & ~SECTION_MASK) == 0) + if (((addr | next | phys) & ~SECTION_MASK) == 0) { + pmd_t old_pmd =*pmd; set_pmd(pmd, __pmd(phys | prot_sect_kernel)); - else + /* + * Check for previous table entries created during + * boot (__create_page_tables) and flush them. + */ + if (!pmd_none(old_pmd)) + flush_tlb_all(); + } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys)); + } phys += next - addr; } while (pmd++, addr = next, addr != end); } From bfb67a5606376bb32cb6f93dc05cda2e8c2038a5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 5 Feb 2014 10:24:12 +0000 Subject: [PATCH 125/171] arm64: fix typo: s/SERRROR/SERROR/ Somehow SERROR has acquired an additional 'R' in a couple of headers. This patch removes them before they spread further. As neither instance is in use yet, no other sites need to be fixed up. 
Signed-off-by: Mark Rutland Acked-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 2 +- arch/arm64/include/asm/kvm_arm.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 78834123a32e..c4a7f940b387 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -42,7 +42,7 @@ #define ESR_EL1_EC_SP_ALIGN (0x26) #define ESR_EL1_EC_FP_EXC32 (0x28) #define ESR_EL1_EC_FP_EXC64 (0x2C) -#define ESR_EL1_EC_SERRROR (0x2F) +#define ESR_EL1_EC_SERROR (0x2F) #define ESR_EL1_EC_BREAKPT_EL0 (0x30) #define ESR_EL1_EC_BREAKPT_EL1 (0x31) #define ESR_EL1_EC_SOFTSTP_EL0 (0x32) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index c98ef4771c73..0eb398655378 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -231,7 +231,7 @@ #define ESR_EL2_EC_SP_ALIGN (0x26) #define ESR_EL2_EC_FP_EXC32 (0x28) #define ESR_EL2_EC_FP_EXC64 (0x2C) -#define ESR_EL2_EC_SERRROR (0x2F) +#define ESR_EL2_EC_SERROR (0x2F) #define ESR_EL2_EC_BREAKPT (0x30) #define ESR_EL2_EC_BREAKPT_HYP (0x31) #define ESR_EL2_EC_SOFTSTP (0x32) From 883d50a0ed403446437444a495356ce31e1197a3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 5 Feb 2014 10:24:13 +0000 Subject: [PATCH 126/171] arm64: simplify pgd_alloc Currently pgd_alloc has a redundant NULL check in its return path that can be removed with no ill effects. With that removed it's also possible to return early and eliminate the new_pgd temporary variable. This patch applies said modifications, making the logic of pgd_alloc correspond 1-1 with that of pgd_free. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/pgd.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 7083cdada657..62c6101df260 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -32,17 +32,10 @@ pgd_t *pgd_alloc(struct mm_struct *mm) { - pgd_t *new_pgd; - if (PGD_SIZE == PAGE_SIZE) - new_pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL); + return (pgd_t *)get_zeroed_page(GFP_KERNEL); else - new_pgd = kzalloc(PGD_SIZE, GFP_KERNEL); - - if (!new_pgd) - return NULL; - - return new_pgd; + return kzalloc(PGD_SIZE, GFP_KERNEL); } void pgd_free(struct mm_struct *mm, pgd_t *pgd) From 8fcfb99c8e29c73dd8945b6105ef54ca4eeb171e Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 4 Feb 2014 17:48:28 -0700 Subject: [PATCH 127/171] ACPI / hotplug: Fix panic on eject to ejected device When an eject request is sent to an ejected ACPI device, the following panic occurs: ACPI: \_SB_.SCK3.CPU3: ACPI_NOTIFY_EJECT_REQUEST event BUG: unable to handle kernel NULL pointer dereference at 0000000000000070 IP: [] acpi_device_hotplug+0x10b/0x33b : Call Trace: [] acpi_hotplug_work_fn+0x1c/0x27 [] process_one_work+0x175/0x430 [] worker_thread+0x11b/0x3a0 This is becase device->handler is NULL in acpi_device_hotplug(). This case was used to fail in acpi_hotplug_notify_cb() as the target had no acpi_deivce. However, acpi_device now exists after ejection. Added a check to verify if acpi_device->handler is valid for an eject request in acpi_hotplug_notify_cb(). Note that handler passed from an argument is still valid while acpi_device->handler is NULL. Fixes: 202317a573b2 (ACPI / scan: Add acpi_device objects for all device nodes in the namespace) Signed-off-by: Toshi Kani Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/scan.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 7384158c7f87..57b053f424d1 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -484,7 +484,6 @@ static void acpi_device_hotplug(void *data, u32 src) static void acpi_hotplug_notify_cb(acpi_handle handle, u32 type, void *data) { u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; - struct acpi_scan_handler *handler = data; struct acpi_device *adev; acpi_status status; @@ -500,7 +499,10 @@ static void acpi_hotplug_notify_cb(acpi_handle handle, u32 type, void *data) break; case ACPI_NOTIFY_EJECT_REQUEST: acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n"); - if (!handler->hotplug.enabled) { + if (!adev->handler) + goto err_out; + + if (!adev->handler->hotplug.enabled) { acpi_handle_err(handle, "Eject disabled\n"); ost_code = ACPI_OST_SC_EJECT_NOT_SUPPORTED; goto err_out; From 069b918623e1510e58dacf178905a72c3baa3ae4 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Wed, 5 Feb 2014 05:53:04 +0000 Subject: [PATCH 128/171] arm64: vdso: fix coarse clock handling When __kernel_clock_gettime is called with a CLOCK_MONOTONIC_COARSE or CLOCK_REALTIME_COARSE clock id, it returns incorrectly to whatever the caller has placed in x2 ("ret x2" to return from the fast path). Fix this by saving x30/LR to x2 only in code that will call __do_get_tspec, restoring x30 afterward, and using a plain "ret" to return from the routine. Also: while the resulting tv_nsec value for CLOCK_REALTIME and CLOCK_MONOTONIC must be computed using intermediate values that are left-shifted by cs_shift (x12, set by __do_get_tspec), the results for coarse clocks should be calculated using unshifted values (xtime_coarse_nsec is in units of actual nanoseconds). The current code shifts intermediate values by x12 unconditionally, but x12 is uninitialized when servicing a coarse clock. Fix this by setting x12 to 0 once we know we are dealing with a coarse clock id. Signed-off-by: Nathan Lynch Acked-by: Will Deacon Cc: Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso/gettimeofday.S | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index f0a6d10b5211..fe652ffd34c2 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -103,6 +103,8 @@ ENTRY(__kernel_clock_gettime) bl __do_get_tspec seqcnt_check w9, 1b + mov x30, x2 + cmp w0, #CLOCK_MONOTONIC b.ne 6f @@ -118,6 +120,9 @@ ENTRY(__kernel_clock_gettime) ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne b.ne 8f + /* xtime_coarse_nsec is already right-shifted */ + mov x12, #0 + /* Get coarse timespec. */ adr vdso_data, _vdso_data 3: seqcnt_acquire @@ -156,7 +161,7 @@ ENTRY(__kernel_clock_gettime) lsr x11, x11, x12 stp x10, x11, [x1, #TSPEC_TV_SEC] mov x0, xzr - ret x2 + ret 7: mov x30, x2 8: /* Syscall fallback. */ From d4022a335271a48cce49df35d825897914fbffe3 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 3 Feb 2014 19:48:52 +0000 Subject: [PATCH 129/171] arm64: vdso: update wtm fields for CLOCK_MONOTONIC_COARSE Update wall-to-monotonic fields in the VDSO data page unconditionally. These are used to service CLOCK_MONOTONIC_COARSE, which is not guarded by use_syscall. 
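In C terms, the coarse fast path effectively computes the following (a sketch
of the gettimeofday.S logic; the real code is assembly, and the field names
are the vdso_data ones used elsewhere in this series):

  /*
   * What the vDSO fast path computes for CLOCK_MONOTONIC_COARSE,
   * served entirely from the vDSO data page.
   */
  static void coarse_monotonic(const struct vdso_data *d, struct timespec *ts)
  {
          ts->tv_sec  = d->xtime_coarse_sec  + d->wtm_clock_sec;
          ts->tv_nsec = d->xtime_coarse_nsec + d->wtm_clock_nsec;
          if (ts->tv_nsec >= NSEC_PER_SEC) {      /* normalize */
                  ts->tv_nsec -= NSEC_PER_SEC;
                  ts->tv_sec++;
          }
          /*
           * This path is taken even when use_syscall is set, which is why
           * wtm_clock_sec/nsec must be updated unconditionally.
           */
  }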
Signed-off-by: Nathan Lynch Acked-by: Will Deacon Cc: Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 65d40cf6945a..a7149cae1615 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -238,6 +238,8 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->use_syscall = use_syscall; vdso_data->xtime_coarse_sec = xtime_coarse.tv_sec; vdso_data->xtime_coarse_nsec = xtime_coarse.tv_nsec; + vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; + vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { vdso_data->cs_cycle_last = tk->clock->cycle_last; @@ -245,8 +247,6 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->xtime_clock_nsec = tk->xtime_nsec; vdso_data->cs_mult = tk->mult; vdso_data->cs_shift = tk->shift; - vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; - vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; } smp_wmb(); From 6290b53de025dd08a79257ee84173ef7b6d926f7 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 5 Feb 2014 12:03:52 +0000 Subject: [PATCH 130/171] arm64: compat: Wire up new AArch32 syscalls This patch enables sys_compat, sys_finit_module, sys_sched_setattr and sys_sched_getattr for compat (AArch32) applications. Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/unistd32.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 58125bf008d3..bb8eb8a78e67 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -399,7 +399,10 @@ __SYSCALL(374, compat_sys_sendmmsg) __SYSCALL(375, sys_setns) __SYSCALL(376, compat_sys_process_vm_readv) __SYSCALL(377, compat_sys_process_vm_writev) -__SYSCALL(378, sys_ni_syscall) /* 378 for kcmp */ +__SYSCALL(378, sys_kcmp) +__SYSCALL(379, sys_finit_module) +__SYSCALL(380, sys_sched_setattr) +__SYSCALL(381, sys_sched_getattr) #define __NR_compat_syscalls 379 From 530b099dfe8499d639e7fbcad28c4199e2a720c7 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Tue, 4 Feb 2014 02:15:32 +0000 Subject: [PATCH 131/171] security: select correct default LSM_MMAP_MIN_ADDR on arm on arm64 Binaries compiled for arm may run on arm64 if CONFIG_COMPAT is selected. Set LSM_MMAP_MIN_ADDR to 32768 if ARM64 && COMPAT to prevent selinux failures launching 32-bit static executables that are mapped at 0x8000. Signed-off-by: Colin Cross Acked-by: Will Deacon Acked-by: Eric Paris Acked-by: James Morris Signed-off-by: Catalin Marinas --- security/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/Kconfig b/security/Kconfig index e9c6ac724fef..beb86b500adf 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -103,7 +103,7 @@ config INTEL_TXT config LSM_MMAP_MIN_ADDR int "Low address space for LSM to protect from user allocation" depends on SECURITY && SECURITY_SELINUX - default 32768 if ARM + default 32768 if ARM || (ARM64 && COMPAT) default 65536 help This is the portion of low virtual memory which should be protected From 1b76af5ce8deb1c775ad1674bd249d782b5b13d9 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 5 Feb 2014 09:18:26 +0100 Subject: [PATCH 132/171] drm/ttm: Don't clear page metadata of imported sg pages These page pointers shouldn't be visible to TTM in the first place, but until we fix that up, don't clear the page metadata because that will upset the exporter. 
Reported-and-tested-by: Cristoph Haag Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz --- drivers/gpu/drm/ttm/ttm_tt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 9af99084b344..75f319090043 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -380,6 +380,9 @@ static void ttm_tt_clear_mapping(struct ttm_tt *ttm) pgoff_t i; struct page **page = ttm->pages; + if (ttm->page_flags & TTM_PAGE_FLAG_SG) + return; + for (i = 0; i < ttm->num_pages; ++i) { (*page)->mapping = NULL; (*page++)->index = 0; From 6a96e15096da6e7491107321cfa660c7c2aa119d Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 28 Jan 2014 14:45:41 -0500 Subject: [PATCH 133/171] selinux: add SOCK_DIAG_BY_FAMILY to the list of netlink message types The SELinux AF_NETLINK/NETLINK_SOCK_DIAG socket class was missing the SOCK_DIAG_BY_FAMILY definition which caused SELINUX_ERR messages when the ss tool was run. # ss Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port u_str ESTAB 0 0 * 14189 * 14190 u_str ESTAB 0 0 * 14145 * 14144 u_str ESTAB 0 0 * 14151 * 14150 {...} # ausearch -m SELINUX_ERR ---- time->Thu Jan 23 11:11:16 2014 type=SYSCALL msg=audit(1390493476.445:374): arch=c000003e syscall=44 success=yes exit=40 a0=3 a1=7fff03aa11f0 a2=28 a3=0 items=0 ppid=1852 pid=1895 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=1 comm="ss" exe="/usr/sbin/ss" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null) type=SELINUX_ERR msg=audit(1390493476.445:374): SELinux: unrecognized netlink message type=20 for sclass=32 Signed-off-by: Paul Moore --- security/selinux/nlmsgtab.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index 332ac8a80cf5..2df7b900e259 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "flask.h" #include "av_permissions.h" @@ -78,6 +79,7 @@ static struct nlmsg_perm nlmsg_tcpdiag_perms[] = { { TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, { DCCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, + { SOCK_DIAG_BY_FAMILY, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, }; static struct nlmsg_perm nlmsg_xfrm_perms[] = From 2172fa709ab32ca60e86179dc67d0857be8e2c98 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Thu, 30 Jan 2014 11:26:59 -0500 Subject: [PATCH 134/171] SELinux: Fix kernel BUG on empty security contexts. Setting an empty security context (length=0) on a file will lead to incorrectly dereferencing the type and other fields of the security context structure, yielding a kernel BUG. As a zero-length security context is never valid, just reject all such security contexts whether coming from userspace via setxattr or coming from the filesystem upon a getxattr request by SELinux. Setting a security context value (empty or otherwise) unknown to SELinux in the first place is only possible for a root process (CAP_MAC_ADMIN), and, if running SELinux in enforcing mode, only if the corresponding SELinux mac_admin permission is also granted to the domain by policy. In Fedora policies, this is only allowed for specific domains such as livecd for setting down security contexts that are not defined in the build host policy. Reproducer: su setenforce 0 touch foo setfattr -n security.selinux foo Caveat: Relabeling or removing foo after doing the above may not be possible without booting with SELinux disabled. 
Any subsequent access to foo after doing the above will also trigger the BUG. BUG output from Matthew Thode: [ 473.893141] ------------[ cut here ]------------ [ 473.962110] kernel BUG at security/selinux/ss/services.c:654! [ 473.995314] invalid opcode: 0000 [#6] SMP [ 474.027196] Modules linked in: [ 474.058118] CPU: 0 PID: 8138 Comm: ls Tainted: G D I 3.13.0-grsec #1 [ 474.116637] Hardware name: Supermicro X8ST3/X8ST3, BIOS 2.0 07/29/10 [ 474.149768] task: ffff8805f50cd010 ti: ffff8805f50cd488 task.ti: ffff8805f50cd488 [ 474.183707] RIP: 0010:[] [] context_struct_compute_av+0xce/0x308 [ 474.219954] RSP: 0018:ffff8805c0ac3c38 EFLAGS: 00010246 [ 474.252253] RAX: 0000000000000000 RBX: ffff8805c0ac3d94 RCX: 0000000000000100 [ 474.287018] RDX: ffff8805e8aac000 RSI: 00000000ffffffff RDI: ffff8805e8aaa000 [ 474.321199] RBP: ffff8805c0ac3cb8 R08: 0000000000000010 R09: 0000000000000006 [ 474.357446] R10: 0000000000000000 R11: ffff8805c567a000 R12: 0000000000000006 [ 474.419191] R13: ffff8805c2b74e88 R14: 00000000000001da R15: 0000000000000000 [ 474.453816] FS: 00007f2e75220800(0000) GS:ffff88061fc00000(0000) knlGS:0000000000000000 [ 474.489254] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 474.522215] CR2: 00007f2e74716090 CR3: 00000005c085e000 CR4: 00000000000207f0 [ 474.556058] Stack: [ 474.584325] ffff8805c0ac3c98 ffffffff811b549b ffff8805c0ac3c98 ffff8805f1190a40 [ 474.618913] ffff8805a6202f08 ffff8805c2b74e88 00068800d0464990 ffff8805e8aac860 [ 474.653955] ffff8805c0ac3cb8 000700068113833a ffff880606c75060 ffff8805c0ac3d94 [ 474.690461] Call Trace: [ 474.723779] [] ? lookup_fast+0x1cd/0x22a [ 474.778049] [] security_compute_av+0xf4/0x20b [ 474.811398] [] avc_compute_av+0x2a/0x179 [ 474.843813] [] avc_has_perm+0x45/0xf4 [ 474.875694] [] inode_has_perm+0x2a/0x31 [ 474.907370] [] selinux_inode_getattr+0x3c/0x3e [ 474.938726] [] security_inode_getattr+0x1b/0x22 [ 474.970036] [] vfs_getattr+0x19/0x2d [ 475.000618] [] vfs_fstatat+0x54/0x91 [ 475.030402] [] vfs_lstat+0x19/0x1b [ 475.061097] [] SyS_newlstat+0x15/0x30 [ 475.094595] [] ? __audit_syscall_entry+0xa1/0xc3 [ 475.148405] [] system_call_fastpath+0x16/0x1b [ 475.179201] Code: 00 48 85 c0 48 89 45 b8 75 02 0f 0b 48 8b 45 a0 48 8b 3d 45 d0 b6 00 8b 40 08 89 c6 ff ce e8 d1 b0 06 00 48 85 c0 49 89 c7 75 02 <0f> 0b 48 8b 45 b8 4c 8b 28 eb 1e 49 8d 7d 08 be 80 01 00 00 e8 [ 475.255884] RIP [] context_struct_compute_av+0xce/0x308 [ 475.296120] RSP [ 475.328734] ---[ end trace f076482e9d754adc ]--- Reported-by: Matthew Thode Signed-off-by: Stephen Smalley Cc: stable@vger.kernel.org Signed-off-by: Paul Moore --- security/selinux/ss/services.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index fc5a63a05a1c..f1e46d776544 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1232,6 +1232,10 @@ static int security_context_to_sid_core(const char *scontext, u32 scontext_len, struct context context; int rc = 0; + /* An empty security context is never valid. 
*/ + if (!scontext_len) + return -EINVAL; + if (!ss_initialized) { int i; From da9846ae15186d491d6e21ebbb5051e1d3c7f652 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 29 Jan 2014 12:04:03 -0500 Subject: [PATCH 135/171] kernfs: make kernfs_deactivate() honor KERNFS_LOCKDEP flag kernfs_deactivate() forgot to check whether KERNFS_LOCKDEP is set before performing lockdep annotations and ends up feeding uninitialized lockdep_map to lockdep triggering warning like the following on USB stick hotunplug. usb 1-2: USB disconnect, device number 2 INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 1 PID: 62 Comm: khubd Not tainted 3.13.0-work+ #82 Hardware name: empty empty/S3992, BIOS 080011 10/26/2007 ffff880065ca7f60 ffff88013a4ffa08 ffffffff81cfb6bd 0000000000000002 ffff88013a4ffac8 ffffffff810f8530 ffff88013a4fc710 0000000000000002 ffff880100000000 ffffffff82a3db50 0000000000000001 ffff88013a4fc710 Call Trace: [] dump_stack+0x4e/0x7a [] __lock_acquire+0x1910/0x1e70 [] lock_acquire+0x9a/0x1d0 [] kernfs_deactivate+0xee/0x130 [] kernfs_addrm_finish+0x38/0x60 [] kernfs_remove_by_name_ns+0x51/0xa0 [] remove_files.isra.1+0x41/0x80 [] sysfs_remove_group+0x47/0xa0 [] sysfs_remove_groups+0x33/0x50 [] device_remove_attrs+0x4d/0x80 [] device_del+0x12e/0x1d0 [] usb_disconnect+0x122/0x1a0 [] hub_thread+0x3c5/0x1290 [] kthread+0xed/0x110 [] ret_from_fork+0x7c/0xb0 Fix it by making kernfs_deactivate() perform lockdep annotations only if KERNFS_LOCKDEP is set. Signed-off-by: Tejun Heo Reported-by: Fabio Estevam Reported-by: Alan Stern Reported-by: Jiri Kosina Reported-by: Dave Jones Tested-by: Fabio Estevam Tested-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 5104cf5d25c5..bd6e18be6e1a 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -187,19 +187,23 @@ static void kernfs_deactivate(struct kernfs_node *kn) kn->u.completion = (void *)&wait; - rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); + if (kn->flags & KERNFS_LOCKDEP) + rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); /* atomic_add_return() is a mb(), put_active() will always see * the updated kn->u.completion. */ v = atomic_add_return(KN_DEACTIVATED_BIAS, &kn->active); if (v != KN_DEACTIVATED_BIAS) { - lock_contended(&kn->dep_map, _RET_IP_); + if (kn->flags & KERNFS_LOCKDEP) + lock_contended(&kn->dep_map, _RET_IP_); wait_for_completion(&wait); } - lock_acquired(&kn->dep_map, _RET_IP_); - rwsem_release(&kn->dep_map, 1, _RET_IP_); + if (kn->flags & KERNFS_LOCKDEP) { + lock_acquired(&kn->dep_map, _RET_IP_); + rwsem_release(&kn->dep_map, 1, _RET_IP_); + } } /** From c4ad8f98bef77c7356aa6a9ad9188a6acc6b849d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 5 Feb 2014 12:54:53 -0800 Subject: [PATCH 136/171] execve: use 'struct filename *' for executable name passing This changes 'do_execve()' to get the executable name as a 'struct filename', and to free it when it is done. This is what the normal users want, and it simplifies and streamlines their error handling. 
The controlled lifetime of the executable name also fixes a use-after-free problem with the trace_sched_process_exec tracepoint: the lifetime of the passed-in string for kernel users was not at all obvious, and the user-mode helper code used UMH_WAIT_EXEC to serialize the pathname allocation lifetime with the execve() having finished, which in turn meant that the trace point that happened after mm_release() of the old process VM ended up using already free'd memory. To solve the kernel string lifetime issue, this simply introduces "getname_kernel()" that works like the normal user-space getname() function, except with the source coming from kernel memory. As Oleg points out, this also means that we could drop the tcomm[] array from 'struct linux_binprm', since the pathname lifetime now covers setup_new_exec(). That would be a separate cleanup. Reported-by: Igor Zhbanov Tested-by: Steven Rostedt Cc: Oleg Nesterov Cc: Al Viro Signed-off-by: Linus Torvalds --- arch/parisc/hpux/fs.c | 15 +------------- fs/exec.c | 45 +++++++++++++++++++---------------------- fs/namei.c | 30 +++++++++++++++++++++++++++ include/linux/binfmts.h | 1 - include/linux/fs.h | 1 + include/linux/sched.h | 3 ++- init/main.c | 2 +- kernel/auditsc.c | 2 +- kernel/kmod.c | 2 +- 9 files changed, 58 insertions(+), 43 deletions(-) diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index 88d0962de65a..2bedafea3d94 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -33,22 +33,9 @@ int hpux_execve(struct pt_regs *regs) { - int error; - struct filename *filename; - - filename = getname((const char __user *) regs->gr[26]); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - goto out; - - error = do_execve(filename->name, + return do_execve(getname((const char __user *) regs->gr[26]), (const char __user *const __user *) regs->gr[25], (const char __user *const __user *) regs->gr[24]); - - putname(filename); - -out: - return error; } struct hpux_dirent { diff --git a/fs/exec.c b/fs/exec.c index e1529b4c79b1..3d78fccdd723 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -748,11 +748,10 @@ EXPORT_SYMBOL(setup_arg_pages); #endif /* CONFIG_MMU */ -struct file *open_exec(const char *name) +static struct file *do_open_exec(struct filename *name) { struct file *file; int err; - struct filename tmp = { .name = name }; static const struct open_flags open_exec_flags = { .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, .acc_mode = MAY_EXEC | MAY_OPEN, @@ -760,7 +759,7 @@ struct file *open_exec(const char *name) .lookup_flags = LOOKUP_FOLLOW, }; - file = do_filp_open(AT_FDCWD, &tmp, &open_exec_flags); + file = do_filp_open(AT_FDCWD, name, &open_exec_flags); if (IS_ERR(file)) goto out; @@ -784,6 +783,12 @@ exit: fput(file); return ERR_PTR(err); } + +struct file *open_exec(const char *name) +{ + struct filename tmp = { .name = name }; + return do_open_exec(&tmp); +} EXPORT_SYMBOL(open_exec); int kernel_read(struct file *file, loff_t offset, @@ -1162,7 +1167,7 @@ int prepare_bprm_creds(struct linux_binprm *bprm) return -ENOMEM; } -void free_bprm(struct linux_binprm *bprm) +static void free_bprm(struct linux_binprm *bprm) { free_arg_pages(bprm); if (bprm->cred) { @@ -1432,7 +1437,7 @@ static int exec_binprm(struct linux_binprm *bprm) /* * sys_execve() executes a new program. 
*/ -static int do_execve_common(const char *filename, +static int do_execve_common(struct filename *filename, struct user_arg_ptr argv, struct user_arg_ptr envp) { @@ -1441,6 +1446,9 @@ static int do_execve_common(const char *filename, struct files_struct *displaced; int retval; + if (IS_ERR(filename)) + return PTR_ERR(filename); + /* * We move the actual failure in case of RLIMIT_NPROC excess from * set*uid() to execve() because too many poorly written programs @@ -1473,7 +1481,7 @@ static int do_execve_common(const char *filename, check_unsafe_exec(bprm); current->in_execve = 1; - file = open_exec(filename); + file = do_open_exec(filename); retval = PTR_ERR(file); if (IS_ERR(file)) goto out_unmark; @@ -1481,8 +1489,7 @@ static int do_execve_common(const char *filename, sched_exec(); bprm->file = file; - bprm->filename = filename; - bprm->interp = filename; + bprm->filename = bprm->interp = filename->name; retval = bprm_mm_init(bprm); if (retval) @@ -1523,6 +1530,7 @@ static int do_execve_common(const char *filename, acct_update_integrals(current); task_numa_free(current); free_bprm(bprm); + putname(filename); if (displaced) put_files_struct(displaced); return retval; @@ -1544,10 +1552,11 @@ out_files: if (displaced) reset_files_struct(displaced); out_ret: + putname(filename); return retval; } -int do_execve(const char *filename, +int do_execve(struct filename *filename, const char __user *const __user *__argv, const char __user *const __user *__envp) { @@ -1557,7 +1566,7 @@ int do_execve(const char *filename, } #ifdef CONFIG_COMPAT -static int compat_do_execve(const char *filename, +static int compat_do_execve(struct filename *filename, const compat_uptr_t __user *__argv, const compat_uptr_t __user *__envp) { @@ -1607,25 +1616,13 @@ SYSCALL_DEFINE3(execve, const char __user *const __user *, argv, const char __user *const __user *, envp) { - struct filename *path = getname(filename); - int error = PTR_ERR(path); - if (!IS_ERR(path)) { - error = do_execve(path->name, argv, envp); - putname(path); - } - return error; + return do_execve(getname(filename), argv, envp); } #ifdef CONFIG_COMPAT asmlinkage long compat_sys_execve(const char __user * filename, const compat_uptr_t __user * argv, const compat_uptr_t __user * envp) { - struct filename *path = getname(filename); - int error = PTR_ERR(path); - if (!IS_ERR(path)) { - error = compat_do_execve(path->name, argv, envp); - putname(path); - } - return error; + return compat_do_execve(getname(filename), argv, envp); } #endif diff --git a/fs/namei.c b/fs/namei.c index d580df2e6804..385f7817bfcc 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -196,6 +196,7 @@ recopy: goto error; result->uptr = filename; + result->aname = NULL; audit_getname(result); return result; @@ -210,6 +211,35 @@ getname(const char __user * filename) return getname_flags(filename, 0, NULL); } +/* + * The "getname_kernel()" interface doesn't do pathnames longer + * than EMBEDDED_NAME_MAX. Deal with it - you're a kernel user. 
+ */ +struct filename * +getname_kernel(const char * filename) +{ + struct filename *result; + char *kname; + int len; + + len = strlen(filename); + if (len >= EMBEDDED_NAME_MAX) + return ERR_PTR(-ENAMETOOLONG); + + result = __getname(); + if (unlikely(!result)) + return ERR_PTR(-ENOMEM); + + kname = (char *)result + sizeof(*result); + result->name = kname; + result->uptr = NULL; + result->aname = NULL; + result->separate = false; + + strlcpy(kname, filename, EMBEDDED_NAME_MAX); + return result; +} + #ifdef CONFIG_AUDITSYSCALL void putname(struct filename *name) { diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index fd8bf3219ef7..b4a745d7d9a9 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -115,7 +115,6 @@ extern int copy_strings_kernel(int argc, const char *const *argv, extern int prepare_bprm_creds(struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); extern void set_binfmt(struct linux_binfmt *new); -extern void free_bprm(struct linux_binprm *); extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t); #endif /* _LINUX_BINFMTS_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 09f553c59813..d79678c188ad 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2079,6 +2079,7 @@ extern struct file * dentry_open(const struct path *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); extern struct filename *getname(const char __user *); +extern struct filename *getname_kernel(const char *); enum { FILE_CREATED = 1, diff --git a/include/linux/sched.h b/include/linux/sched.h index 68a0e84463a0..a781dec1cd0b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -128,6 +128,7 @@ struct bio_list; struct fs_struct; struct perf_event_context; struct blk_plug; +struct filename; /* * List of flags we want to share for kernel threads, @@ -2311,7 +2312,7 @@ extern void do_group_exit(int); extern int allow_signal(int); extern int disallow_signal(int); -extern int do_execve(const char *, +extern int do_execve(struct filename *, const char __user * const __user *, const char __user * const __user *); extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); diff --git a/init/main.c b/init/main.c index 2fd9cef70ee8..eb03090cdced 100644 --- a/init/main.c +++ b/init/main.c @@ -812,7 +812,7 @@ void __init load_default_modules(void) static int run_init_process(const char *init_filename) { argv_init[0] = init_filename; - return do_execve(init_filename, + return do_execve(getname_kernel(init_filename), (const char __user *const __user *)argv_init, (const char __user *const __user *)envp_init); } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 10176cd5956a..7aef2f4b6c64 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1719,7 +1719,7 @@ void audit_putname(struct filename *name) struct audit_context *context = current->audit_context; BUG_ON(!context); - if (!context->in_syscall) { + if (!name->aname || !context->in_syscall) { #if AUDIT_DEBUG == 2 printk(KERN_ERR "%s:%d(:%d): final_putname(%p)\n", __FILE__, __LINE__, context->serial, name); diff --git a/kernel/kmod.c b/kernel/kmod.c index b086006c59e7..6b375af4958d 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -239,7 +239,7 @@ static int ____call_usermodehelper(void *data) commit_creds(new); - retval = do_execve(sub_info->path, + retval = do_execve(getname_kernel(sub_info->path), (const char __user *const __user *)sub_info->argv, (const char __user *const __user 
*)sub_info->envp); if (!retval) From f8f202348208fa8a2d817b42f250e145fa885620 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 5 Feb 2014 06:51:37 +0100 Subject: [PATCH 137/171] x86: Disable CONFIG_X86_DECODER_SELFTEST in allmod/allyesconfigs It can take some time to validate the image, make sure {allyes|allmod}config doesn't enable it. I'd say randconfig will cover it often enough, and the failure is also borderline build coverage related: you cannot really make the decoder test fail via source level changes, only with changes in the build environment, so I agree with Andi that we can disable this one too. Signed-off-by: Ingo Molnar Acked-by: Paul Gortmaker paul.gortmaker@windriver.com> Suggested-and-acked-by: Andi Kleen andi@firstfloor.org> Signed-off-by: Linus Torvalds --- arch/x86/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 0f3621ed1db6..321a52ccf63a 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -184,6 +184,7 @@ config HAVE_MMIOTRACE_SUPPORT config X86_DECODER_SELFTEST bool "x86 instruction decoder selftest" depends on DEBUG_KERNEL && KPROBES + depends on !COMPILE_TEST ---help--- Perform x86 instruction decoder selftests at build time. This option is useful for checking the sanity of x86 instruction From 081cd62a010f97b5bc1d2b0cd123c5abc692b68a Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 14 Jan 2014 12:40:09 +0000 Subject: [PATCH 138/171] x86/efi: Allow mapping BGRT on x86-32 CONFIG_X86_32 doesn't map the boot services regions into the EFI memory map (see commit 700870119f49 ("x86, efi: Don't map Boot Services on i386")), and so efi_lookup_mapped_addr() will fail to return a valid address. Executing the ioremap() path in efi_bgrt_init() causes the following warning on x86-32 because we're trying to ioremap() RAM, WARNING: CPU: 0 PID: 0 at arch/x86/mm/ioremap.c:102 __ioremap_caller+0x2ad/0x2c0() Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.13.0-0.rc5.git0.1.2.fc21.i686 #1 Hardware name: DellInc. Venue 8 Pro 5830/09RP78, BIOS A02 10/17/2013 00000000 00000000 c0c0df08 c09a5196 00000000 c0c0df38 c0448c1e c0b41310 00000000 00000000 c0b37bc1 00000066 c043bbfd c043bbfd 00e7dfe0 00073eff 00073eff c0c0df48 c0448ce2 00000009 00000000 c0c0df9c c043bbfd 00078d88 Call Trace: [] dump_stack+0x41/0x52 [] warn_slowpath_common+0x7e/0xa0 [] ? __ioremap_caller+0x2ad/0x2c0 [] ? __ioremap_caller+0x2ad/0x2c0 [] warn_slowpath_null+0x22/0x30 [] __ioremap_caller+0x2ad/0x2c0 [] ? acpi_tb_verify_table+0x1c/0x43 [] ? acpi_get_table_with_size+0x63/0xb5 [] ? efi_lookup_mapped_addr+0xe/0xf0 [] ioremap_nocache+0x1b/0x20 [] ? efi_bgrt_init+0x83/0x10c [] efi_bgrt_init+0x83/0x10c [] efi_late_init+0x8/0xa [] start_kernel+0x3ae/0x3c3 [] ? repair_env_string+0x51/0x51 [] i386_start_kernel+0x12e/0x131 Switch to using early_memremap(), which won't trigger this warning, and has the added benefit of more accurately conveying what we're trying to do - map a chunk of memory. This patch addresses the following bug report, https://bugzilla.kernel.org/show_bug.cgi?id=67911 Reported-by: Adam Williamson Cc: Josh Triplett Cc: Matthew Garrett Cc: Rafael J. 
Wysocki Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi-bgrt.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index 7145ec63c520..4df9591eadad 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -49,7 +49,8 @@ void __init efi_bgrt_init(void) image = efi_lookup_mapped_addr(bgrt_tab->image_address); if (!image) { - image = ioremap(bgrt_tab->image_address, sizeof(bmp_header)); + image = early_memremap(bgrt_tab->image_address, + sizeof(bmp_header)); ioremapped = true; if (!image) return; @@ -57,7 +58,7 @@ void __init efi_bgrt_init(void) memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); if (ioremapped) - iounmap(image); + early_iounmap(image, sizeof(bmp_header)); bgrt_image_size = bmp_header.size; bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL); @@ -65,7 +66,8 @@ void __init efi_bgrt_init(void) return; if (ioremapped) { - image = ioremap(bgrt_tab->image_address, bmp_header.size); + image = early_memremap(bgrt_tab->image_address, + bmp_header.size); if (!image) { kfree(bgrt_image); bgrt_image = NULL; @@ -75,5 +77,5 @@ void __init efi_bgrt_init(void) memcpy_fromio(bgrt_image, image, bgrt_image_size); if (ioremapped) - iounmap(image); + early_iounmap(image, bmp_header.size); } From 8b7ad1bb3d440da888f2a939dc870eba429b9192 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 5 Feb 2014 14:47:45 +1000 Subject: [PATCH 139/171] drm/mgag200,ast,cirrus: fix regression with drm_can_sleep conversion I totally sign inverted my way out of this one. Cc: stable@vger.kernel.org Reported-by: "Sabrina Dubroca" Signed-off-by: Dave Airlie --- drivers/gpu/drm/ast/ast_fb.c | 2 +- drivers/gpu/drm/cirrus/cirrus_fbdev.c | 2 +- drivers/gpu/drm/mgag200/mgag200_fb.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_fb.c b/drivers/gpu/drm/ast/ast_fb.c index 3f65dd6676b2..a28640f47c27 100644 --- a/drivers/gpu/drm/ast/ast_fb.c +++ b/drivers/gpu/drm/ast/ast_fb.c @@ -65,7 +65,7 @@ static void ast_dirty_update(struct ast_fbdev *afbdev, * then the BO is being moved and we should * store up the damage until later. */ - if (!drm_can_sleep()) + if (drm_can_sleep()) ret = ast_bo_reserve(bo, true); if (ret) { if (ret != -EBUSY) diff --git a/drivers/gpu/drm/cirrus/cirrus_fbdev.c b/drivers/gpu/drm/cirrus/cirrus_fbdev.c index 2fd4a92162cb..32bbba0a787b 100644 --- a/drivers/gpu/drm/cirrus/cirrus_fbdev.c +++ b/drivers/gpu/drm/cirrus/cirrus_fbdev.c @@ -39,7 +39,7 @@ static void cirrus_dirty_update(struct cirrus_fbdev *afbdev, * then the BO is being moved and we should * store up the damage until later. */ - if (!drm_can_sleep()) + if (drm_can_sleep()) ret = cirrus_bo_reserve(bo, true); if (ret) { if (ret != -EBUSY) diff --git a/drivers/gpu/drm/mgag200/mgag200_fb.c b/drivers/gpu/drm/mgag200/mgag200_fb.c index f9adc27ef32a..13b7dd83faa9 100644 --- a/drivers/gpu/drm/mgag200/mgag200_fb.c +++ b/drivers/gpu/drm/mgag200/mgag200_fb.c @@ -41,7 +41,7 @@ static void mga_dirty_update(struct mga_fbdev *mfbdev, * then the BO is being moved and we should * store up the damage until later. 
*/ - if (!drm_can_sleep()) + if (drm_can_sleep()) ret = mgag200_bo_reserve(bo, true); if (ret) { if (ret != -EBUSY) From 7c4c62a04a2a80e3feb5d6c97aca1e413b11c790 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 30 Jan 2014 14:11:12 +1000 Subject: [PATCH 140/171] drm/radeon: allow geom rings to be setup on r600/r700 (v2) the evergreen CS parser has allowed this for a while, just port the code to the r600 one. This is required before geom shaders can be made work. v2: agd5f: minor cleanup and add additional 7xx reg. Signed-off-by: Dave Airlie Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600_cs.c | 18 ++++++++++++++++-- drivers/gpu/drm/radeon/radeon_drv.c | 3 ++- drivers/gpu/drm/radeon/reg_srcs/r600 | 1 + 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 7b399dc5fd54..2812c7d1ae6f 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -1007,8 +1007,22 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case R_008C64_SQ_VSTMP_RING_SIZE: case R_0288C8_SQ_GS_VERT_ITEMSIZE: /* get value to populate the IB don't remove */ - tmp =radeon_get_ib_value(p, idx); - ib[idx] = 0; + /*tmp =radeon_get_ib_value(p, idx); + ib[idx] = 0;*/ + break; + case SQ_ESGS_RING_BASE: + case SQ_GSVS_RING_BASE: + case SQ_ESTMP_RING_BASE: + case SQ_GSTMP_RING_BASE: + case SQ_PSTMP_RING_BASE: + case SQ_VSTMP_RING_BASE: + r = radeon_cs_packet_next_reloc(p, &reloc, 0); + if (r) { + dev_warn(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); + return -EINVAL; + } + ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); break; case SQ_CONFIG: track->sq_config = radeon_get_ib_value(p, idx); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index ec8c388eec17..84a1bbb75f91 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -78,9 +78,10 @@ * 2.34.0 - Add CIK tiling mode array query * 2.35.0 - Add CIK macrotile mode array query * 2.36.0 - Fix CIK DCE tiling setup + * 2.37.0 - allow GS ring setup on r6xx/r7xx */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 36 +#define KMS_DRIVER_MINOR 37 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/reg_srcs/r600 b/drivers/gpu/drm/radeon/reg_srcs/r600 index 20bfbda7b3f1..ec0c6829c1dc 100644 --- a/drivers/gpu/drm/radeon/reg_srcs/r600 +++ b/drivers/gpu/drm/radeon/reg_srcs/r600 @@ -18,6 +18,7 @@ r600 0x9400 0x00028A3C VGT_GROUP_VECT_1_FMT_CNTL 0x00028A40 VGT_GS_MODE 0x00028A6C VGT_GS_OUT_PRIM_TYPE +0x00028B38 VGT_GS_MAX_VERT_OUT 0x000088C8 VGT_GS_PER_ES 0x000088E8 VGT_GS_PER_VS 0x000088D4 VGT_GS_VERTEX_REUSE From 4a7ac12eedd190cdf071e61145defa73df1675c0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 6 Feb 2014 11:30:48 +0000 Subject: [PATCH 141/171] arm64: barriers: allow dsb macro to take option parameter The dsb instruction takes an option specifying both the target access types and shareability domain. This patch allows such an option to be passed to the dsb macro, resulting in potentially more efficient code. Currently the option is ignored until all callers are updated (unlike ARM, the option is mandated by the assembler). 
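As an illustration of where this is heading (not part of this patch), once callers are updated the macro could paste the option into the instruction, letting callers request a narrower barrier than the full-system "sy". Option names such as ish and ishst below are the standard ARMv8 shareability/access-type qualifiers; the helper is a hypothetical example:

	/* hypothetical follow-up: actually emit the requested option */
	#define dsb(opt)	asm volatile("dsb " #opt : : : "memory")

	static inline void example_publish(u64 *p, u64 v)
	{
		*p = v;
		dsb(ishst);	/* make the store visible to other CPUs in the
				 * inner-shareable domain before any later store;
				 * cheaper than a full-system "dsb sy" */
	}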
Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/barrier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 78e20ba8806b..409ca370cfe2 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -25,7 +25,7 @@ #define wfi() asm volatile("wfi" : : : "memory") #define isb() asm volatile("isb" : : : "memory") -#define dsb() asm volatile("dsb sy" : : : "memory") +#define dsb(opt) asm volatile("dsb sy" : : : "memory") #define mb() dsb() #define rmb() asm volatile("dsb ld" : : : "memory") From 97b8b16bfcb7f5ccc1b3c10c08580f9f1acb61ac Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 5 Feb 2014 22:05:44 +0200 Subject: [PATCH 142/171] MIPS: fpu.h: Fix build when CONFIG_BUG is not set __enable_fpu produces a build failure when CONFIG_BUG is not set: In file included from arch/mips/kernel/cpu-probe.c:24:0: arch/mips/include/asm/fpu.h: In function '__enable_fpu': arch/mips/include/asm/fpu.h:77:1: error: control reaches end of non-void function [-Werror=return-type] This is regression introduced in 3.14-rc1. Fix that. Signed-off-by: Aaro Koskinen Acked-by: Paul Burton Cc: John Crispin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/6504/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/fpu.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index cfe092fc720d..6b9749540edf 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -74,6 +74,8 @@ static inline int __enable_fpu(enum fpu_mode mode) default: BUG(); } + + return SIGFPE; } #define __disable_fpu() \ From e7f2a444891cb39f11d5429467d0fd7e011fe7fe Mon Sep 17 00:00:00 2001 From: Florian Vaussard Date: Wed, 5 Feb 2014 07:51:22 +0100 Subject: [PATCH 143/171] pinctrl: do not init debugfs entries for unimplemented functionalities Commit c420619 "pinctrl: pinconf: remove checks on ops->pin_config_get" removed the check on (ops != NULL) when performing pinconf_pins_show() or pinconf_groups_show(). As these entries are always enabled, even if pinconf is not supported, reading will result in an oops due to NULL ops. Instead of checking for ops, remove the corresponding debugfs entries if pinconf and/or pinmux are not implemented. Tested on OMAP3 (pinctrl-single). 
Cc: stable@vger.kernel.org Signed-off-by: Florian Vaussard Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index cab020a58bf5..c0fe6091566a 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1644,8 +1644,10 @@ static void pinctrl_init_device_debugfs(struct pinctrl_dev *pctldev) device_root, pctldev, &pinctrl_groups_ops); debugfs_create_file("gpio-ranges", S_IFREG | S_IRUGO, device_root, pctldev, &pinctrl_gpioranges_ops); - pinmux_init_device_debugfs(device_root, pctldev); - pinconf_init_device_debugfs(device_root, pctldev); + if (pctldev->desc->pmxops) + pinmux_init_device_debugfs(device_root, pctldev); + if (pctldev->desc->confops) + pinconf_init_device_debugfs(device_root, pctldev); } static void pinctrl_remove_device_debugfs(struct pinctrl_dev *pctldev) From 5b232c5addc02980cfce451575f1d1f975bdb04e Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 5 Feb 2014 19:11:34 +0530 Subject: [PATCH 144/171] pinctrl: tegra: return correct error type When memory allocation failed, drive should return error as ENOMEM. Signed-off-by: Laxman Dewangan Acked-by: Stephen Warren Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-tegra.c b/drivers/pinctrl/pinctrl-tegra.c index a2e93a2b5ff4..e767355ab0ad 100644 --- a/drivers/pinctrl/pinctrl-tegra.c +++ b/drivers/pinctrl/pinctrl-tegra.c @@ -645,7 +645,7 @@ int tegra_pinctrl_probe(struct platform_device *pdev, GFP_KERNEL); if (!pmx->regs) { dev_err(&pdev->dev, "Can't alloc regs pointer\n"); - return -ENODEV; + return -ENOMEM; } for (i = 0; i < pmx->nbanks; i++) { From 6583327c4dd55acbbf2a6f25e775b28b3abf9a42 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Thu, 6 Feb 2014 15:58:20 +0100 Subject: [PATCH 145/171] x86, hweight: Fix BUG when booting with CONFIG_GCOV_PROFILE_ALL=y Commit d61931d89b, "x86: Add optimized popcnt variants" introduced compile flag -fcall-saved-rdi for lib/hweight.c. When combined with options -fprofile-arcs and -O2, this flag causes gcc to generate broken constructor code. As a result, a 64 bit x86 kernel compiled with CONFIG_GCOV_PROFILE_ALL=y prints message "gcov: could not create file" and runs into sproadic BUGs during boot. The gcc people indicate that these kinds of problems are endemic when using ad hoc calling conventions. It is therefore best to treat any file compiled with ad hoc calling conventions as an isolated environment and avoid things like profiling or coverage analysis, since those subsystems assume a "normal" calling conventions. This patch avoids the bug by excluding lib/hweight.o from coverage profiling. Reported-by: Meelis Roos Cc: Andrew Morton Signed-off-by: Peter Oberparleiter Link: http://lkml.kernel.org/r/52F3A30C.7050205@linux.vnet.ibm.com Signed-off-by: H. 
Peter Anvin Cc: --- lib/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Makefile b/lib/Makefile index a459c31e8c6b..04944e9993ed 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -43,6 +43,7 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o +GCOV_PROFILE_hweight.o := n CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o From 75a1ba5b2c529db60ca49626bcaf0bddf4548438 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 3 Feb 2014 21:41:44 +0100 Subject: [PATCH 146/171] x86, microcode, AMD: Unify valid container checks For additional coverage, BorisO and friends unknowlingly did swap AMD microcode with Intel microcode blobs in order to see what happens. What did happen on 32-bit was [ 5.722656] BUG: unable to handle kernel paging request at be3a6008 [ 5.722693] IP: [] load_microcode_amd+0x24/0x3f0 [ 5.722716] *pdpt = 0000000000000000 *pde = 0000000000000000 because there was a valid initrd there but without valid microcode in it and the container check happened *after* the relocated ramdisk handling on 32-bit, which was clearly wrong. While at it, take care of the ramdisk relocation on both 32- and 64-bit as it is done on both. Also, comment what we're doing because this code is a bit tricky. Reported-and-tested-by: Boris Ostrovsky Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1391460104-7261-1-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/microcode/amd_early.c | 43 +++++++++++++++-------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c index 8384c0fa206f..617a9e284245 100644 --- a/arch/x86/kernel/cpu/microcode/amd_early.c +++ b/arch/x86/kernel/cpu/microcode/amd_early.c @@ -285,6 +285,15 @@ static void __init collect_cpu_sig_on_bsp(void *arg) uci->cpu_sig.sig = cpuid_eax(0x00000001); } + +static void __init get_bsp_sig(void) +{ + unsigned int bsp = boot_cpu_data.cpu_index; + struct ucode_cpu_info *uci = ucode_cpu_info + bsp; + + if (!uci->cpu_sig.sig) + smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1); +} #else void load_ucode_amd_ap(void) { @@ -337,31 +346,37 @@ void load_ucode_amd_ap(void) int __init save_microcode_in_initrd_amd(void) { + unsigned long cont; enum ucode_state ret; u32 eax; -#ifdef CONFIG_X86_32 - unsigned int bsp = boot_cpu_data.cpu_index; - struct ucode_cpu_info *uci = ucode_cpu_info + bsp; + if (!container) + return -EINVAL; - if (!uci->cpu_sig.sig) - smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1); +#ifdef CONFIG_X86_32 + get_bsp_sig(); + cont = (unsigned long)container; +#else + /* + * We need the physical address of the container for both bitness since + * boot_params.hdr.ramdisk_image is a physical address. + */ + cont = __pa(container); +#endif /* - * Take into account the fact that the ramdisk might get relocated - * and therefore we need to recompute the container's position in - * virtual memory space. + * Take into account the fact that the ramdisk might get relocated and + * therefore we need to recompute the container's position in virtual + * memory space. 
*/ - container = (u8 *)(__va((u32)relocated_ramdisk) + - ((u32)container - boot_params.hdr.ramdisk_image)); -#endif + if (relocated_ramdisk) + container = (u8 *)(__va(relocated_ramdisk) + + (cont - boot_params.hdr.ramdisk_image)); + if (ucode_new_rev) pr_info("microcode: updated early to new patch_level=0x%08x\n", ucode_new_rev); - if (!container) - return -EINVAL; - eax = cpuid_eax(0x00000001); eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); From 277cba1d28b99169f2a056d0d6f98a4039531cb8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 6 Feb 2014 12:04:19 -0800 Subject: [PATCH 147/171] Documentation/kernel-parameters.txt: fix memmap= language Clean up descriptions of memmap= boot options. Add periods (full stops), drop commas, change "used" to "reserved" or "marked". Signed-off-by: Randy Dunlap Cc: Andiry Xu Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 8f441dab0396..7116fda7077f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1726,16 +1726,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted. option description. memmap=nn[KMG]@ss[KMG] - [KNL] Force usage of a specific region of memory - Region of memory to be used, from ss to ss+nn. + [KNL] Force usage of a specific region of memory. + Region of memory to be used is from ss to ss+nn. memmap=nn[KMG]#ss[KMG] [KNL,ACPI] Mark specific memory as ACPI data. - Region of memory to be used, from ss to ss+nn. + Region of memory to be marked is from ss to ss+nn. memmap=nn[KMG]$ss[KMG] [KNL,ACPI] Mark specific memory as reserved. - Region of memory to be used, from ss to ss+nn. + Region of memory to be reserved is from ss to ss+nn. Example: Exclude memory from 0x18690000-0x1869ffff memmap=64K$0x18690000 or From fb951eb5e167de9f07973ce0dfff674a2019bfab Mon Sep 17 00:00:00 2001 From: Zongxun Wang Date: Thu, 6 Feb 2014 12:04:20 -0800 Subject: [PATCH 148/171] ocfs2: free allocated clusters if error occurs after ocfs2_claim_clusters Even if using the same jbd2 handle, we cannot rollback a transaction. So once some error occurs after successfully allocating clusters, the allocated clusters will never be used and it means they are lost. For example, call ocfs2_claim_clusters successfully when expanding a file, but failed in ocfs2_insert_extent. So we need free the allocated clusters if they are not used indeed. 
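Reduced to a sketch, the pattern the patch introduces looks like the following; the helper names are the ones used in the diff below, but the surrounding code and argument lists are abridged for illustration. Once ocfs2_claim_clusters() has succeeded, any later failure must hand the bits back explicitly, because committing or aborting the jbd2 handle will not undo the allocation:

	status = ocfs2_claim_clusters(handle, data_ac, min_clusters,
				      &bit_off, &num_bits);
	if (status < 0)
		goto leave;		/* nothing claimed yet, nothing to undo */

	status = ocfs2_insert_extent(handle, &et, cpos, block, num_bits,
				     flags, meta_ac);
	if (status < 0) {
		/* the handle cannot be rolled back: give the bits back by hand */
		if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
			ocfs2_free_local_alloc_bits(osb, handle, data_ac,
						    bit_off, num_bits);
		else
			ocfs2_free_clusters(handle, data_ac->ac_inode,
					    data_ac->ac_bh,
					    ocfs2_clusters_to_blocks(osb->sb, bit_off),
					    num_bits);
		goto leave;
	}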
Signed-off-by: Zongxun Wang Signed-off-by: Joseph Qi Acked-by: Joel Becker Cc: Mark Fasheh Cc: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 38 +++++++++++++++++++++++++++++++++++--- fs/ocfs2/localalloc.c | 42 ++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/localalloc.h | 6 ++++++ 3 files changed, 83 insertions(+), 3 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 8750ae1b8636..aada5801567a 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -4742,6 +4742,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle, enum ocfs2_alloc_restarted *reason_ret) { int status = 0, err = 0; + int need_free = 0; int free_extents; enum ocfs2_alloc_restarted reason = RESTART_NONE; u32 bit_off, num_bits; @@ -4796,7 +4797,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); - goto leave; + need_free = 1; + goto bail; } block = ocfs2_clusters_to_blocks(osb->sb, bit_off); @@ -4807,7 +4809,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle, num_bits, flags, meta_ac); if (status < 0) { mlog_errno(status); - goto leave; + need_free = 1; + goto bail; } ocfs2_journal_dirty(handle, et->et_root_bh); @@ -4821,6 +4824,19 @@ int ocfs2_add_clusters_in_btree(handle_t *handle, reason = RESTART_TRANS; } +bail: + if (need_free) { + if (data_ac->ac_which == OCFS2_AC_USE_LOCAL) + ocfs2_free_local_alloc_bits(osb, handle, data_ac, + bit_off, num_bits); + else + ocfs2_free_clusters(handle, + data_ac->ac_inode, + data_ac->ac_bh, + ocfs2_clusters_to_blocks(osb->sb, bit_off), + num_bits); + } + leave: if (reason_ret) *reason_ret = reason; @@ -6805,6 +6821,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, struct buffer_head *di_bh) { int ret, i, has_data, num_pages = 0; + int need_free = 0; + u32 bit_off, num; handle_t *handle; u64 uninitialized_var(block); struct ocfs2_inode_info *oi = OCFS2_I(inode); @@ -6850,7 +6868,6 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, } if (has_data) { - u32 bit_off, num; unsigned int page_end; u64 phys; @@ -6886,6 +6903,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages); if (ret) { mlog_errno(ret); + need_free = 1; goto out_commit; } @@ -6896,6 +6914,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, ret = ocfs2_read_inline_data(inode, pages[0], di_bh); if (ret) { mlog_errno(ret); + need_free = 1; goto out_commit; } @@ -6927,6 +6946,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL); if (ret) { mlog_errno(ret); + need_free = 1; goto out_commit; } @@ -6938,6 +6958,18 @@ out_commit: dquot_free_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb, 1)); + if (need_free) { + if (data_ac->ac_which == OCFS2_AC_USE_LOCAL) + ocfs2_free_local_alloc_bits(osb, handle, data_ac, + bit_off, num); + else + ocfs2_free_clusters(handle, + data_ac->ac_inode, + data_ac->ac_bh, + ocfs2_clusters_to_blocks(osb->sb, bit_off), + num); + } + ocfs2_commit_trans(osb, handle); out_unlock: diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index cd5496b7a0a3..044013455621 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -781,6 +781,48 @@ bail: return status; } +int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb, + handle_t *handle, + struct ocfs2_alloc_context *ac, + u32 bit_off, + u32 num_bits) +{ + int status, start; + u32 clear_bits; + struct inode 
*local_alloc_inode; + void *bitmap; + struct ocfs2_dinode *alloc; + struct ocfs2_local_alloc *la; + + BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); + + local_alloc_inode = ac->ac_inode; + alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; + la = OCFS2_LOCAL_ALLOC(alloc); + + bitmap = la->la_bitmap; + start = bit_off - le32_to_cpu(la->la_bm_off); + clear_bits = num_bits; + + status = ocfs2_journal_access_di(handle, + INODE_CACHE(local_alloc_inode), + osb->local_alloc_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + while (clear_bits--) + ocfs2_clear_bit(start++, bitmap); + + le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits); + ocfs2_journal_dirty(handle, osb->local_alloc_bh); + +bail: + return status; +} + static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc) { u32 count; diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h index 1be9b5864460..44a7d1fb2dec 100644 --- a/fs/ocfs2/localalloc.h +++ b/fs/ocfs2/localalloc.h @@ -55,6 +55,12 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, u32 *bit_off, u32 *num_bits); +int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb, + handle_t *handle, + struct ocfs2_alloc_context *ac, + u32 bit_off, + u32 num_bits); + void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, unsigned int num_clusters); void ocfs2_la_enable_worker(struct work_struct *work); From 579f82901f6f41256642936d7e632f3979ad76d4 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 6 Feb 2014 12:04:21 -0800 Subject: [PATCH 149/171] swap: add a simple detector for inappropriate swapin readahead This is a patch to improve swap readahead algorithm. It's from Hugh and I slightly changed it. Hugh's original changelog: swapin readahead does a blind readahead, whether or not the swapin is sequential. This may be ok on harddisk, because large reads have relatively small costs, and if the readahead pages are unneeded they can be reclaimed easily - though, what if their allocation forced reclaim of useful pages? But on SSD devices large reads are more expensive than small ones: if the readahead pages are unneeded, reading them in caused significant overhead. This patch adds very simplistic random read detection. Stealing the PageReadahead technique from Konstantin Khlebnikov's patch, avoiding the vma/anon_vma sophistications of Shaohua Li's patch, swapin_nr_pages() simply looks at readahead's current success rate, and narrows or widens its readahead window accordingly. There is little science to its heuristic: it's about as stupid as can be whilst remaining effective. The table below shows elapsed times (in centiseconds) when running a single repetitive swapping load across a 1000MB mapping in 900MB ram with 1GB swap (the harddisk tests had taken painfully too long when I used mem=500M, but SSD shows similar results for that). Vanilla is the 3.6-rc7 kernel on which I started; Shaohua denotes his Sep 3 patch in mmotm and linux-next; HughOld denotes my Oct 1 patch which Shaohua showed to be defective; HughNew this Nov 14 patch, with page_cluster as usual at default of 3 (8-page reads); HughPC4 this same patch with page_cluster 4 (16-page reads); HughPC0 with page_cluster 0 (1-page reads: no readahead). HDD for swapping to harddisk, SSD for swapping to VertexII SSD. Seq for sequential access to the mapping, cycling five times around; Rand for the same number of random touches. Anon for a MAP_PRIVATE anon mapping; Shmem for a MAP_SHARED anon mapping, equivalent to tmpfs. 
One weakness of Shaohua's vma/anon_vma approach was that it did not optimize Shmem: seen below. Konstantin's approach was perhaps mistuned, 50% slower on Seq: did not compete and is not shown below. HDD Vanilla Shaohua HughOld HughNew HughPC4 HughPC0 Seq Anon 73921 76210 75611 76904 78191 121542 Seq Shmem 73601 73176 73855 72947 74543 118322 Rand Anon 895392 831243 871569 845197 846496 841680 Rand Shmem 1058375 1053486 827935 764955 764376 756489 SSD Vanilla Shaohua HughOld HughNew HughPC4 HughPC0 Seq Anon 24634 24198 24673 25107 21614 70018 Seq Shmem 24959 24932 25052 25703 22030 69678 Rand Anon 43014 26146 28075 25989 26935 25901 Rand Shmem 45349 45215 28249 24268 24138 24332 These tests are, of course, two extremes of a very simple case: under heavier mixed loads I've not yet observed any consistent improvement or degradation, and wider testing would be welcome. Shaohua Li: Test shows Vanilla is slightly better in sequential workload than Hugh's patch. I observed with Hugh's patch sometimes the readahead size is shrinked too fast (from 8 to 1 immediately) in sequential workload if there is no hit. And in such case, continuing doing readahead is good actually. I don't prepare a sophisticated algorithm for the sequential workload because so far we can't guarantee sequential accessed pages are swap out sequentially. So I slightly change Hugh's heuristic - don't shrink readahead size too fast. Here is my test result (unit second, 3 runs average): Vanilla Hugh New Seq 356 370 360 Random 4525 2447 2444 Attached graph is the swapin/swapout throughput I collected with 'vmstat 2'. The first part is running a random workload (till around 1200 of the x-axis) and the second part is running a sequential workload. swapin and swapout throughput are almost identical in steady state in both workloads. These are expected behavior. while in Vanilla, swapin is much bigger than swapout especially in random workload (because wrong readahead). Original patches by: Shaohua Li and Konstantin Khlebnikov. 
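To make the heuristic concrete, a worked example following the swapin_nr_pages() logic added below, with page_cluster = 3 (max_pages = 8) and a previous stored window of 8 pages (so the "don't shrink too fast" floor is 8 / 2 = 4):
- 5 readahead hits since the last fault: 5 + 2 = 7, rounded up to the next power of two = 8, clamped to max_pages = 8, so the full window is kept.
- 1 hit: 1 + 2 = 3, rounded up to 4, so the window narrows to 4 pages.
- 0 hits and the faulting offset is not adjacent to the previous one: the raw value is 1, but the shrink floor raises it back to 4; only after further misses (floor 2, then 1) does readahead drop to a single page and effectively switch off.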
[fengguang.wu@intel.com: swapin_nr_pages() can be static] Signed-off-by: Hugh Dickins Signed-off-by: Shaohua Li Signed-off-by: Fengguang Wu Cc: Rik van Riel Cc: Wu Fengguang Cc: Minchan Kim Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 4 +-- mm/swap_state.c | 63 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e464b4e987e8..d1fe1a761047 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -228,9 +228,9 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1) TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback) PAGEFLAG(MappedToDisk, mappedtodisk) -/* PG_readahead is only used for file reads; PG_reclaim is only for writes */ +/* PG_readahead is only used for reads; PG_reclaim is only for writes */ PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim) -PAGEFLAG(Readahead, reclaim) /* Reminder to do async read-ahead */ +PAGEFLAG(Readahead, reclaim) TESTCLEARFLAG(Readahead, reclaim) #ifdef CONFIG_HIGHMEM /* diff --git a/mm/swap_state.c b/mm/swap_state.c index 98e85e9c2b2d..e76ace30d436 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -63,6 +63,8 @@ unsigned long total_swapcache_pages(void) return ret; } +static atomic_t swapin_readahead_hits = ATOMIC_INIT(4); + void show_swap_cache_info(void) { printk("%lu pages in swap cache\n", total_swapcache_pages()); @@ -286,8 +288,11 @@ struct page * lookup_swap_cache(swp_entry_t entry) page = find_get_page(swap_address_space(entry), entry.val); - if (page) + if (page) { INC_CACHE_INFO(find_success); + if (TestClearPageReadahead(page)) + atomic_inc(&swapin_readahead_hits); + } INC_CACHE_INFO(find_total); return page; @@ -389,6 +394,50 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, return found_page; } +static unsigned long swapin_nr_pages(unsigned long offset) +{ + static unsigned long prev_offset; + unsigned int pages, max_pages, last_ra; + static atomic_t last_readahead_pages; + + max_pages = 1 << ACCESS_ONCE(page_cluster); + if (max_pages <= 1) + return 1; + + /* + * This heuristic has been found to work well on both sequential and + * random loads, swapping to hard disk or to SSD: please don't ask + * what the "+ 2" means, it just happens to work well, that's all. + */ + pages = atomic_xchg(&swapin_readahead_hits, 0) + 2; + if (pages == 2) { + /* + * We can have no readahead hits to judge by: but must not get + * stuck here forever, so check for an adjacent offset instead + * (and don't even bother to check whether swap type is same). 
+ */ + if (offset != prev_offset + 1 && offset != prev_offset - 1) + pages = 1; + prev_offset = offset; + } else { + unsigned int roundup = 4; + while (roundup < pages) + roundup <<= 1; + pages = roundup; + } + + if (pages > max_pages) + pages = max_pages; + + /* Don't shrink readahead too fast */ + last_ra = atomic_read(&last_readahead_pages) / 2; + if (pages < last_ra) + pages = last_ra; + atomic_set(&last_readahead_pages, pages); + + return pages; +} + /** * swapin_readahead - swap in pages in hope we need them soon * @entry: swap entry of this memory @@ -412,11 +461,16 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, struct vm_area_struct *vma, unsigned long addr) { struct page *page; - unsigned long offset = swp_offset(entry); + unsigned long entry_offset = swp_offset(entry); + unsigned long offset = entry_offset; unsigned long start_offset, end_offset; - unsigned long mask = (1UL << page_cluster) - 1; + unsigned long mask; struct blk_plug plug; + mask = swapin_nr_pages(offset) - 1; + if (!mask) + goto skip; + /* Read a page_cluster sized and aligned cluster around offset. */ start_offset = offset & ~mask; end_offset = offset | mask; @@ -430,10 +484,13 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, gfp_mask, vma, addr); if (!page) continue; + if (offset != entry_offset) + SetPageReadahead(page); page_cache_release(page); } blk_finish_plug(&plug); lru_add_drain(); /* Push any new pages onto the LRU now */ +skip: return read_swap_cache_async(entry, gfp_mask, vma, addr); } From f893ab41e4dae2fe8991faf5d86d029068d1ef3a Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Thu, 6 Feb 2014 12:04:23 -0800 Subject: [PATCH 150/171] mm/swap: fix race on swap_info reuse between swapoff and swapon swapoff clear swap_info's SWP_USED flag prematurely and free its resources after that. A concurrent swapon will reuse this swap_info while its previous resources are not cleared completely. These late freed resources are: - p->percpu_cluster - swap_cgroup_ctrl[type] - block_device setting - inode->i_flags &= ~S_SWAPFILE This patch clears the SWP_USED flag after all its resources are freed, so that swapon can reuse this swap_info by alloc_swap_info() safely. [akpm@linux-foundation.org: tidy up code comment] Signed-off-by: Weijie Yang Acked-by: Hugh Dickins Cc: Krzysztof Kozlowski Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swapfile.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index c6c13b050a58..4a7f7e6992b6 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1923,7 +1923,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) p->swap_map = NULL; cluster_info = p->cluster_info; p->cluster_info = NULL; - p->flags = 0; frontswap_map = frontswap_map_get(p); spin_unlock(&p->lock); spin_unlock(&swap_lock); @@ -1949,6 +1948,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) mutex_unlock(&inode->i_mutex); } filp_close(swap_file, NULL); + + /* + * Clear the SWP_USED flag after all resources are freed so that swapon + * can reuse this swap_info in alloc_swap_info() safely. It is ok to + * not hold p->lock after we cleared its SWP_WRITEOK. 
+ */ + spin_lock(&swap_lock); + p->flags = 0; + spin_unlock(&swap_lock); + err = 0; atomic_inc(&proc_poll_event); wake_up_interruptible(&proc_poll_wait); From a85d9df1ea1d23682a0ed1e100e6965006595d06 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 6 Feb 2014 12:04:24 -0800 Subject: [PATCH 151/171] mm: __set_page_dirty_nobuffers() uses spin_lock_irqsave() instead of spin_lock_irq() During aio stress test, we observed the following lockdep warning. This mean AIO+numa_balancing is currently deadlockable. The problem is, aio_migratepage disable interrupt, but __set_page_dirty_nobuffers unintentionally enable it again. Generally, all helper function should use spin_lock_irqsave() instead of spin_lock_irq() because they don't know caller at all. other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&ctx->completion_lock)->rlock); lock(&(&ctx->completion_lock)->rlock); *** DEADLOCK *** dump_stack+0x19/0x1b print_usage_bug+0x1f7/0x208 mark_lock+0x21d/0x2a0 mark_held_locks+0xb9/0x140 trace_hardirqs_on_caller+0x105/0x1d0 trace_hardirqs_on+0xd/0x10 _raw_spin_unlock_irq+0x2c/0x50 __set_page_dirty_nobuffers+0x8c/0xf0 migrate_page_copy+0x434/0x540 aio_migratepage+0xb1/0x140 move_to_new_page+0x7d/0x230 migrate_pages+0x5e5/0x700 migrate_misplaced_page+0xbc/0xf0 do_numa_page+0x102/0x190 handle_pte_fault+0x241/0x970 handle_mm_fault+0x265/0x370 __do_page_fault+0x172/0x5a0 do_page_fault+0x1a/0x70 page_fault+0x28/0x30 Signed-off-by: KOSAKI Motohiro Cc: Larry Woodman Cc: Rik van Riel Cc: Johannes Weiner Acked-by: David Rientjes Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page-writeback.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 2d30e2cfe804..7106cb1aca8e 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2173,11 +2173,12 @@ int __set_page_dirty_nobuffers(struct page *page) if (!TestSetPageDirty(page)) { struct address_space *mapping = page_mapping(page); struct address_space *mapping2; + unsigned long flags; if (!mapping) return 1; - spin_lock_irq(&mapping->tree_lock); + spin_lock_irqsave(&mapping->tree_lock, flags); mapping2 = page_mapping(page); if (mapping2) { /* Race with truncate? */ BUG_ON(mapping2 != mapping); @@ -2186,7 +2187,7 @@ int __set_page_dirty_nobuffers(struct page *page) radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } - spin_unlock_irq(&mapping->tree_lock); + spin_unlock_irqrestore(&mapping->tree_lock, flags); if (mapping->host) { /* !PageAnon && !swapper_space */ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); From 017c217a26e9bf6948482f751b30d0507e30a7d0 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Thu, 6 Feb 2014 12:04:25 -0800 Subject: [PATCH 152/171] arch/x86/mm/numa.c: initialize numa_kernel_nodes in numa_clear_kernel_node_hotplug() On-stack variable numa_kernel_nodes in numa_clear_kernel_node_hotplug() was not initialized. So we need to initialize it. [akpm@linux-foundation.org: use NODE_MASK_NONE, per David] Signed-off-by: Tang Chen Tested-by: Gu Zheng Reported-by: Dave Jones Reported-by: David Rientjes Tested-by: Dave Jones Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. 
Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 81b2750f3666..45ec9d72b6dd 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -565,7 +565,7 @@ static void __init numa_init_array(void) static void __init numa_clear_kernel_node_hotplug(void) { int i, nid; - nodemask_t numa_kernel_nodes; + nodemask_t numa_kernel_nodes = NODE_MASK_NONE; unsigned long start, end; struct memblock_type *type = &memblock.reserved; From 7bc35fdde6724549a0239b71e08b9f33d8bf2bfb Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Thu, 6 Feb 2014 12:04:27 -0800 Subject: [PATCH 153/171] arch/x86/mm/numa.c: fix array index overflow when synchronizing nid to memblock.reserved. The following path causes an out-of-bounds array index. memblock_add_region() will always set nid in memblock.reserved to MAX_NUMNODES. In numa_register_memblks(), after we set all nids to their correct values in memblock.reserved, we called setup_node_data(), and used memblock_alloc_nid() to allocate memory, with nid set to MAX_NUMNODES. The nodemask_t type can be seen as a bit array, and the valid indices are 0 ~ MAX_NUMNODES-1. After that, when we call node_set() in numa_clear_kernel_node_hotplug(), the nodemask_t gets an index of value MAX_NUMNODES, which is outside [0 ~ MAX_NUMNODES-1]. See below: numa_init() |---> numa_register_memblks() | |---> memblock_set_node(memory) set correct nid in memblock.memory | |---> memblock_set_node(reserved) set correct nid in memblock.reserved | |...... | |---> setup_node_data() | |---> memblock_alloc_nid() here, nid is set to MAX_NUMNODES (1024) |...... |---> numa_clear_kernel_node_hotplug() |---> node_set() here, we have an index 1024, and overflowed This patch moves nid setting to numa_clear_kernel_node_hotplug() to fix this problem. Reported-by: Dave Jones Signed-off-by: Tang Chen Tested-by: Gu Zheng Reported-by: Dave Jones Cc: David Rientjes Tested-by: Dave Jones Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/numa.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 45ec9d72b6dd..27aa0455fab3 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -493,14 +493,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) struct numa_memblk *mb = &mi->blk[i]; memblock_set_node(mb->start, mb->end - mb->start, &memblock.memory, mb->nid); - - /* - * At this time, all memory regions reserved by memblock are - * used by the kernel. Set the nid in memblock.reserved will - * mark out all the nodes the kernel resides in. - */ - memblock_set_node(mb->start, mb->end - mb->start, - &memblock.reserved, mb->nid); } /* @@ -569,6 +561,17 @@ static void __init numa_clear_kernel_node_hotplug(void) unsigned long start, end; struct memblock_type *type = &memblock.reserved; + /* + * At this time, all memory regions reserved by memblock are + * used by the kernel. Set the nid in memblock.reserved will + * mark out all the nodes the kernel resides in. + */ + for (i = 0; i < numa_meminfo.nr_blks; i++) { + struct numa_memblk *mb = &numa_meminfo.blk[i]; + memblock_set_node(mb->start, mb->end - mb->start, + &memblock.reserved, mb->nid); + } + /* Mark all kernel nodes.
*/ for (i = 0; i < type->cnt; i++) node_set(type->regions[i].nid, numa_kernel_nodes); From 227d53b397a32a7614667b3ecaf1d89902fb6c12 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 6 Feb 2014 12:04:28 -0800 Subject: [PATCH 154/171] mm: __set_page_dirty uses spin_lock_irqsave instead of spin_lock_irq Using spin_{un}lock_irq is dangerous if the caller has disabled interrupts. During aio buffer migration, we can hit the following call stack. aio_migratepage [disable interrupt] migrate_page_copy clear_page_dirty_for_io set_page_dirty __set_page_dirty_buffers __set_page_dirty spin_lock_irq This means the current aio migration path is deadlockable. spin_lock_irqsave is a safer alternative and we should use it. Signed-off-by: KOSAKI Motohiro Reported-by: David Rientjes Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 651dba10b9c2..27265a8b43c1 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -654,14 +654,16 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode); static void __set_page_dirty(struct page *page, struct address_space *mapping, int warn) { - spin_lock_irq(&mapping->tree_lock); + unsigned long flags; + + spin_lock_irqsave(&mapping->tree_lock, flags); if (page->mapping) { /* Race with truncate? */ WARN_ON_ONCE(warn && !PageUptodate(page)); account_page_dirtied(page, mapping); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } - spin_unlock_irq(&mapping->tree_lock); + spin_unlock_irqrestore(&mapping->tree_lock, flags); __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); } From 4f545a4ba158600b7a780b9daaf9ffabb934cdb6 Mon Sep 17 00:00:00 2001 From: Adam Thomson Date: Thu, 6 Feb 2014 18:03:17 +0000 Subject: [PATCH 155/171] hwmon: (da9055) Remove use of regmap_irq_get_virq() Remove use of regmap_irq_get_virq() in driver probe which was conflicting with use of platform_get_irq_byname(). platform_get_irq_byname() already returns the VIRQ number due to MFD core translation so using regmap_irq_get_virq() on that returned value results in an incorrect IRQ being requested. The driver probes then fail because of this. Signed-off-by: Adam Thomson Signed-off-by: Guenter Roeck --- drivers/hwmon/da9055-hwmon.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/hwmon/da9055-hwmon.c b/drivers/hwmon/da9055-hwmon.c index 029ecabc4380..73b3865f1207 100644 --- a/drivers/hwmon/da9055-hwmon.c +++ b/drivers/hwmon/da9055-hwmon.c @@ -278,10 +278,6 @@ static int da9055_hwmon_probe(struct platform_device *pdev) if (hwmon_irq < 0) return hwmon_irq; - hwmon_irq = regmap_irq_get_virq(hwmon->da9055->irq_data, hwmon_irq); - if (hwmon_irq < 0) - return hwmon_irq; - ret = devm_request_threaded_irq(&pdev->dev, hwmon_irq, NULL, da9055_auxadc_irq, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, From 8e86f0b409a44193f1587e87b69c5dcf8f65be67 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 4 Feb 2014 12:29:12 +0000 Subject: [PATCH 156/171] arm64: atomics: fix use of acquire + release for full barrier semantics Linux requires a number of atomic operations to provide full barrier semantics, that is, no memory accesses after the operation can be observed before any accesses up to and including the operation in program order.
On arm64, these operations have been incorrectly implemented as follows: // A, B, C are independent memory locations // atomic_op (B) 1: ldaxr x0, [B] // Exclusive load with acquire stlxr w1, x0, [B] // Exclusive store with release cbnz w1, 1b The assumption here being that two half barriers are equivalent to a full barrier, so the only permitted ordering would be A -> B -> C (where B is the atomic operation involving both a load and a store). Unfortunately, this is not the case by the letter of the architecture and, in fact, the accesses to A and C are permitted to pass their nearest half barrier resulting in orderings such as Bl -> A -> C -> Bs or Bl -> C -> A -> Bs (where Bl is the load-acquire on B and Bs is the store-release on B). This is a clear violation of the full barrier requirement. The simple way to fix this is to implement the same algorithm as ARMv7 using explicit barriers: // atomic_op (B) dmb ish // Full barrier 1: ldxr x0, [B] // Exclusive load stxr w1, x0, [B] // Exclusive store cbnz w1, 1b dmb ish // Full barrier but this has the undesirable effect of introducing *two* full barrier instructions. A better approach is actually the following, non-intuitive sequence: // atomic_op (B) 1: ldxr x0, [B] // Exclusive load stlxr w1, x0, [B] // Exclusive store with release cbnz w1, 1b dmb ish // Full barrier The simple observations here are: - The dmb ensures that no subsequent accesses (e.g. the access to C) can enter or pass the atomic sequence. - The dmb also ensures that no prior accesses (e.g. the access to A) can pass the atomic sequence. - Therefore, no prior access can pass a subsequent access, or vice-versa (i.e. A is strictly ordered before C). - The stlxr ensures that no prior access can pass the store component of the atomic operation. The only tricky part remaining is the ordering between the ldxr and the access to A, since the absence of the first dmb means that we're now permitting re-ordering between the ldxr and any prior accesses. From an (arbitrary) observer's point of view, there are two scenarios: 1. We have observed the ldxr. This means that if we perform a store to [B], the ldxr will still return older data. If we can observe the ldxr, then we can potentially observe the permitted re-ordering with the access to A, which is clearly an issue when compared to the dmb variant of the code. Thankfully, the exclusive monitor will save us here since it will be cleared as a result of the store and the ldxr will retry. Notice that any use of a later memory observation to imply observation of the ldxr will also imply observation of the access to A, since the stlxr/dmb ensure strict ordering. 2. We have not observed the ldxr. This means we can perform a store and influence the later ldxr. However, that doesn't actually tell us anything about the access to [A], so we've not lost anything here either when compared to the dmb variant. This patch implements this solution for our barriered atomic operations, ensuring that we satisfy the full barrier requirements where they are needed. 
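For illustration only, here is a minimal, self-contained sketch of an add-return written the way described above (the function and variable names are invented for this example; in the diff below the kernel keeps the trailing barrier outside the asm block as smp_mb() for the C atomics and as an explicit dmb ish in the assembly files):

static inline int example_atomic_add_return(int i, int *counter)
{
	unsigned long tmp;
	int result;

	asm volatile(
	"1:	ldxr	%w0, %2\n"		/* exclusive load, no acquire */
	"	add	%w0, %w0, %w3\n"
	"	stlxr	%w1, %w0, %2\n"		/* exclusive store with release */
	"	cbnz	%w1, 1b\n"
	"	dmb	ish\n"			/* full barrier closes the sequence */
	: "=&r" (result), "=&r" (tmp), "+Q" (*counter)
	: "Ir" (i)
	: "memory");

	return result;
}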
Cc: Cc: Peter Zijlstra Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/atomic.h | 29 ++++++++++++++++++++--------- arch/arm64/include/asm/cmpxchg.h | 9 +++++---- arch/arm64/include/asm/futex.h | 6 ++++-- arch/arm64/kernel/kuser32.S | 6 ++++-- arch/arm64/lib/bitops.S | 3 ++- 5 files changed, 35 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 01de5aaa3edc..e32893e005d4 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -64,7 +64,7 @@ static inline int atomic_add_return(int i, atomic_t *v) int result; asm volatile("// atomic_add_return\n" -"1: ldaxr %w0, %2\n" +"1: ldxr %w0, %2\n" " add %w0, %w0, %w3\n" " stlxr %w1, %w0, %2\n" " cbnz %w1, 1b" @@ -72,6 +72,7 @@ static inline int atomic_add_return(int i, atomic_t *v) : "Ir" (i) : "cc", "memory"); + smp_mb(); return result; } @@ -96,7 +97,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) int result; asm volatile("// atomic_sub_return\n" -"1: ldaxr %w0, %2\n" +"1: ldxr %w0, %2\n" " sub %w0, %w0, %w3\n" " stlxr %w1, %w0, %2\n" " cbnz %w1, 1b" @@ -104,6 +105,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) : "Ir" (i) : "cc", "memory"); + smp_mb(); return result; } @@ -112,17 +114,20 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) unsigned long tmp; int oldval; + smp_mb(); + asm volatile("// atomic_cmpxchg\n" -"1: ldaxr %w1, %2\n" +"1: ldxr %w1, %2\n" " cmp %w1, %w3\n" " b.ne 2f\n" -" stlxr %w0, %w4, %2\n" +" stxr %w0, %w4, %2\n" " cbnz %w0, 1b\n" "2:" : "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter) : "Ir" (old), "r" (new) : "cc", "memory"); + smp_mb(); return oldval; } @@ -183,7 +188,7 @@ static inline long atomic64_add_return(long i, atomic64_t *v) unsigned long tmp; asm volatile("// atomic64_add_return\n" -"1: ldaxr %0, %2\n" +"1: ldxr %0, %2\n" " add %0, %0, %3\n" " stlxr %w1, %0, %2\n" " cbnz %w1, 1b" @@ -191,6 +196,7 @@ static inline long atomic64_add_return(long i, atomic64_t *v) : "Ir" (i) : "cc", "memory"); + smp_mb(); return result; } @@ -215,7 +221,7 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) unsigned long tmp; asm volatile("// atomic64_sub_return\n" -"1: ldaxr %0, %2\n" +"1: ldxr %0, %2\n" " sub %0, %0, %3\n" " stlxr %w1, %0, %2\n" " cbnz %w1, 1b" @@ -223,6 +229,7 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) : "Ir" (i) : "cc", "memory"); + smp_mb(); return result; } @@ -231,17 +238,20 @@ static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new) long oldval; unsigned long res; + smp_mb(); + asm volatile("// atomic64_cmpxchg\n" -"1: ldaxr %1, %2\n" +"1: ldxr %1, %2\n" " cmp %1, %3\n" " b.ne 2f\n" -" stlxr %w0, %4, %2\n" +" stxr %w0, %4, %2\n" " cbnz %w0, 1b\n" "2:" : "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter) : "Ir" (old), "r" (new) : "cc", "memory"); + smp_mb(); return oldval; } @@ -253,11 +263,12 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) unsigned long tmp; asm volatile("// atomic64_dec_if_positive\n" -"1: ldaxr %0, %2\n" +"1: ldxr %0, %2\n" " subs %0, %0, #1\n" " b.mi 2f\n" " stlxr %w1, %0, %2\n" " cbnz %w1, 1b\n" +" dmb ish\n" "2:" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 56166d7f4a25..189390ce8653 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -29,7 +29,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int 
size switch (size) { case 1: asm volatile("// __xchg1\n" - "1: ldaxrb %w0, %2\n" + "1: ldxrb %w0, %2\n" " stlxrb %w1, %w3, %2\n" " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) @@ -38,7 +38,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size break; case 2: asm volatile("// __xchg2\n" - "1: ldaxrh %w0, %2\n" + "1: ldxrh %w0, %2\n" " stlxrh %w1, %w3, %2\n" " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr) @@ -47,7 +47,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size break; case 4: asm volatile("// __xchg4\n" - "1: ldaxr %w0, %2\n" + "1: ldxr %w0, %2\n" " stlxr %w1, %w3, %2\n" " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr) @@ -56,7 +56,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size break; case 8: asm volatile("// __xchg8\n" - "1: ldaxr %0, %2\n" + "1: ldxr %0, %2\n" " stlxr %w1, %3, %2\n" " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr) @@ -67,6 +67,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size BUILD_BUG(); } + smp_mb(); return ret; } diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 78cc3aba5d69..572193d0005d 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -24,10 +24,11 @@ #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \ asm volatile( \ -"1: ldaxr %w1, %2\n" \ +"1: ldxr %w1, %2\n" \ insn "\n" \ "2: stlxr %w3, %w0, %2\n" \ " cbnz %w3, 1b\n" \ +" dmb ish\n" \ "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -111,11 +112,12 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; asm volatile("// futex_atomic_cmpxchg_inatomic\n" -"1: ldaxr %w1, %2\n" +"1: ldxr %w1, %2\n" " sub %w3, %w1, %w4\n" " cbnz %w3, 3f\n" "2: stlxr %w3, %w5, %2\n" " cbnz %w3, 1b\n" +" dmb ish\n" "3:\n" " .pushsection .fixup,\"ax\"\n" "4: mov %w0, %w6\n" diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 63c48ffdf230..7787208e8cc6 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -38,12 +38,13 @@ __kuser_cmpxchg64: // 0xffff0f60 .inst 0xe92d00f0 // push {r4, r5, r6, r7} .inst 0xe1c040d0 // ldrd r4, r5, [r0] .inst 0xe1c160d0 // ldrd r6, r7, [r1] - .inst 0xe1b20e9f // 1: ldaexd r0, r1, [r2] + .inst 0xe1b20f9f // 1: ldrexd r0, r1, [r2] .inst 0xe0303004 // eors r3, r0, r4 .inst 0x00313005 // eoreqs r3, r1, r5 .inst 0x01a23e96 // stlexdeq r3, r6, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffff9 // beq 1b + .inst 0xf57ff05b // dmb ish .inst 0xe2730000 // rsbs r0, r3, #0 .inst 0xe8bd00f0 // pop {r4, r5, r6, r7} .inst 0xe12fff1e // bx lr @@ -55,11 +56,12 @@ __kuser_memory_barrier: // 0xffff0fa0 .align 5 __kuser_cmpxchg: // 0xffff0fc0 - .inst 0xe1923e9f // 1: ldaex r3, [r2] + .inst 0xe1923f9f // 1: ldrex r3, [r2] .inst 0xe0533000 // subs r3, r3, r0 .inst 0x01823e91 // stlexeq r3, r1, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffffa // beq 1b + .inst 0xf57ff05b // dmb ish .inst 0xe2730000 // rsbs r0, r3, #0 .inst 0xe12fff1e // bx lr diff --git a/arch/arm64/lib/bitops.S b/arch/arm64/lib/bitops.S index e5db797790d3..7dac371cc9a2 100644 --- a/arch/arm64/lib/bitops.S +++ b/arch/arm64/lib/bitops.S @@ -46,11 +46,12 @@ ENTRY( \name ) mov x2, #1 add x1, x1, x0, lsr #3 // Get word offset lsl x4, x2, x3 // Create mask -1: ldaxr x2, [x1] +1: ldxr x2, [x1] lsr x0, x2, x3 // Save old value of bit \instr x2, x2, x4 // toggle bit stlxr w5, x2, [x1] cbnz 
w5, 1b + dmb ish and x0, x0, #1 3: ret ENDPROC(\name ) From 95c4189689f92fba7ecf9097173404d4928c6e9b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 4 Feb 2014 12:29:13 +0000 Subject: [PATCH 157/171] arm64: asm: remove redundant "cc" clobbers cbnz/tbnz don't update the condition flags, so remove the "cc" clobbers from inline asm blocks that only use these instructions to implement conditional branches. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/atomic.h | 24 ++++++++++-------------- arch/arm64/include/asm/cmpxchg.h | 8 ++++---- arch/arm64/include/asm/futex.h | 4 ++-- arch/arm64/include/asm/spinlock.h | 10 +++++----- 4 files changed, 21 insertions(+), 25 deletions(-) diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index e32893e005d4..0237f0867e37 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -54,8 +54,7 @@ static inline void atomic_add(int i, atomic_t *v) " stxr %w1, %w0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline int atomic_add_return(int i, atomic_t *v) @@ -70,7 +69,7 @@ static inline int atomic_add_return(int i, atomic_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); smp_mb(); return result; @@ -87,8 +86,7 @@ static inline void atomic_sub(int i, atomic_t *v) " stxr %w1, %w0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline int atomic_sub_return(int i, atomic_t *v) @@ -103,7 +101,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); smp_mb(); return result; @@ -125,7 +123,7 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) "2:" : "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter) : "Ir" (old), "r" (new) - : "cc", "memory"); + : "cc"); smp_mb(); return oldval; @@ -178,8 +176,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v) " stxr %w1, %0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline long atomic64_add_return(long i, atomic64_t *v) @@ -194,7 +191,7 @@ static inline long atomic64_add_return(long i, atomic64_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); smp_mb(); return result; @@ -211,8 +208,7 @@ static inline void atomic64_sub(u64 i, atomic64_t *v) " stxr %w1, %0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline long atomic64_sub_return(long i, atomic64_t *v) @@ -227,7 +223,7 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); smp_mb(); return result; @@ -249,7 +245,7 @@ static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new) "2:" : "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter) : "Ir" (old), "r" (new) - : "cc", "memory"); + : "cc"); smp_mb(); return oldval; diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 189390ce8653..57c0fa7bf711 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -34,7 +34,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void 
*ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; case 2: asm volatile("// __xchg2\n" @@ -43,7 +43,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; case 4: asm volatile("// __xchg4\n" @@ -52,7 +52,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; case 8: asm volatile("// __xchg8\n" @@ -61,7 +61,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; default: BUILD_BUG(); diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 572193d0005d..5f750dc96e0f 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -41,7 +41,7 @@ " .popsection\n" \ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ : "r" (oparg), "Ir" (-EFAULT) \ - : "cc", "memory") + : "memory") static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) @@ -129,7 +129,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, " .popsection\n" : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) : "r" (oldval), "r" (newval), "Ir" (-EFAULT) - : "cc", "memory"); + : "memory"); *uval = val; return ret; diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 3d5cf064d7a1..c45b7b1b7197 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -132,7 +132,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) " cbnz %w0, 2b\n" : "=&r" (tmp), "+Q" (rw->lock) : "r" (0x80000000) - : "cc", "memory"); + : "memory"); } static inline int arch_write_trylock(arch_rwlock_t *rw) @@ -146,7 +146,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) "1:\n" : "=&r" (tmp), "+Q" (rw->lock) : "r" (0x80000000) - : "cc", "memory"); + : "memory"); return !tmp; } @@ -187,7 +187,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw) " cbnz %w1, 2b\n" : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : - : "cc", "memory"); + : "memory"); } static inline void arch_read_unlock(arch_rwlock_t *rw) @@ -201,7 +201,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) " cbnz %w1, 1b\n" : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : - : "cc", "memory"); + : "memory"); } static inline int arch_read_trylock(arch_rwlock_t *rw) @@ -216,7 +216,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) "1:\n" : "=&r" (tmp), "+r" (tmp2), "+Q" (rw->lock) : - : "cc", "memory"); + : "memory"); return !tmp2; } From 55834a773fe343912b705bef8114ec93fd337188 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 7 Feb 2014 17:12:45 +0000 Subject: [PATCH 158/171] arm64: defconfig: Expand default enabled features FPGA implementations of the Cortex-A57 and Cortex-A53 are now available in the form of the SMM-A57 and SMM-A53 Soft Macrocell Models (SMMs) for Versatile Express. As these attach to a Motherboard Express V2M-P1 it would be useful to have support for some V2M-P1 peripherals enabled by default. 
Additionally a couple of of features have been introduced since the last defconfig update (CMA, jump labels) that would be good to have enabled by default to ensure they are build and boot tested. This patch updates the arm64 defconfig to enable support for these devices and features. The arm64 Kconfig is modified to select HAVE_PATA_PLATFORM, which is required to enable support for the CompactFlash controller on the V2M-P1. A few options which don't need to appear in defconfig are trimmed: * BLK_DEV - selected by default * EXPERIMENTAL - otherwise gone from the kernel * MII - selected by drivers which require it * USB_SUPPORT - selected by default Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + arch/arm64/configs/defconfig | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index dd4327f09ba4..27bbcfc7202a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -36,6 +36,7 @@ config ARM64 select HAVE_GENERIC_DMA_COHERENT select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_MEMBLOCK + select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS select IRQ_DOMAIN select MODULES_USE_ELF_RELA diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 84139be62ae6..7959dd0ca5d5 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set # CONFIG_SWAP is not set CONFIG_SYSVIPC=y @@ -19,6 +18,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y +CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set @@ -27,6 +27,7 @@ CONFIG_ARCH_VEXPRESS=y CONFIG_ARCH_XGENE=y CONFIG_SMP=y CONFIG_PREEMPT=y +CONFIG_CMA=y CONFIG_CMDLINE="console=ttyAMA0" # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_COMPAT=y @@ -42,14 +43,17 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_WIRELESS is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y -CONFIG_BLK_DEV=y +CONFIG_DMA_CMA=y CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y +CONFIG_PATA_PLATFORM=y +CONFIG_PATA_OF_PLATFORM=y CONFIG_NETDEVICES=y -CONFIG_MII=y CONFIG_SMC91X=y +CONFIG_SMSC911X=y # CONFIG_WLAN is not set CONFIG_INPUT_EVDEV=y # CONFIG_SERIO_I8042 is not set @@ -62,13 +66,19 @@ CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set +CONFIG_REGULATOR=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_FB=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set -# CONFIG_USB_SUPPORT is not set +CONFIG_USB=y +CONFIG_USB_ISP1760_HCD=y +CONFIG_USB_STORAGE=y +CONFIG_MMC=y +CONFIG_MMC_ARMMMCI=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y From 0bbfdfe8d25fcc1d5c2edb6b060fb0c5cf66aff9 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 31 Jan 2014 19:33:39 +0200 Subject: [PATCH 159/171] libceph: factor out logic from ceph_osdc_start_request() Factor out logic from ceph_osdc_start_request() into a new helper, __ceph_osdc_start_request(). ceph_osdc_start_request() now amounts to taking locks and calling __ceph_osdc_start_request(). 
Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/osd_client.c | 62 +++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 166d4c7ba065..2aa82b6bb305 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1426,6 +1426,40 @@ static void __send_queued(struct ceph_osd_client *osdc) __send_request(osdc, req); } +/* + * Caller should hold map_sem for read and request_mutex. + */ +static int __ceph_osdc_start_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req, + bool nofail) +{ + int rc; + + __register_request(osdc, req); + req->r_sent = 0; + req->r_got_reply = 0; + rc = __map_request(osdc, req, 0); + if (rc < 0) { + if (nofail) { + dout("osdc_start_request failed map, " + " will retry %lld\n", req->r_tid); + rc = 0; + } else { + __unregister_request(osdc, req); + } + return rc; + } + + if (req->r_osd == NULL) { + dout("send_request %p no up osds in pg\n", req); + ceph_monc_request_next_osdmap(&osdc->client->monc); + } else { + __send_queued(osdc); + } + + return 0; +} + /* * Timeout callback, called every N seconds when 1 or more osd * requests has been active for more than N seconds. When this @@ -2351,34 +2385,16 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req, bool nofail) { - int rc = 0; + int rc; down_read(&osdc->map_sem); mutex_lock(&osdc->request_mutex); - __register_request(osdc, req); - req->r_sent = 0; - req->r_got_reply = 0; - rc = __map_request(osdc, req, 0); - if (rc < 0) { - if (nofail) { - dout("osdc_start_request failed map, " - " will retry %lld\n", req->r_tid); - rc = 0; - } else { - __unregister_request(osdc, req); - } - goto out_unlock; - } - if (req->r_osd == NULL) { - dout("send_request %p no up osds in pg\n", req); - ceph_monc_request_next_osdmap(&osdc->client->monc); - } else { - __send_queued(osdc); - } - rc = 0; -out_unlock: + + rc = __ceph_osdc_start_request(osdc, req, nofail); + mutex_unlock(&osdc->request_mutex); up_read(&osdc->map_sem); + return rc; } EXPORT_SYMBOL(ceph_osdc_start_request); From ff513ace9b772e75e337f8e058cc7f12816843fe Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 3 Feb 2014 13:56:33 +0200 Subject: [PATCH 160/171] libceph: take map_sem for read in handle_reply() Handling redirect replies requires both map_sem and request_mutex. Taking map_sem unconditionally near the top of handle_reply() avoids possible race conditions that arise from releasing request_mutex to be able to acquire map_sem in redirect reply case. (Lock ordering is: map_sem, request_mutex, crush_mutex.) 
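A minimal sketch of the locking shape this gives handle_reply() (illustrative only; the sketch function name is made up, and the real function does the decoding and completion work in between):

static void handle_reply_locking_sketch(struct ceph_osd_client *osdc)
{
	down_read(&osdc->map_sem);		/* outermost: the osdmap rwsem */
	mutex_lock(&osdc->request_mutex);	/* then the request mutex */

	/*
	 * ... look up the request; on a redirect it can be restarted via
	 * __ceph_osdc_start_request() right here, with both locks held,
	 * instead of dropping request_mutex in order to take map_sem later ...
	 */

	mutex_unlock(&osdc->request_mutex);
	up_read(&osdc->map_sem);
}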
Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/osd_client.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 2aa82b6bb305..0676f2b199d6 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1687,6 +1687,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, osdmap_epoch = ceph_decode_32(&p); /* lookup */ + down_read(&osdc->map_sem); mutex_lock(&osdc->request_mutex); req = __lookup_request(osdc, tid); if (req == NULL) { @@ -1743,7 +1744,6 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, dout("redirect pool %lld\n", redir.oloc.pool); __unregister_request(osdc, req); - mutex_unlock(&osdc->request_mutex); req->r_target_oloc = redir.oloc; /* struct */ @@ -1755,10 +1755,10 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, * successfully. In the future we might want to follow * original request's nofail setting here. */ - err = ceph_osdc_start_request(osdc, req, true); + err = __ceph_osdc_start_request(osdc, req, true); BUG_ON(err); - goto done; + goto out_unlock; } already_completed = req->r_got_reply; @@ -1776,8 +1776,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, req->r_got_reply = 1; } else if ((flags & CEPH_OSD_FLAG_ONDISK) == 0) { dout("handle_reply tid %llu dup ack\n", tid); - mutex_unlock(&osdc->request_mutex); - goto done; + goto out_unlock; } dout("handle_reply tid %llu flags %d\n", tid, flags); @@ -1792,6 +1791,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, __unregister_request(osdc, req); mutex_unlock(&osdc->request_mutex); + up_read(&osdc->map_sem); if (!already_completed) { if (req->r_unsafe_callback && @@ -1809,10 +1809,14 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, complete_request(req); } -done: +out: dout("req=%p req->r_linger=%d\n", req, req->r_linger); ceph_osdc_put_request(req); return; +out_unlock: + mutex_unlock(&osdc->request_mutex); + up_read(&osdc->map_sem); + goto out; bad_put: req->r_result = -EIO; @@ -1825,6 +1829,7 @@ bad_put: ceph_osdc_put_request(req); bad_mutex: mutex_unlock(&osdc->request_mutex); + up_read(&osdc->map_sem); bad: pr_err("corrupt osd_op_reply got %d %d\n", (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len)); From 0ec1d15ec6ed513ab2cc86c67d94761d71228a32 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 5 Feb 2014 15:19:55 +0200 Subject: [PATCH 161/171] libceph: do not dereference a NULL bio pointer Commit f38a5181d9f3 ("ceph: Convert to immutable biovecs") introduced a NULL pointer dereference, which broke rbd in -rc1. Fix it. 
Cc: Kent Overstreet Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/messenger.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 0e478a0f4204..30efc5c18622 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -840,9 +840,13 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, if (!cursor->bvec_iter.bi_size) { bio = bio->bi_next; - cursor->bvec_iter = bio->bi_iter; + cursor->bio = bio; + if (bio) + cursor->bvec_iter = bio->bi_iter; + else + memset(&cursor->bvec_iter, 0, + sizeof(cursor->bvec_iter)); } - cursor->bio = bio; if (!cursor->last_piece) { BUG_ON(!cursor->resid); From 1ccfe6f982674e4b7bd832b904bafcb3db890252 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 31 Jan 2014 13:47:34 +0100 Subject: [PATCH 162/171] watchdog: dw_wdt: Add dependency on HAS_IOMEM On archs like S390 or um this driver cannot build nor work. Make it depend on HAS_IOMEM to bypass build failures. drivers/built-in.o: In function `dw_wdt_drv_probe': drivers/watchdog/dw_wdt.c:302: undefined reference to `devm_ioremap_resource' Signed-off-by: Richard Weinberger Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 4c4c566c52a3..79d25894343a 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -223,6 +223,7 @@ config SA1100_WATCHDOG config DW_WATCHDOG tristate "Synopsys DesignWare watchdog" + depends on HAS_IOMEM help Say Y here if to include support for the Synopsys DesignWare watchdog timer found in many chips. From c18f7b51200c3c8b76c63e391f9995b65ace9c83 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 7 Feb 2014 14:36:10 -0600 Subject: [PATCH 163/171] jfs: fix generic posix ACL regression I missed a couple errors in reviewing the patches converting jfs to use the generic posix ACL function. Setting ACL's currently fails with -EOPNOTSUPP. Signed-off-by: Dave Kleikamp Reported-by: Michael L. 
Semon Reviewed-by: Christoph Hellwig --- fs/jfs/xattr.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 3bd5ee45f7b3..46325d5c34fc 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -854,9 +854,6 @@ int jfs_setxattr(struct dentry *dentry, const char *name, const void *value, int rc; tid_t tid; - if ((rc = can_set_xattr(inode, name, value, value_len))) - return rc; - /* * If this is a request for a synthetic attribute in the system.* * namespace use the generic infrastructure to resolve a handler @@ -865,6 +862,9 @@ int jfs_setxattr(struct dentry *dentry, const char *name, const void *value, if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) return generic_setxattr(dentry, name, value, value_len, flags); + if ((rc = can_set_xattr(inode, name, value, value_len))) + return rc; + if (value == NULL) { /* empty EA, do not remove */ value = ""; value_len = 0; @@ -1034,9 +1034,6 @@ int jfs_removexattr(struct dentry *dentry, const char *name) int rc; tid_t tid; - if ((rc = can_set_xattr(inode, name, NULL, 0))) - return rc; - /* * If this is a request for a synthetic attribute in the system.* * namespace use the generic infrastructure to resolve a handler @@ -1045,6 +1042,9 @@ int jfs_removexattr(struct dentry *dentry, const char *name) if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) return generic_removexattr(dentry, name); + if ((rc = can_set_xattr(inode, name, NULL, 0))) + return rc; + tid = txBegin(inode->i_sb, 0); mutex_lock(&ji->commit_mutex); rc = __jfs_setxattr(tid, dentry->d_inode, name, NULL, 0, XATTR_REPLACE); @@ -1061,7 +1061,7 @@ int jfs_removexattr(struct dentry *dentry, const char *name) * attributes are handled directly. */ const struct xattr_handler *jfs_xattr_handlers[] = { -#ifdef JFS_POSIX_ACL +#ifdef CONFIG_JFS_POSIX_ACL &posix_acl_access_xattr_handler, &posix_acl_default_xattr_handler, #endif From 6cc98d90f8d14f8ebce2391323929024d7eef39f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 5 Feb 2014 16:19:21 -0500 Subject: [PATCH 164/171] Btrfs: fix assert screwup for the pending move stuff Wang noticed that he was failing btrfs/030 even though me and Filipe couldn't reproduce. Turns out this is because Wang didn't have CONFIG_BTRFS_ASSERT set, which meant that a key part of Filipe's original patch was not being built in. This appears to be a mess up with merging Filipe's patch as it does not exist in his original patch. Fix this by changing how we make sure del_waiting_dir_move asserts that it did not error and take the function out of the ifdef check. This makes btrfs/030 pass with the assert on or off. 
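The underlying hazard is putting a side effect inside an assertion: when CONFIG_BTRFS_ASSERT is not set, ASSERT() compiles away and its argument is never evaluated. A minimal before/after sketch of the call site (matching the shape of the diff below):

	/* Broken when assertions are compiled out: the call disappears,
	 * so the entry is never removed from waiting_dir_moves. */
	ASSERT(del_waiting_dir_move(sctx, pm->ino) == 0);

	/* Fixed: the call always happens, only the check is conditional. */
	ret = del_waiting_dir_move(sctx, pm->ino);
	ASSERT(ret == 0);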
Thanks, Signed-off-by: Josef Bacik Reviewed-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/send.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index cf9107a64204..9c8d1a3fdc3a 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -2774,8 +2774,6 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) return 0; } -#ifdef CONFIG_BTRFS_ASSERT - static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino) { struct rb_node *n = sctx->waiting_dir_moves.rb_node; @@ -2796,8 +2794,6 @@ static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino) return -ENOENT; } -#endif - static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino) { struct rb_node **p = &sctx->pending_dir_moves.rb_node; @@ -2902,7 +2898,9 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) } sctx->send_progress = sctx->cur_ino + 1; - ASSERT(del_waiting_dir_move(sctx, pm->ino) == 0); + ret = del_waiting_dir_move(sctx, pm->ino); + ASSERT(ret == 0); + ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); if (ret < 0) goto out; From d0270aca88966641eb15306e9bd0c7ad15321440 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 7 Feb 2014 14:33:57 +0100 Subject: [PATCH 165/171] btrfs: commit transaction after setting label and features The set_fslabel ioctl uses btrfs_end_transaction, which means it's possible that the change will be lost if the system crashes, same for the newly set features. Let's use btrfs_commit_transaction instead. Signed-off-by: Jeff Mahoney Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 34772cbcc7aa..5bbf6b7216c3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4547,7 +4547,7 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) spin_lock(&root->fs_info->super_lock); strcpy(super_block->label, label); spin_unlock(&root->fs_info->super_lock); - ret = btrfs_end_transaction(trans, root); + ret = btrfs_commit_transaction(trans, root); out_unlock: mnt_drop_write_file(file); @@ -4711,7 +4711,7 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg) btrfs_set_super_incompat_flags(super_block, newflags); spin_unlock(&root->fs_info->super_lock); - return btrfs_end_transaction(trans, root); + return btrfs_commit_transaction(trans, root); } long btrfs_ioctl(struct file *file, unsigned int From 8051aa1a3d5aaa7bd4c062cad94d09c3d567ef2e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 7 Feb 2014 14:34:04 +0100 Subject: [PATCH 166/171] btrfs: reserve no transaction units in btrfs_ioctl_set_features Added in patch "btrfs: add ioctls to query/change feature bits online" modifications to superblock don't need to reserve metadata blocks when starting a transaction. 
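In other words, the item count passed to btrfs_start_transaction() is a metadata reservation, and an update that only touches the superblock needs none. An illustrative sketch of the intended call (mirroring the one-line diff below):

	/* 0 items: nothing in the metadata trees is modified, only the
	 * superblock fields written out when the transaction commits. */
	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);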
Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5bbf6b7216c3..ebdd866d4cfd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4690,7 +4690,7 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg) if (ret) return ret; - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) return PTR_ERR(trans); From 27a377db745ed4d11b3b9b340756857cb8dde07f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 7 Feb 2014 13:57:59 -0500 Subject: [PATCH 167/171] Btrfs: don't loop forever if we can't run because of the tree mod log A user reported a 100% cpu hang with my new delayed ref code. Turns out I forgot to increase the count check when we can't run a delayed ref because of the tree mod log. If we can't run any delayed refs during this there is no point in continuing to look, and we need to break out. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9c9ecc93ae2c..32312e09f0f5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2385,6 +2385,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, spin_unlock(&delayed_refs->lock); locked_ref = NULL; cond_resched(); + count++; continue; } From a2aa75e18a21b21952dc6daa9bac7c9f4426f81f Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Sat, 8 Feb 2014 15:47:46 +0000 Subject: [PATCH 168/171] Btrfs: fix data corruption when reading/updating compressed extents When using a mix of compressed file extents and prealloc extents, it is possible to fill a page of a file with random, garbage data from some unrelated previous use of the page, instead of a sequence of zeroes. A simple sequence of steps to get into such case, taken from the test case I made for xfstests, is: _scratch_mkfs _scratch_mount "-o compress-force=lzo" $XFS_IO_PROG -f -c "pwrite -S 0x06 -b 18670 266978 18670" $SCRATCH_MNT/foobar $XFS_IO_PROG -c "falloc 26450 665194" $SCRATCH_MNT/foobar $XFS_IO_PROG -c "truncate 542872" $SCRATCH_MNT/foobar $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar This results in the following file items in the fs tree: item 4 key (257 INODE_ITEM 0) itemoff 15879 itemsize 160 inode generation 6 transid 6 size 542872 block group 0 mode 100600 item 5 key (257 INODE_REF 256) itemoff 15863 itemsize 16 inode ref index 2 namelen 6 name: foobar item 6 key (257 EXTENT_DATA 0) itemoff 15810 itemsize 53 extent data disk byte 0 nr 0 gen 6 extent data offset 0 nr 24576 ram 266240 extent compression 0 item 7 key (257 EXTENT_DATA 24576) itemoff 15757 itemsize 53 prealloc data disk byte 12849152 nr 241664 gen 6 prealloc data offset 0 nr 241664 item 8 key (257 EXTENT_DATA 266240) itemoff 15704 itemsize 53 extent data disk byte 12845056 nr 4096 gen 6 extent data offset 0 nr 20480 ram 20480 extent compression 2 item 9 key (257 EXTENT_DATA 286720) itemoff 15651 itemsize 53 prealloc data disk byte 13090816 nr 405504 gen 6 prealloc data offset 0 nr 258048 The on disk extent at offset 266240 (which corresponds to 1 single disk block), contains 5 compressed chunks of file data. Each of the first 4 compress 4096 bytes of file data, while the last one only compresses 3024 bytes of file data. 
Therefore a read into the file region [285648 ; 286720[ (length = 4096 - 3024 = 1072 bytes) should always return zeroes (our next extent is a prealloc one). The solution here is the compression code path to zero the remaining (untouched) bytes of the last page it uncompressed data into, as the information about how much space the file data consumes in the last page is not known in the upper layer fs/btrfs/extent_io.c:__do_readpage(). In __do_readpage we were correctly zeroing the remainder of the page but only if it corresponds to the last page of the inode and if the inode's size is not a multiple of the page size. This would cause not only returning random data on reads, but also permanently storing random data when updating parts of the region that should be zeroed. For the example above, it means updating a single byte in the region [285648 ; 286720[ would store that byte correctly but also store random data on disk. A test case for xfstests follows soon. Signed-off-by: Filipe David Borba Manana Signed-off-by: Chris Mason --- fs/btrfs/compression.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index af815eb8f970..ed1ff1cb1017 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1011,6 +1011,8 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, bytes = min(bytes, working_bytes); kaddr = kmap_atomic(page_out); memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); + if (*pg_index == (vcnt - 1) && *pg_offset == 0) + memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); kunmap_atomic(kaddr); flush_dcache_page(page_out); From d311d79de305f1ada47cadd672e6ed1b28a949eb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 9 Feb 2014 15:18:09 -0500 Subject: [PATCH 169/171] fix O_SYNC|O_APPEND syncing the wrong range on write() It actually goes back to 2004 ([PATCH] Concurrent O_SYNC write support) when sync_page_range() had been introduced; generic_file_write{,v}() correctly synced pos_after_write - written .. pos_after_write - 1 but generic_file_aio_write() synced pos_before_write .. pos_before_write + written - 1 instead. Which is not the same thing with O_APPEND, obviously. A couple of years later correct variant had been killed off when everything switched to use of generic_file_aio_write(). All users of generic_file_aio_write() are affected, and the same bug has been copied into other instances of ->aio_write(). The fix is trivial; the only subtle point is that generic_write_sync() ought to be inlined to avoid calculations useless for the majority of calls. 
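To make the difference concrete under O_APPEND (numbers invented for the example): the file is 100 bytes long, the descriptor's offset is 0, and 50 bytes are written. The data lands at offsets 100..149 and iocb->ki_pos ends up at 150, so the range that must be synced is ki_pos - written .. ki_pos - 1 = 100..149; the old code synced 0..49 instead, bytes the write never touched. The corrected caller-side pattern, which the diffs below apply in each filesystem, is roughly:

	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
	if (ret > 0) {
		ssize_t err;

		/* ki_pos has already been advanced past the write, so
		 * ki_pos - ret is where the data actually landed */
		err = generic_write_sync(file, iocb->ki_pos - ret, ret);
		if (err < 0)
			ret = err;
	}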
Signed-off-by: Al Viro --- fs/cifs/file.c | 4 ++-- fs/ext4/file.c | 2 +- fs/ntfs/file.c | 2 +- fs/sync.c | 17 ----------------- fs/xfs/xfs_file.c | 2 +- include/linux/fs.h | 8 +++++++- mm/filemap.c | 4 ++-- 7 files changed, 14 insertions(+), 25 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 853d6d1cc822..a7eda8ebfacc 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2559,8 +2559,8 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, if (rc > 0) { ssize_t err; - err = generic_write_sync(file, pos, rc); - if (err < 0 && rc > 0) + err = generic_write_sync(file, iocb->ki_pos - rc, rc); + if (err < 0) rc = err; } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 43e64f6022eb..1a5073959f32 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -152,7 +152,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, if (ret > 0) { ssize_t err; - err = generic_write_sync(file, pos, ret); + err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0 && ret > 0) ret = err; } diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index ea4ba9daeb47..db9bd8a31725 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2134,7 +2134,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); mutex_unlock(&inode->i_mutex); if (ret > 0) { - int err = generic_write_sync(file, pos, ret); + int err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0) ret = err; } diff --git a/fs/sync.c b/fs/sync.c index f15537452231..e8ba024a055b 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -222,23 +222,6 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd) return do_fsync(fd, 1); } -/** - * generic_write_sync - perform syncing after a write if file / inode is sync - * @file: file to which the write happened - * @pos: offset where the write started - * @count: length of the write - * - * This is just a simple wrapper about our general syncing function. - */ -int generic_write_sync(struct file *file, loff_t pos, loff_t count) -{ - if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) - return 0; - return vfs_fsync_range(file, pos, pos + count - 1, - (file->f_flags & __O_SYNC) ? 0 : 1); -} -EXPORT_SYMBOL(generic_write_sync); - /* * sys_sync_file_range() permits finely controlled syncing over a segment of * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 2e7989e3a2d6..64b48eade91d 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -799,7 +799,7 @@ xfs_file_aio_write( XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ - err = generic_write_sync(file, pos, ret); + err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0) ret = err; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 09f553c59813..75ff961be051 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2273,7 +2273,13 @@ extern int filemap_fdatawrite_range(struct address_space *mapping, extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); -extern int generic_write_sync(struct file *file, loff_t pos, loff_t count); +static inline int generic_write_sync(struct file *file, loff_t pos, loff_t count) +{ + if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) + return 0; + return vfs_fsync_range(file, pos, pos + count - 1, + (file->f_flags & __O_SYNC) ? 
0 : 1); +} extern void emergency_sync(void); extern void emergency_remount(void); #ifdef CONFIG_BLOCK diff --git a/mm/filemap.c b/mm/filemap.c index d56d3c145b9f..7a13f6ac5421 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2553,8 +2553,8 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (ret > 0) { ssize_t err; - err = generic_write_sync(file, pos, ret); - if (err < 0 && ret > 0) + err = generic_write_sync(file, iocb->ki_pos - ret, ret); + if (err < 0) ret = err; } return ret; From c9efe51165fa0aff57be54e3cb0201ac87f68980 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 Feb 2014 07:05:05 -0500 Subject: [PATCH 170/171] fix a kmap leak in virtio_console While we are at it, don't do kmap() under kmap_atomic(), *especially* for a page we'd allocated with GFP_KERNEL. It's spelled "page_address", and had that been more than that, we'd have a real trouble - kmap_high() can block, and doing that while holding kmap_atomic() is a Bad Idea(tm). Signed-off-by: Al Viro --- drivers/char/virtio_console.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index feea87cc6b8f..6928d094451d 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -890,12 +890,10 @@ static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf, } else { /* Failback to copying a page */ struct page *page = alloc_page(GFP_KERNEL); - char *src = buf->ops->map(pipe, buf, 1); - char *dst; + char *src; if (!page) return -ENOMEM; - dst = kmap(page); offset = sd->pos & ~PAGE_MASK; @@ -903,9 +901,8 @@ static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf, if (len + offset > PAGE_SIZE) len = PAGE_SIZE - offset; - memcpy(dst + offset, src + buf->offset, len); - - kunmap(page); + src = buf->ops->map(pipe, buf, 1); + memcpy(page_address(page) + offset, src + buf->offset, len); buf->ops->unmap(pipe, buf, src); sg_set_page(&(sgl->sg[sgl->n]), page, len, offset); From b28a960c42fcd9cfc987441fa6d1c1a471f0f9ed Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 9 Feb 2014 18:15:47 -0800 Subject: [PATCH 171/171] Linux 3.14-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 606ef7c4a544..933e1def6baf 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Shuffling Zombie Juror # *DOCUMENTATION*