diff --git a/arch/Kconfig b/arch/Kconfig index 4877a8c8ee16..fe48fc7a3eba 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -32,8 +32,9 @@ config HAVE_OPROFILE config KPROBES bool "Kprobes" - depends on KALLSYMS && MODULES + depends on MODULES depends on HAVE_KPROBES + select KALLSYMS help Kprobes allows you to trap at almost any kernel address and execute a callback function. register_kprobe() establishes @@ -45,7 +46,6 @@ config OPTPROBES def_bool y depends on KPROBES && HAVE_OPTPROBES depends on !PREEMPT - select KALLSYMS_ALL config HAVE_EFFICIENT_UNALIGNED_ACCESS bool diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 528a11e8d3e3..824ca07860d0 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -20,7 +20,7 @@ struct arch_hw_breakpoint { #include /* Available HW breakpoint length encodings */ -#define X86_BREAKPOINT_LEN_X 0x00 +#define X86_BREAKPOINT_LEN_X 0x40 #define X86_BREAKPOINT_LEN_1 0x40 #define X86_BREAKPOINT_LEN_2 0x44 #define X86_BREAKPOINT_LEN_4 0x4c diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index a474ec37c32f..ff15c9dcc25d 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -206,11 +206,27 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp) int arch_bp_generic_fields(int x86_len, int x86_type, int *gen_len, int *gen_type) { + /* Type */ + switch (x86_type) { + case X86_BREAKPOINT_EXECUTE: + if (x86_len != X86_BREAKPOINT_LEN_X) + return -EINVAL; + + *gen_type = HW_BREAKPOINT_X; + *gen_len = sizeof(long); + return 0; + case X86_BREAKPOINT_WRITE: + *gen_type = HW_BREAKPOINT_W; + break; + case X86_BREAKPOINT_RW: + *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; + break; + default: + return -EINVAL; + } + /* Len */ switch (x86_len) { - case X86_BREAKPOINT_LEN_X: - *gen_len = sizeof(long); - break; case X86_BREAKPOINT_LEN_1: *gen_len = HW_BREAKPOINT_LEN_1; break; @@ -229,21 +245,6 @@ int arch_bp_generic_fields(int x86_len, int x86_type, return -EINVAL; } - /* Type */ - switch (x86_type) { - case X86_BREAKPOINT_EXECUTE: - *gen_type = HW_BREAKPOINT_X; - break; - case X86_BREAKPOINT_WRITE: - *gen_type = HW_BREAKPOINT_W; - break; - case X86_BREAKPOINT_RW: - *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; - break; - default: - return -EINVAL; - } - return 0; } @@ -316,9 +317,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) ret = -EINVAL; switch (info->len) { - case X86_BREAKPOINT_LEN_X: - align = sizeof(long) -1; - break; case X86_BREAKPOINT_LEN_1: align = 0; break; diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 9257510b4836..9d5f55848455 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -324,9 +324,8 @@ static void lguest_load_gdt(const struct desc_ptr *desc) } /* - * For a single GDT entry which changes, we do the lazy thing: alter our GDT, - * then tell the Host to reload the entire thing. This operation is so rare - * that this naive implementation is reasonable. + * For a single GDT entry which changes, we simply change our copy and + * then tell the host about it. */ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, const void *desc, int type) @@ -338,9 +337,13 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, } /* - * OK, I lied. There are three "thread local storage" GDT entries which change + * There are three "thread local storage" GDT entries which change * on every context switch (these three entries are how glibc implements - * __thread variables). So we have a hypercall specifically for this case. + * __thread variables). As an optimization, we have a hypercall + * specifically for this case. + * + * Wouldn't it be nicer to have a general LOAD_GDT_ENTRIES hypercall + * which took a range of entries? */ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) { diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cfe4faabb0f6..009b819f48d0 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -671,7 +671,9 @@ static int __init ppro_init(char **cpu_type) case 14: *cpu_type = "i386/core"; break; - case 15: case 23: + case 0x0f: + case 0x16: + case 0x17: *cpu_type = "i386/core_2"; break; case 0x1a: diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index eab58db5f91c..cd18493c9527 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -806,6 +806,8 @@ static const struct intel_driver_description { "G45/G43", NULL, &intel_i965_driver }, { PCI_DEVICE_ID_INTEL_B43_HB, PCI_DEVICE_ID_INTEL_B43_IG, "B43", NULL, &intel_i965_driver }, + { PCI_DEVICE_ID_INTEL_B43_1_HB, PCI_DEVICE_ID_INTEL_B43_1_IG, + "B43", NULL, &intel_i965_driver }, { PCI_DEVICE_ID_INTEL_G41_HB, PCI_DEVICE_ID_INTEL_G41_IG, "G41", NULL, &intel_i965_driver }, { PCI_DEVICE_ID_INTEL_IRONLAKE_D_HB, PCI_DEVICE_ID_INTEL_IRONLAKE_D_IG, diff --git a/drivers/char/agp/intel-agp.h b/drivers/char/agp/intel-agp.h index ee189c74d345..d09b1ab7e8ab 100644 --- a/drivers/char/agp/intel-agp.h +++ b/drivers/char/agp/intel-agp.h @@ -186,6 +186,8 @@ #define PCI_DEVICE_ID_INTEL_Q33_IG 0x29D2 #define PCI_DEVICE_ID_INTEL_B43_HB 0x2E40 #define PCI_DEVICE_ID_INTEL_B43_IG 0x2E42 +#define PCI_DEVICE_ID_INTEL_B43_1_HB 0x2E90 +#define PCI_DEVICE_ID_INTEL_B43_1_IG 0x2E92 #define PCI_DEVICE_ID_INTEL_GM45_HB 0x2A40 #define PCI_DEVICE_ID_INTEL_GM45_IG 0x2A42 #define PCI_DEVICE_ID_INTEL_EAGLELAKE_HB 0x2E00 diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 942a9826bd23..c810481a5bc2 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -596,6 +596,10 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, ssize_t ret; bool nonblock; + /* Userspace could be out to fool us */ + if (!count) + return 0; + port = filp->private_data; nonblock = filp->f_flags & O_NONBLOCK; @@ -642,7 +646,7 @@ static unsigned int port_fops_poll(struct file *filp, poll_table *wait) poll_wait(filp, &port->waitqueue, wait); ret = 0; - if (port->inbuf) + if (!will_read_block(port)) ret |= POLLIN | POLLRDNORM; if (!will_write_block(port)) ret |= POLLOUT; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 216deb579785..6dbe14cc4f74 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -170,6 +170,7 @@ static const struct pci_device_id pciidlist[] = { /* aka */ INTEL_VGA_DEVICE(0x2e22, &intel_g45_info), /* G45_G */ INTEL_VGA_DEVICE(0x2e32, &intel_g45_info), /* G41_G */ INTEL_VGA_DEVICE(0x2e42, &intel_g45_info), /* B43_G */ + INTEL_VGA_DEVICE(0x2e92, &intel_g45_info), /* B43_G.1 */ INTEL_VGA_DEVICE(0xa001, &intel_pineview_info), INTEL_VGA_DEVICE(0xa011, &intel_pineview_info), INTEL_VGA_DEVICE(0x0042, &intel_ironlake_d_info), diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 16fca1d1799a..cf4ffbee1c00 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2351,14 +2351,21 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj) reg->obj = obj; - if (IS_GEN6(dev)) + switch (INTEL_INFO(dev)->gen) { + case 6: sandybridge_write_fence_reg(reg); - else if (IS_I965G(dev)) + break; + case 5: + case 4: i965_write_fence_reg(reg); - else if (IS_I9XX(dev)) + break; + case 3: i915_write_fence_reg(reg); - else + break; + case 2: i830_write_fence_reg(reg); + break; + } trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg, obj_priv->tiling_mode); @@ -2381,22 +2388,26 @@ i915_gem_clear_fence_reg(struct drm_gem_object *obj) struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[obj_priv->fence_reg]; + uint32_t fence_reg; - if (IS_GEN6(dev)) { + switch (INTEL_INFO(dev)->gen) { + case 6: I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (obj_priv->fence_reg * 8), 0); - } else if (IS_I965G(dev)) { + break; + case 5: + case 4: I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0); - } else { - uint32_t fence_reg; - - if (obj_priv->fence_reg < 8) - fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4; + break; + case 3: + if (obj_priv->fence_reg > 8) + fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg - 8) * 4; else - fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg - - 8) * 4; + case 2: + fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4; I915_WRITE(fence_reg, 0); + break; } reg->obj = NULL; diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 72cae3cccad8..e85246ef691c 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -79,6 +79,7 @@ mark_free(struct drm_i915_gem_object *obj_priv, struct list_head *unwind) { list_add(&obj_priv->evict_list, unwind); + drm_gem_object_reference(&obj_priv->base); return drm_mm_scan_add_block(obj_priv->gtt_space); } @@ -165,6 +166,7 @@ i915_gem_evict_something(struct drm_device *dev, int min_size, unsigned alignmen list_for_each_entry(obj_priv, &unwind_list, evict_list) { ret = drm_mm_scan_remove_block(obj_priv->gtt_space); BUG_ON(ret); + drm_gem_object_unreference(&obj_priv->base); } /* We expect the caller to unpin, evict all and try again, or give up. @@ -181,18 +183,21 @@ found: * scanning, therefore store to be evicted objects on a * temporary list. */ list_move(&obj_priv->evict_list, &eviction_list); - } + } else + drm_gem_object_unreference(&obj_priv->base); } /* Unbinding will emit any required flushes */ list_for_each_entry_safe(obj_priv, tmp_obj_priv, &eviction_list, evict_list) { #if WATCH_LRU - DRM_INFO("%s: evicting %p\n", __func__, obj); + DRM_INFO("%s: evicting %p\n", __func__, &obj_priv->base); #endif ret = i915_gem_object_unbind(&obj_priv->base); if (ret) return ret; + + drm_gem_object_unreference(&obj_priv->base); } /* The just created free hole should be on the top of the free stack diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 2c6b98f2440e..31f08581e93a 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -789,16 +789,25 @@ int i915_save_state(struct drm_device *dev) dev_priv->saveSWF2[i] = I915_READ(SWF30 + (i << 2)); /* Fences */ - if (IS_I965G(dev)) { + switch (INTEL_INFO(dev)->gen) { + case 6: + for (i = 0; i < 16; i++) + dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_SANDYBRIDGE_0 + (i * 8)); + break; + case 5: + case 4: for (i = 0; i < 16; i++) dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_965_0 + (i * 8)); - } else { - for (i = 0; i < 8; i++) - dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4)); - + break; + case 3: if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) for (i = 0; i < 8; i++) dev_priv->saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4)); + case 2: + for (i = 0; i < 8; i++) + dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4)); + break; + } return 0; @@ -815,15 +824,24 @@ int i915_restore_state(struct drm_device *dev) I915_WRITE(HWS_PGA, dev_priv->saveHWS); /* Fences */ - if (IS_I965G(dev)) { + switch (INTEL_INFO(dev)->gen) { + case 6: + for (i = 0; i < 16; i++) + I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (i * 8), dev_priv->saveFENCE[i]); + break; + case 5: + case 4: for (i = 0; i < 16; i++) I915_WRITE64(FENCE_REG_965_0 + (i * 8), dev_priv->saveFENCE[i]); - } else { - for (i = 0; i < 8; i++) - I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]); + break; + case 3: + case 2: if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) for (i = 0; i < 8; i++) I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->saveFENCE[i+8]); + for (i = 0; i < 8; i++) + I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]); + break; } i915_restore_display(dev); diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index a02a8df73727..197d4f32585a 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -188,7 +188,7 @@ static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector) if (wait_for((I915_READ(PCH_ADPA) & ADPA_CRT_HOTPLUG_FORCE_TRIGGER) == 0, 1000, 1)) - DRM_ERROR("timed out waiting for FORCE_TRIGGER"); + DRM_DEBUG_KMS("timed out waiting for FORCE_TRIGGER"); if (turn_off_dac) { I915_WRITE(PCH_ADPA, temp); @@ -245,7 +245,7 @@ static bool intel_crt_detect_hotplug(struct drm_connector *connector) if (wait_for((I915_READ(PORT_HOTPLUG_EN) & CRT_HOTPLUG_FORCE_DETECT) == 0, 1000, 1)) - DRM_ERROR("timed out waiting for FORCE_DETECT to go off"); + DRM_DEBUG_KMS("timed out waiting for FORCE_DETECT to go off"); } stat = I915_READ(PORT_HOTPLUG_STAT); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 19daead5b525..b5bf51a4502d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2463,11 +2463,19 @@ static bool intel_crtc_mode_fixup(struct drm_crtc *crtc, struct drm_display_mode *adjusted_mode) { struct drm_device *dev = crtc->dev; + if (HAS_PCH_SPLIT(dev)) { /* FDI link clock is fixed at 2.7G */ if (mode->clock * 3 > IRONLAKE_FDI_FREQ * 4) return false; } + + /* XXX some encoders set the crtcinfo, others don't. + * Obviously we need some form of conflict resolution here... + */ + if (adjusted_mode->crtc_htotal == 0) + drm_mode_set_crtcinfo(adjusted_mode, 0); + return true; } diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index bc87b9c1d27e..0fcd2640c23f 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig @@ -3,6 +3,7 @@ config CEPH_FS depends on INET && EXPERIMENTAL select LIBCRC32C select CRYPTO_AES + select CRYPTO help Choose Y or M here to include support for mounting the experimental Ceph distributed file system. Ceph is an extremely diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4cfce1ee31fa..efbc604001c8 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -411,8 +411,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) if (i_size < page_off + len) len = i_size - page_off; - dout("writepage %p page %p index %lu on %llu~%u\n", - inode, page, page->index, page_off, len); + dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", + inode, page, page->index, page_off, len, snapc); writeback_stat = atomic_long_inc_return(&client->writeback_count); if (writeback_stat > @@ -766,7 +766,8 @@ get_more_pages: /* ok */ if (locked_pages == 0) { /* prepare async write request */ - offset = page->index << PAGE_CACHE_SHIFT; + offset = (unsigned long long)page->index + << PAGE_CACHE_SHIFT; len = wsize; req = ceph_osdc_new_request(&client->osdc, &ci->i_layout, diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index a2069b6680ae..73c153092f72 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -814,7 +814,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci) used |= CEPH_CAP_PIN; if (ci->i_rd_ref) used |= CEPH_CAP_FILE_RD; - if (ci->i_rdcache_ref || ci->i_rdcache_gen) + if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages) used |= CEPH_CAP_FILE_CACHE; if (ci->i_wr_ref) used |= CEPH_CAP_FILE_WR; @@ -1195,10 +1195,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, * asynchronously back to the MDS once sync writes complete and dirty * data is written out. * + * Unless @again is true, skip cap_snaps that were already sent to + * the MDS (i.e., during this session). + * * Called under i_lock. Takes s_mutex as needed. */ void __ceph_flush_snaps(struct ceph_inode_info *ci, - struct ceph_mds_session **psession) + struct ceph_mds_session **psession, + int again) __releases(ci->vfs_inode->i_lock) __acquires(ci->vfs_inode->i_lock) { @@ -1227,7 +1231,7 @@ retry: * pages to be written out. */ if (capsnap->dirty_pages || capsnap->writing) - continue; + break; /* * if cap writeback already occurred, we should have dropped @@ -1240,6 +1244,13 @@ retry: dout("no auth cap (migrating?), doing nothing\n"); goto out; } + + /* only flush each capsnap once */ + if (!again && !list_empty(&capsnap->flushing_item)) { + dout("already flushed %p, skipping\n", capsnap); + continue; + } + mds = ci->i_auth_cap->session->s_mds; mseq = ci->i_auth_cap->mseq; @@ -1276,8 +1287,8 @@ retry: &session->s_cap_snaps_flushing); spin_unlock(&inode->i_lock); - dout("flush_snaps %p cap_snap %p follows %lld size %llu\n", - inode, capsnap, next_follows, capsnap->size); + dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", + inode, capsnap, capsnap->follows, capsnap->flush_tid); send_cap_msg(session, ceph_vino(inode).ino, 0, CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, @@ -1314,7 +1325,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) struct inode *inode = &ci->vfs_inode; spin_lock(&inode->i_lock); - __ceph_flush_snaps(ci, NULL); + __ceph_flush_snaps(ci, NULL, 0); spin_unlock(&inode->i_lock); } @@ -1477,7 +1488,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, /* flush snaps first time around only */ if (!list_empty(&ci->i_cap_snaps)) - __ceph_flush_snaps(ci, &session); + __ceph_flush_snaps(ci, &session, 0); goto retry_locked; retry: spin_lock(&inode->i_lock); @@ -1894,7 +1905,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc, if (cap && cap->session == session) { dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, cap, capsnap); - __ceph_flush_snaps(ci, &session); + __ceph_flush_snaps(ci, &session, 1); } else { pr_err("%p auth cap %p not mds%d ???\n", inode, cap, session->s_mds); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 6e4f43ff23ec..a1986eb52045 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1021,11 +1021,15 @@ out_touch: static void ceph_dentry_release(struct dentry *dentry) { struct ceph_dentry_info *di = ceph_dentry(dentry); - struct inode *parent_inode = dentry->d_parent->d_inode; - u64 snapid = ceph_snap(parent_inode); + struct inode *parent_inode = NULL; + u64 snapid = CEPH_NOSNAP; + if (!IS_ROOT(dentry)) { + parent_inode = dentry->d_parent->d_inode; + if (parent_inode) + snapid = ceph_snap(parent_inode); + } dout("dentry_release %p parent %p\n", dentry, parent_inode); - if (parent_inode && snapid != CEPH_SNAPDIR) { struct ceph_inode_info *ci = ceph_inode(parent_inode); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e7cca414da03..62377ec37edf 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -845,7 +845,7 @@ static void ceph_set_dentry_offset(struct dentry *dn) * the caller) if we fail. */ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, - bool *prehash) + bool *prehash, bool set_offset) { struct dentry *realdn; @@ -877,7 +877,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, } if ((!prehash || *prehash) && d_unhashed(dn)) d_rehash(dn); - ceph_set_dentry_offset(dn); + if (set_offset) + ceph_set_dentry_offset(dn); out: return dn; } @@ -1062,7 +1063,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, d_delete(dn); goto done; } - dn = splice_dentry(dn, in, &have_lease); + dn = splice_dentry(dn, in, &have_lease, true); if (IS_ERR(dn)) { err = PTR_ERR(dn); goto done; @@ -1105,7 +1106,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, goto done; } dout(" linking snapped dir %p to dn %p\n", in, dn); - dn = splice_dentry(dn, in, NULL); + dn = splice_dentry(dn, in, NULL, true); if (IS_ERR(dn)) { err = PTR_ERR(dn); goto done; @@ -1237,7 +1238,7 @@ retry_lookup: err = PTR_ERR(in); goto out; } - dn = splice_dentry(dn, in, NULL); + dn = splice_dentry(dn, in, NULL, false); if (IS_ERR(dn)) dn = NULL; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index f091b1351786..fad95f8f2608 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2374,6 +2374,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, num_fcntl_locks, num_flock_locks); unlock_kernel(); + } else { + err = ceph_pagelist_append(pagelist, &rec, reclen); } out_free: diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c index b6859f47d364..46a368b6dce5 100644 --- a/fs/ceph/pagelist.c +++ b/fs/ceph/pagelist.c @@ -5,10 +5,18 @@ #include "pagelist.h" +static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) +{ + struct page *page = list_entry(pl->head.prev, struct page, + lru); + kunmap(page); +} + int ceph_pagelist_release(struct ceph_pagelist *pl) { if (pl->mapped_tail) - kunmap(pl->mapped_tail); + ceph_pagelist_unmap_tail(pl); + while (!list_empty(&pl->head)) { struct page *page = list_first_entry(&pl->head, struct page, lru); @@ -26,7 +34,7 @@ static int ceph_pagelist_addpage(struct ceph_pagelist *pl) pl->room += PAGE_SIZE; list_add_tail(&page->lru, &pl->head); if (pl->mapped_tail) - kunmap(pl->mapped_tail); + ceph_pagelist_unmap_tail(pl); pl->mapped_tail = kmap(page); return 0; } diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 4868b9dcac5a..190b6c4a6f2b 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -119,6 +119,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( INIT_LIST_HEAD(&realm->children); INIT_LIST_HEAD(&realm->child_item); INIT_LIST_HEAD(&realm->empty_item); + INIT_LIST_HEAD(&realm->dirty_item); INIT_LIST_HEAD(&realm->inodes_with_caps); spin_lock_init(&realm->inodes_with_caps_lock); __insert_snap_realm(&mdsc->snap_realms, realm); @@ -467,7 +468,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) INIT_LIST_HEAD(&capsnap->ci_item); INIT_LIST_HEAD(&capsnap->flushing_item); - capsnap->follows = snapc->seq - 1; + capsnap->follows = snapc->seq; capsnap->issued = __ceph_caps_issued(ci, NULL); capsnap->dirty = dirty; @@ -604,6 +605,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm; int invalidate = 0; int err = -ENOMEM; + LIST_HEAD(dirty_realms); dout("update_snap_trace deletion=%d\n", deletion); more: @@ -626,24 +628,6 @@ more: } } - if (le64_to_cpu(ri->seq) > realm->seq) { - dout("update_snap_trace updating %llx %p %lld -> %lld\n", - realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); - /* - * if the realm seq has changed, queue a cap_snap for every - * inode with open caps. we do this _before_ we update - * the realm info so that we prepare for writeback under the - * _previous_ snap context. - * - * ...unless it's a snap deletion! - */ - if (!deletion) - queue_realm_cap_snaps(realm); - } else { - dout("update_snap_trace %llx %p seq %lld unchanged\n", - realm->ino, realm, realm->seq); - } - /* ensure the parent is correct */ err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); if (err < 0) @@ -651,6 +635,8 @@ more: invalidate += err; if (le64_to_cpu(ri->seq) > realm->seq) { + dout("update_snap_trace updating %llx %p %lld -> %lld\n", + realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); /* update realm parameters, snap lists */ realm->seq = le64_to_cpu(ri->seq); realm->created = le64_to_cpu(ri->created); @@ -668,9 +654,17 @@ more: if (err < 0) goto fail; + /* queue realm for cap_snap creation */ + list_add(&realm->dirty_item, &dirty_realms); + invalidate = 1; } else if (!realm->cached_context) { + dout("update_snap_trace %llx %p seq %lld new\n", + realm->ino, realm, realm->seq); invalidate = 1; + } else { + dout("update_snap_trace %llx %p seq %lld unchanged\n", + realm->ino, realm, realm->seq); } dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, @@ -683,6 +677,14 @@ more: if (invalidate) rebuild_snap_realms(realm); + /* + * queue cap snaps _after_ we've built the new snap contexts, + * so that i_head_snapc can be set appropriately. + */ + list_for_each_entry(realm, &dirty_realms, dirty_item) { + queue_realm_cap_snaps(realm); + } + __cleanup_empty_realms(mdsc); return 0; @@ -715,7 +717,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc) igrab(inode); spin_unlock(&mdsc->snap_flush_lock); spin_lock(&inode->i_lock); - __ceph_flush_snaps(ci, &session); + __ceph_flush_snaps(ci, &session, 0); spin_unlock(&inode->i_lock); iput(inode); spin_lock(&mdsc->snap_flush_lock); @@ -816,6 +818,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, }; struct inode *inode = ceph_find_inode(sb, vino); struct ceph_inode_info *ci; + struct ceph_snap_realm *oldrealm; if (!inode) continue; @@ -841,18 +844,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, dout(" will move %p to split realm %llx %p\n", inode, realm->ino, realm); /* - * Remove the inode from the realm's inode - * list, but don't add it to the new realm - * yet. We don't want the cap_snap to be - * queued (again) by ceph_update_snap_trace() - * below. Queue it _now_, under the old context. + * Move the inode to the new realm */ spin_lock(&realm->inodes_with_caps_lock); list_del_init(&ci->i_snap_realm_item); + list_add(&ci->i_snap_realm_item, + &realm->inodes_with_caps); + oldrealm = ci->i_snap_realm; + ci->i_snap_realm = realm; spin_unlock(&realm->inodes_with_caps_lock); spin_unlock(&inode->i_lock); - ceph_queue_cap_snap(ci); + ceph_get_snap_realm(mdsc, realm); + ceph_put_snap_realm(mdsc, oldrealm); iput(inode); continue; @@ -880,43 +884,9 @@ skip_inode: ceph_update_snap_trace(mdsc, p, e, op == CEPH_SNAP_OP_DESTROY); - if (op == CEPH_SNAP_OP_SPLIT) { - /* - * ok, _now_ add the inodes into the new realm. - */ - for (i = 0; i < num_split_inos; i++) { - struct ceph_vino vino = { - .ino = le64_to_cpu(split_inos[i]), - .snap = CEPH_NOSNAP, - }; - struct inode *inode = ceph_find_inode(sb, vino); - struct ceph_inode_info *ci; - - if (!inode) - continue; - ci = ceph_inode(inode); - spin_lock(&inode->i_lock); - if (list_empty(&ci->i_snap_realm_item)) { - struct ceph_snap_realm *oldrealm = - ci->i_snap_realm; - - dout(" moving %p to split realm %llx %p\n", - inode, realm->ino, realm); - spin_lock(&realm->inodes_with_caps_lock); - list_add(&ci->i_snap_realm_item, - &realm->inodes_with_caps); - ci->i_snap_realm = realm; - spin_unlock(&realm->inodes_with_caps_lock); - ceph_get_snap_realm(mdsc, realm); - ceph_put_snap_realm(mdsc, oldrealm); - } - spin_unlock(&inode->i_lock); - iput(inode); - } - + if (op == CEPH_SNAP_OP_SPLIT) /* we took a reference when we created the realm, above */ ceph_put_snap_realm(mdsc, realm); - } __cleanup_empty_realms(mdsc); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index c33897ae5725..b87638e84c4b 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -690,6 +690,8 @@ struct ceph_snap_realm { struct list_head empty_item; /* if i have ref==0 */ + struct list_head dirty_item; /* if realm needs new context */ + /* the current set of snaps for this realm */ struct ceph_snap_context *cached_context; @@ -826,7 +828,8 @@ extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, struct ceph_snap_context *snapc); extern void __ceph_flush_snaps(struct ceph_inode_info *ci, - struct ceph_mds_session **psession); + struct ceph_mds_session **psession, + int again); extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_session *session); extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b614758..63d069bd80b7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1093,6 +1093,10 @@ struct file_lock { #include +/* temporary stubs for BKL removal */ +#define lock_flocks() lock_kernel() +#define unlock_flocks() unlock_kernel() + extern void send_sigio(struct fown_struct *fown, int fd, int band); #ifdef CONFIG_FILE_LOCKING diff --git a/kernel/sched.c b/kernel/sched.c index 794819eab9ca..c0d2067f3e0d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3513,9 +3513,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) rtime = nsecs_to_cputime(p->se.sum_exec_runtime); if (total) { - u64 temp; + u64 temp = rtime; - temp = (u64)(rtime * utime); + temp *= utime; do_div(temp, total); utime = (cputime_t)temp; } else @@ -3546,9 +3546,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) rtime = nsecs_to_cputime(cputime.sum_exec_runtime); if (total) { - u64 temp; + u64 temp = rtime; - temp = (u64)(rtime * cputime.utime); + temp *= cputime.utime; do_div(temp, total); utime = (cputime_t)temp; } else diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index a171138a9402..db3f674ca49d 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -3630,7 +3630,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu) if (time_before(now, nohz.next_balance)) return 0; - if (!rq->nr_running) + if (rq->idle_at_tick) return 0; first_pick_cpu = atomic_read(&nohz.first_pick_cpu);