From 7f77897ef2b6a5ee4eb8bc24fe8b1f3eab254328 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 24 May 2011 11:43:18 +0200 Subject: [PATCH 001/327] HID: hiddev: fix potential use-after-free Commit 6cb4b040795 ("HID: hiddev: fix race between hiddev_disconnect and hiddev_release") made it possible to access hiddev (for unlocking the existance mutex) once hiddev has been kfreed. Change the order so that this can not happen (always unlock the mutex first, it is needed only to protect access to ->exist and ->open). Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hiddev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index ff3c644888b1..4985f485932f 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -923,10 +923,11 @@ void hiddev_disconnect(struct hid_device *hid) usb_deregister_dev(usbhid->intf, &hiddev_class); if (hiddev->open) { + mutex_unlock(&hiddev->existancelock); usbhid_close(hiddev->hid); wake_up_interruptible(&hiddev->wait); } else { + mutex_unlock(&hiddev->existancelock); kfree(hiddev); } - mutex_unlock(&hiddev->existancelock); } From e23be0a27dcc9297ff0495360d89bc5b0bf12383 Mon Sep 17 00:00:00 2001 From: Jimmy Hon Date: Fri, 20 May 2011 17:59:19 -0400 Subject: [PATCH 002/327] HID: add quirk for HyperPen 10000U Add 5543:0064 UC-Logic Technology Corp. Aiptek HyperPen 10000U to quirks with HID_QUIRK_MULTI_INPUT. Originally the device is reporting the x,y coordinates on Z and RX. By adding this quirk, there will be two kernel devices. The first one is muted and the second device will report coordinates on X and Y. Signed-off-by: Jimmy Hon Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 0b374a6d6db0..2bbaad70d76d 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -622,6 +622,7 @@ #define USB_VENDOR_ID_UCLOGIC 0x5543 #define USB_DEVICE_ID_UCLOGIC_TABLET_PF1209 0x0042 #define USB_DEVICE_ID_UCLOGIC_TABLET_KNA5 0x6001 +#define USB_DEVICE_ID_UCLOGIC_TABLET_TWA60 0x0064 #define USB_DEVICE_ID_UCLOGIC_TABLET_WP4030U 0x0003 #define USB_DEVICE_ID_UCLOGIC_TABLET_WP5540U 0x0004 #define USB_DEVICE_ID_UCLOGIC_TABLET_WP8060U 0x0005 diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 0e30b140edca..621959d5cc42 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -74,6 +74,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_PF1209, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP4030U, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_KNA5, HID_QUIRK_MULTI_INPUT }, + { USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWA60, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP5540U, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP8060U, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH, HID_QUIRK_MULTI_INPUT }, From 8c127f0717b438e6abc3d92d4ae248c4224b9dcb Mon Sep 17 00:00:00 2001 From: Hans Petter Selasky Date: Wed, 25 May 2011 09:24:32 -0700 Subject: [PATCH 003/327] Input: properly assign return value of clamp() macro. [dtor@mail.ru: added mousedev changes] Signed-off-by: Hans Petter Selasky Cc: stable@kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 2 +- drivers/input/mousedev.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/input/input.c b/drivers/input/input.c index 75e11c7b70fd..da38d97a51b1 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -1756,7 +1756,7 @@ static unsigned int input_estimate_events_per_packet(struct input_dev *dev) } else if (test_bit(ABS_MT_TRACKING_ID, dev->absbit)) { mt_slots = dev->absinfo[ABS_MT_TRACKING_ID].maximum - dev->absinfo[ABS_MT_TRACKING_ID].minimum + 1, - clamp(mt_slots, 2, 32); + mt_slots = clamp(mt_slots, 2, 32); } else if (test_bit(ABS_MT_POSITION_X, dev->absbit)) { mt_slots = 2; } else { diff --git a/drivers/input/mousedev.c b/drivers/input/mousedev.c index 257e033986e4..0110b5a3a167 100644 --- a/drivers/input/mousedev.c +++ b/drivers/input/mousedev.c @@ -187,7 +187,7 @@ static void mousedev_abs_event(struct input_dev *dev, struct mousedev *mousedev, if (size == 0) size = xres ? : 1; - clamp(value, min, max); + value = clamp(value, min, max); mousedev->packet.x = ((value - min) * xres) / size; mousedev->packet.abs_event = 1; @@ -201,7 +201,7 @@ static void mousedev_abs_event(struct input_dev *dev, struct mousedev *mousedev, if (size == 0) size = yres ? : 1; - clamp(value, min, max); + value = clamp(value, min, max); mousedev->packet.y = yres - ((value - min) * yres) / size; mousedev->packet.abs_event = 1; From 5c699d7d3f94ee1dd934edea889b32f8279a4e65 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 26 May 2011 11:49:16 +0300 Subject: [PATCH 004/327] HID: hiddev: fix use after free in hiddev_release There are a couple use after free bugs here. Signed-off-by: Dan Carpenter [jkosina@suse.cz: removed already fixed hunk] Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hiddev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index 4985f485932f..7c1188b53c3e 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -248,12 +248,15 @@ static int hiddev_release(struct inode * inode, struct file * file) usbhid_close(list->hiddev->hid); usbhid_put_power(list->hiddev->hid); } else { + mutex_unlock(&list->hiddev->existancelock); kfree(list->hiddev); + kfree(list); + return 0; } } - kfree(list); mutex_unlock(&list->hiddev->existancelock); + kfree(list); return 0; } From 0f7e4c33eb2c40b1e9cc24d2eab6de5921bc619c Mon Sep 17 00:00:00 2001 From: Kohei Kaigai Date: Thu, 26 May 2011 14:59:25 -0400 Subject: [PATCH 005/327] selinux: fix case of names with whitespace/multibytes on /selinux/create I submit the patch again, according to patch submission convension. This patch enables to accept percent-encoded object names as forth argument of /selinux/create interface to avoid possible bugs when we give an object name including whitespace or multibutes. E.g) if and when a userspace object manager tries to create a new object named as "resolve.conf but fake", it shall give this name as the forth argument of the /selinux/create. But sscanf() logic in kernel space fetches only the part earlier than the first whitespace. In this case, selinux may unexpectedly answer a default security context configured to "resolve.conf", but it is bug. Although I could not test this patch on named TYPE_TRANSITION rules actually, But debug printk() message seems to me the logic works correctly. I assume the libselinux provides an interface to apply this logic transparently, so nothing shall not be changed from the viewpoint of application. Signed-off-by: KaiGai Kohei Signed-off-by: Eric Paris --- security/selinux/selinuxfs.c | 37 +++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index fde4e9d64bfd..19489042fdf7 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -29,6 +29,7 @@ #include #include #include +#include /* selinuxfs pseudo filesystem for exporting the security policy API. Based on the proc code and the fs/nfsd/nfsctl.c code. */ @@ -751,6 +752,14 @@ out: return length; } +static inline int hexcode_to_int(int code) { + if (code == '\0' || !isxdigit(code)) + return -1; + if (isdigit(code)) + return code - '0'; + return tolower(code) - 'a' + 10; +} + static ssize_t sel_write_create(struct file *file, char *buf, size_t size) { char *scon = NULL, *tcon = NULL; @@ -785,8 +794,34 @@ static ssize_t sel_write_create(struct file *file, char *buf, size_t size) nargs = sscanf(buf, "%s %s %hu %s", scon, tcon, &tclass, namebuf); if (nargs < 3 || nargs > 4) goto out; - if (nargs == 4) + if (nargs == 4) { + /* + * If and when the name of new object to be queried contains + * either whitespace or multibyte characters, they shall be + * encoded based on the percentage-encoding rule. + * If not encoded, the sscanf logic picks up only left-half + * of the supplied name; splitted by a whitespace unexpectedly. + */ + char *r, *w; + int c1, c2; + + r = w = namebuf; + do { + c1 = *r++; + if (c1 == '+') + c1 = ' '; + else if (c1 == '%') { + if ((c1 = hexcode_to_int(*r++)) < 0) + goto out; + if ((c2 = hexcode_to_int(*r++)) < 0) + goto out; + c1 = (c1 << 4) | c2; + } + *w++ = c1; + } while (c1 != '\0'); + objname = namebuf; + } length = security_context_to_sid(scon, strlen(scon) + 1, &ssid); if (length) From ebf30dc91cc8592cd72b004219cfc276b3ad2854 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 31 May 2011 16:10:00 -0700 Subject: [PATCH 006/327] msm: timer: Fix SMP build error Fix build breakage on SMP=y builds due to 0f7b332 (ARM: consolidate SMP cross call implementation, 2011-04-03) arch/arm/mach-msm/timer.c: In function 'local_timer_setup': arch/arm/mach-msm/timer.c:295: error: implicit declaration of function 'gic_enable_ppi' Signed-off-by: Stephen Boyd Signed-off-by: David Brown --- arch/arm/mach-msm/timer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c index 38b95e949d13..9bfdd5ad2441 100644 --- a/arch/arm/mach-msm/timer.c +++ b/arch/arm/mach-msm/timer.c @@ -23,6 +23,8 @@ #include #include +#include + #include #include From 5def1360252b974faeb438775c19c14338bc1903 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Sun, 5 Jun 2011 23:26:40 -0400 Subject: [PATCH 007/327] ext4: correct comments for ext4_free_blocks() metadata is not parameter of ext4_free_blocks() any more. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 859f2ae8864e..eb71dd59f2e4 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4448,7 +4448,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, * @inode: inode * @block: start physical block to free * @count: number of blocks to count - * @metadata: Are these metadata blocks + * @flags: flags used by ext4_free_blocks */ void ext4_free_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bh, ext4_fsblk_t block, From 298c48a811673ba5e292359545f3af6d1a6c9764 Mon Sep 17 00:00:00 2001 From: Srinivas KANDAGATLA Date: Thu, 2 Jun 2011 10:30:44 +0000 Subject: [PATCH 008/327] sh: fix wrong icache/dcache address-array start addr in cache-debugfs. This patch fixes a icache/dcache address-array start address while dumping its entires in debugfs. Perviously the code was attempting to remember the address in static variable, which is no more required for debugfs, as the function can be executed in one pass. Without this patch the start address ends up in wrong place and the /sys/kernel/debug/sh/icache or dcache debugfs contents may not be correct. Signed-off-by: Srinivas Kandagatla Cc: Stuart Menefy Signed-off-by: Paul Mundt --- arch/sh/mm/cache-debugfs.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/arch/sh/mm/cache-debugfs.c b/arch/sh/mm/cache-debugfs.c index 52411462c409..115725198038 100644 --- a/arch/sh/mm/cache-debugfs.c +++ b/arch/sh/mm/cache-debugfs.c @@ -26,9 +26,9 @@ static int cache_seq_show(struct seq_file *file, void *iter) { unsigned int cache_type = (unsigned int)file->private; struct cache_info *cache; - unsigned int waysize, way, cache_size; - unsigned long ccr, base; - static unsigned long addrstart = 0; + unsigned int waysize, way; + unsigned long ccr; + unsigned long addrstart = 0; /* * Go uncached immediately so we don't skew the results any @@ -45,28 +45,13 @@ static int cache_seq_show(struct seq_file *file, void *iter) } if (cache_type == CACHE_TYPE_DCACHE) { - base = CACHE_OC_ADDRESS_ARRAY; + addrstart = CACHE_OC_ADDRESS_ARRAY; cache = ¤t_cpu_data.dcache; } else { - base = CACHE_IC_ADDRESS_ARRAY; + addrstart = CACHE_IC_ADDRESS_ARRAY; cache = ¤t_cpu_data.icache; } - /* - * Due to the amount of data written out (depending on the cache size), - * we may be iterated over multiple times. In this case, keep track of - * the entry position in addrstart, and rewind it when we've hit the - * end of the cache. - * - * Likewise, the same code is used for multiple caches, so care must - * be taken for bouncing addrstart back and forth so the appropriate - * cache is hit. - */ - cache_size = cache->ways * cache->sets * cache->linesz; - if (((addrstart & 0xff000000) != base) || - (addrstart & 0x00ffffff) > cache_size) - addrstart = base; - waysize = cache->sets; /* From f17722f917b2f21497deb6edc62fb1683daa08e6 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 6 Jun 2011 00:05:17 -0400 Subject: [PATCH 009/327] ext4: Fix max file size and logical block counting of extent format file Kazuya Mio reported that he was able to hit BUG_ON(next == lblock) in ext4_ext_put_gap_in_cache() while creating a sparse file in extent format and fill the tail of file up to its end. We will hit the BUG_ON when we write the last block (2^32-1) into the sparse file. The root cause of the problem lies in the fact that we specifically set s_maxbytes so that block at s_maxbytes fit into on-disk extent format, which is 32 bit long. However, we are not storing start and end block number, but rather start block number and length in blocks. It means that in order to cover extent from 0 to EXT_MAX_BLOCK we need EXT_MAX_BLOCK+1 to fit into len (because we counting block 0 as well) - and it does not. The only way to fix it without changing the meaning of the struct ext4_extent members is, as Kazuya Mio suggested, to lower s_maxbytes by one fs block so we can cover the whole extent we can get by the on-disk extent format. Also in many places EXT_MAX_BLOCK is used as length instead of maximum logical block number as the name suggests, it is all a bit messy. So this commit renames it to EXT_MAX_BLOCKS and change its usage in some places to actually be maximum number of blocks in the extent. The bug which this commit fixes can be reproduced as follows: dd if=/dev/zero of=/mnt/mp1/file bs= count=1 seek=$((2**32-2)) sync dd if=/dev/zero of=/mnt/mp1/file bs= count=1 seek=$((2**32-1)) Reported-by: Kazuya Mio Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4_extents.h | 7 +++++-- fs/ext4/extents.c | 34 +++++++++++++++++----------------- fs/ext4/move_extent.c | 10 +++++----- fs/ext4/super.c | 15 ++++++++++++--- 4 files changed, 39 insertions(+), 27 deletions(-) diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 2e29abb30f76..476414644387 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -133,8 +133,11 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, #define EXT_BREAK 1 #define EXT_REPEAT 2 -/* Maximum logical block in a file; ext4_extent's ee_block is __le32 */ -#define EXT_MAX_BLOCK 0xffffffff +/* + * Maximum number of logical blocks in a file; ext4_extent's ee_block is + * __le32. + */ +#define EXT_MAX_BLOCKS 0xffffffff /* * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 5199bac7fc62..41575704600a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1408,7 +1408,7 @@ got_index: /* * ext4_ext_next_allocated_block: - * returns allocated block in subsequent extent or EXT_MAX_BLOCK. + * returns allocated block in subsequent extent or EXT_MAX_BLOCKS. * NOTE: it considers block number from index entry as * allocated block. Thus, index entries have to be consistent * with leaves. @@ -1422,7 +1422,7 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path) depth = path->p_depth; if (depth == 0 && path->p_ext == NULL) - return EXT_MAX_BLOCK; + return EXT_MAX_BLOCKS; while (depth >= 0) { if (depth == path->p_depth) { @@ -1439,12 +1439,12 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path) depth--; } - return EXT_MAX_BLOCK; + return EXT_MAX_BLOCKS; } /* * ext4_ext_next_leaf_block: - * returns first allocated block from next leaf or EXT_MAX_BLOCK + * returns first allocated block from next leaf or EXT_MAX_BLOCKS */ static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, struct ext4_ext_path *path) @@ -1456,7 +1456,7 @@ static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, /* zero-tree has no leaf blocks at all */ if (depth == 0) - return EXT_MAX_BLOCK; + return EXT_MAX_BLOCKS; /* go to index block */ depth--; @@ -1469,7 +1469,7 @@ static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, depth--; } - return EXT_MAX_BLOCK; + return EXT_MAX_BLOCKS; } /* @@ -1677,13 +1677,13 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode, */ if (b2 < b1) { b2 = ext4_ext_next_allocated_block(path); - if (b2 == EXT_MAX_BLOCK) + if (b2 == EXT_MAX_BLOCKS) goto out; } /* check for wrap through zero on extent logical start block*/ if (b1 + len1 < b1) { - len1 = EXT_MAX_BLOCK - b1; + len1 = EXT_MAX_BLOCKS - b1; newext->ee_len = cpu_to_le16(len1); ret = 1; } @@ -1767,7 +1767,7 @@ repeat: fex = EXT_LAST_EXTENT(eh); next = ext4_ext_next_leaf_block(inode, path); if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block) - && next != EXT_MAX_BLOCK) { + && next != EXT_MAX_BLOCKS) { ext_debug("next leaf block - %d\n", next); BUG_ON(npath != NULL); npath = ext4_ext_find_extent(inode, next, NULL); @@ -1887,7 +1887,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, BUG_ON(func == NULL); BUG_ON(inode == NULL); - while (block < last && block != EXT_MAX_BLOCK) { + while (block < last && block != EXT_MAX_BLOCKS) { num = last - block; /* find extent for this block */ down_read(&EXT4_I(inode)->i_data_sem); @@ -2020,7 +2020,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, if (ex == NULL) { /* there is no extent yet, so gap is [0;-] */ lblock = 0; - len = EXT_MAX_BLOCK; + len = EXT_MAX_BLOCKS; ext_debug("cache gap(whole file):"); } else if (block < le32_to_cpu(ex->ee_block)) { lblock = block; @@ -2350,7 +2350,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, * never happen because at least one of the end points * needs to be on the edge of the extent. */ - if (end == EXT_MAX_BLOCK) { + if (end == EXT_MAX_BLOCKS - 1) { ext_debug(" bad truncate %u:%u\n", start, end); block = 0; @@ -2398,7 +2398,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, * If this is a truncate, this condition * should never happen */ - if (end == EXT_MAX_BLOCK) { + if (end == EXT_MAX_BLOCKS - 1) { ext_debug(" bad truncate %u:%u\n", start, end); err = -EIO; @@ -2478,7 +2478,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, * we need to remove it from the leaf */ if (num == 0) { - if (end != EXT_MAX_BLOCK) { + if (end != EXT_MAX_BLOCKS - 1) { /* * For hole punching, we need to scoot all the * extents up when an extent is removed so that @@ -3699,7 +3699,7 @@ void ext4_ext_truncate(struct inode *inode) last_block = (inode->i_size + sb->s_blocksize - 1) >> EXT4_BLOCK_SIZE_BITS(sb); - err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCK); + err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); /* In a multi-transaction truncate, we only make the final * transaction synchronous. @@ -4347,8 +4347,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, start_blk = start >> inode->i_sb->s_blocksize_bits; last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits; - if (last_blk >= EXT_MAX_BLOCK) - last_blk = EXT_MAX_BLOCK-1; + if (last_blk >= EXT_MAX_BLOCKS) + last_blk = EXT_MAX_BLOCKS-1; len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; /* diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 2b8304bf3c50..f57455a1b1b2 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -1002,12 +1002,12 @@ mext_check_arguments(struct inode *orig_inode, return -EINVAL; } - if ((orig_start > EXT_MAX_BLOCK) || - (donor_start > EXT_MAX_BLOCK) || - (*len > EXT_MAX_BLOCK) || - (orig_start + *len > EXT_MAX_BLOCK)) { + if ((orig_start >= EXT_MAX_BLOCKS) || + (donor_start >= EXT_MAX_BLOCKS) || + (*len > EXT_MAX_BLOCKS) || + (orig_start + *len >= EXT_MAX_BLOCKS)) { ext4_debug("ext4 move extent: Can't handle over [%u] blocks " - "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCK, + "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS, orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cc5c157aa11d..9ea71aa864b3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2243,6 +2243,12 @@ static void ext4_orphan_cleanup(struct super_block *sb, * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, * so that won't be a limiting factor. * + * However there is other limiting factor. We do store extents in the form + * of starting block and length, hence the resulting length of the extent + * covering maximum file size must fit into on-disk format containers as + * well. Given that length is always by 1 unit bigger than max unit (because + * we count 0 as well) we have to lower the s_maxbytes by one fs block. + * * Note, this does *not* consider any metadata overhead for vfs i_blocks. */ static loff_t ext4_max_size(int blkbits, int has_huge_files) @@ -2264,10 +2270,13 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files) upper_limit <<= blkbits; } - /* 32-bit extent-start container, ee_block */ - res = 1LL << 32; + /* + * 32-bit extent-start container, ee_block. We lower the maxbytes + * by one fs block, so ee_len can cover the extent of maximum file + * size + */ + res = (1LL << 32) - 1; res <<= blkbits; - res -= 1; /* Sanity check against vm- & vfs- imposed limits */ if (res > upper_limit) From c03f8aa9abdd517477c2021ea1251939b4da49e6 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 6 Jun 2011 00:06:52 -0400 Subject: [PATCH 010/327] ext4: use FIEMAP_EXTENT_LAST flag for last extent in fiemap Currently we are not marking the extent as the last one (FIEMAP_EXTENT_LAST) if there is a hole at the end of the file. This is because we just do not check for it right now and continue searching for next extent. But at the point we hit the hole at the end of the file, it is too late. This commit adds check for the allocated block in subsequent extent and if there is no more extents (block = EXT_MAX_BLOCKS) just flag the current one as the last one. This behaviour has been spotted unintentionally by 252 xfstest, when the test hangs out, because of wrong loop condition. However on other filesystems (like xfs) it will exit anyway, because we notice the last extent flag and exit. With this patch xfstest 252 does not hang anymore, ext4 fiemap implementation still reports bad extent type in some cases, however this seems to be different issue. Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4_extents.h | 2 +- fs/ext4/extents.c | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 476414644387..095c36f3b612 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -125,7 +125,7 @@ struct ext4_ext_path { * positive retcode - signal for ext4_ext_walk_space(), see below * callback must return valid extent (passed or newly created) */ -typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, +typedef int (*ext_prepare_callback)(struct inode *, ext4_lblk_t, struct ext4_ext_cache *, struct ext4_extent *, void *); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 41575704600a..f815cc81e7a2 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1958,7 +1958,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, err = -EIO; break; } - err = func(inode, path, &cbex, ex, cbdata); + err = func(inode, next, &cbex, ex, cbdata); ext4_ext_drop_refs(path); if (err < 0) @@ -3914,14 +3914,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, /* * Callback function called for each extent to gather FIEMAP information. */ -static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, +static int ext4_ext_fiemap_cb(struct inode *inode, ext4_lblk_t next, struct ext4_ext_cache *newex, struct ext4_extent *ex, void *data) { __u64 logical; __u64 physical; __u64 length; - loff_t size; __u32 flags = 0; int ret = 0; struct fiemap_extent_info *fieinfo = data; @@ -4103,8 +4102,7 @@ found_delayed_extent: if (ex && ext4_ext_is_uninitialized(ex)) flags |= FIEMAP_EXTENT_UNWRITTEN; - size = i_size_read(inode); - if (logical + length >= size) + if (next == EXT_MAX_BLOCKS) flags |= FIEMAP_EXTENT_LAST; ret = fiemap_fill_next_extent(fieinfo, logical, physical, From ad377c630864e2609c54385907493dbf68a34295 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 25 May 2011 11:34:57 +0200 Subject: [PATCH 011/327] arm: mxs: include asm/processor.h for cpu_relax() I get this build error as of today: arch/arm/mach-mxs/ocotp.c: In function 'mxs_get_ocotp': arch/arm/mach-mxs/ocotp.c:54: error: implicit declaration of function 'cpu_relax' make[2]: *** [arch/arm/mach-mxs/ocotp.o] Error 1 Looks like it has been indirectly included before which broke now. Include it directly. Signed-off-by: Wolfram Sang Cc: Shawn Guo Signed-off-by: Sascha Hauer --- arch/arm/mach-mxs/ocotp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-mxs/ocotp.c b/arch/arm/mach-mxs/ocotp.c index 65157a35dbba..54add60f94c9 100644 --- a/arch/arm/mach-mxs/ocotp.c +++ b/arch/arm/mach-mxs/ocotp.c @@ -16,6 +16,8 @@ #include #include +#include /* for cpu_relax() */ + #include #define OCOTP_WORD_OFFSET 0x20 From 5f85e931278ff80aa3ea7861a73a6c6ce940f2e9 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 28 May 2011 16:05:08 -0300 Subject: [PATCH 012/327] ARM: mx51/sdma: Check the chip revision in run-time Check the MX51 chip revision in run-time so that the correct SDMA firmware can be loaded. While at it also remove the silicon revision from the sdma_script_start_addrs structure name for MX51. All the MX51 revisions share the same SDMA start addresses. Signed-off-by: Fabio Estevam Signed-off-by: Sascha Hauer --- arch/arm/plat-mxc/devices/platform-imx-dma.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/plat-mxc/devices/platform-imx-dma.c b/arch/arm/plat-mxc/devices/platform-imx-dma.c index 3538b85ede91..b130f60ca6b7 100644 --- a/arch/arm/plat-mxc/devices/platform-imx-dma.c +++ b/arch/arm/plat-mxc/devices/platform-imx-dma.c @@ -139,7 +139,7 @@ static struct sdma_script_start_addrs addr_imx35_to2 = { #endif #ifdef CONFIG_SOC_IMX51 -static struct sdma_script_start_addrs addr_imx51_to1 = { +static struct sdma_script_start_addrs addr_imx51 = { .ap_2_ap_addr = 642, .uart_2_mcu_addr = 817, .mcu_2_app_addr = 747, @@ -196,7 +196,9 @@ static int __init imxXX_add_imx_dma(void) #if defined(CONFIG_SOC_IMX51) if (cpu_is_mx51()) { - imx51_imx_sdma_data.pdata.script_addrs = &addr_imx51_to1; + int to_version = mx51_revision() >> 4; + imx51_imx_sdma_data.pdata.to_version = to_version; + imx51_imx_sdma_data.pdata.script_addrs = &addr_imx51; ret = imx_add_imx_sdma(&imx51_imx_sdma_data); } else #endif From 363e9f05cbd105a900b7baf2cc55ec0cba546d08 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 6 Jun 2011 17:57:58 +0900 Subject: [PATCH 013/327] sh: Remove compressed kernel libgcc dependency. SH-2A is unable to combine the kernel and libgcc objects due to fundamental disagreements over FDPIC settings. As the kernel already contains all of the libgcc bits broken out, there's not much need to bother with the linking anymore, as everything can already be derived from the lib dir. This simply plugs in the necessary bits to ensure that everything is built uniformly, enabling us to wean the compressed build off of explicit libgcc linking. Reported-by: Phil Edworthy Signed-off-by: Paul Mundt --- arch/sh/boot/compressed/Makefile | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/sh/boot/compressed/Makefile b/arch/sh/boot/compressed/Makefile index 780e083e4d17..23bc849d9c64 100644 --- a/arch/sh/boot/compressed/Makefile +++ b/arch/sh/boot/compressed/Makefile @@ -27,8 +27,6 @@ IMAGE_OFFSET := $(shell /bin/bash -c 'printf "0x%08x" \ $(CONFIG_BOOT_LINK_OFFSET)]') endif -LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) - ifeq ($(CONFIG_MCOUNT),y) ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS)) @@ -37,7 +35,25 @@ endif LDFLAGS_vmlinux := --oformat $(ld-bfd) -Ttext $(IMAGE_OFFSET) -e startup \ -T $(obj)/../../kernel/vmlinux.lds -$(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o $(LIBGCC) FORCE +# +# Pull in the necessary libgcc bits from the in-kernel implementation. +# +lib1funcs-$(CONFIG_SUPERH32) := ashiftrt.S ashldi3.c ashrsi3.S ashlsi3.S \ + lshrsi3.S +lib1funcs-obj := \ + $(addsuffix .o, $(basename $(addprefix $(obj)/, $(lib1funcs-y)))) + +lib1funcs-dir := $(srctree)/arch/$(SRCARCH)/lib +ifeq ($(BITS),64) + lib1funcs-dir := $(addsuffix $(BITS), $(lib1funcs-dir)) +endif + +KBUILD_CFLAGS += -I$(lib1funcs-dir) + +$(addprefix $(obj)/,$(lib1funcs-y)): $(obj)/%: $(lib1funcs-dir)/% FORCE + $(call cmd,shipped) + +$(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o $(lib1funcs-obj) FORCE $(call if_changed,ld) @: From 2fdf99934c2c39848ad3633ce504a0262f21faf9 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 30 May 2011 19:44:22 +0100 Subject: [PATCH 014/327] ARM: 6946/1: vexpress: move v2m clock init to init_early Commit 7ff550de99141cbd3be0129d563cc4554fdde9f6 breaks vexpress booting. The v2m clock table needs to be setup in init_early before the timer initialization occurs. Signed-off-by: Rob Herring Signed-off-by: Russell King --- arch/arm/mach-vexpress/v2m.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c index 285edcd2da2a..9e6b93b1a043 100644 --- a/arch/arm/mach-vexpress/v2m.c +++ b/arch/arm/mach-vexpress/v2m.c @@ -46,12 +46,6 @@ static struct map_desc v2m_io_desc[] __initdata = { }, }; -static void __init v2m_init_early(void) -{ - ct_desc->init_early(); - versatile_sched_clock_init(MMIO_P2V(V2M_SYS_24MHZ), 24000000); -} - static void __init v2m_timer_init(void) { u32 scctrl; @@ -365,6 +359,13 @@ static struct clk_lookup v2m_lookups[] = { }, }; +static void __init v2m_init_early(void) +{ + ct_desc->init_early(); + clkdev_add_table(v2m_lookups, ARRAY_SIZE(v2m_lookups)); + versatile_sched_clock_init(MMIO_P2V(V2M_SYS_24MHZ), 24000000); +} + static void v2m_power_off(void) { if (v2m_cfg_write(SYS_CFG_SHUTDOWN | SYS_CFG_SITE_MB, 0)) @@ -418,8 +419,6 @@ static void __init v2m_init(void) { int i; - clkdev_add_table(v2m_lookups, ARRAY_SIZE(v2m_lookups)); - platform_device_register(&v2m_pcie_i2c_device); platform_device_register(&v2m_ddc_i2c_device); platform_device_register(&v2m_flash_device); From 9425032b77c3a4f066a032bac7f7d08668b6cc3c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 31 May 2011 22:10:03 +0100 Subject: [PATCH 015/327] ARM: 6947/2: mach-u300: fix compilation error in timer The introduction of the mmio timer accidentally referenced the old clocksource struct which does not exist anymore. Fix this by using a simple string instead. Signed-off-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/mach-u300/timer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/mach-u300/timer.c b/arch/arm/mach-u300/timer.c index 891cf44591e0..18d7fa0603c2 100644 --- a/arch/arm/mach-u300/timer.c +++ b/arch/arm/mach-u300/timer.c @@ -411,8 +411,7 @@ static void __init u300_timer_init(void) /* Use general purpose timer 2 as clock source */ if (clocksource_mmio_init(U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2CC, "GPT2", rate, 300, 32, clocksource_mmio_readl_up)) - printk(KERN_ERR "timer: failed to initialize clock " - "source %s\n", clocksource_u300_1mhz.name); + pr_err("timer: failed to initialize U300 clock source\n"); clockevents_calc_mult_shift(&clockevent_u300_1mhz, rate, APPTIMER_MIN_RANGE); From 9a819d8ac8197b44bbc2adad592b677fe749804d Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 31 May 2011 08:09:39 +0100 Subject: [PATCH 016/327] ARM: 6948/1: Fix .size directives for __arm{7,9}tdmi_proc_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gas used to accept (and ignore?) .size directives which referred to undefined symbols, as these do. In binutils 2.21 these are treated as fatal errors. The issue in proc-arm7tdmi.S was also fixed independently by Peter Chubb. Signed-off-by: Ben Hutchings Signed-off-by: Uwe Kleine-König Signed-off-by: Russell King --- arch/arm/mm/proc-arm7tdmi.S | 2 +- arch/arm/mm/proc-arm9tdmi.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S index e4c165ca6696..537ffcb0646d 100644 --- a/arch/arm/mm/proc-arm7tdmi.S +++ b/arch/arm/mm/proc-arm7tdmi.S @@ -146,7 +146,7 @@ __arm7tdmi_proc_info: .long 0 .long 0 .long v4_cache_fns - .size __arm7tdmi_proc_info, . - __arm7dmi_proc_info + .size __arm7tdmi_proc_info, . - __arm7tdmi_proc_info .type __triscenda7_proc_info, #object __triscenda7_proc_info: diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S index 7b7ebd4d096d..546b54da1005 100644 --- a/arch/arm/mm/proc-arm9tdmi.S +++ b/arch/arm/mm/proc-arm9tdmi.S @@ -116,7 +116,7 @@ __arm9tdmi_proc_info: .long 0 .long 0 .long v4_cache_fns - .size __arm9tdmi_proc_info, . - __arm9dmi_proc_info + .size __arm9tdmi_proc_info, . - __arm9tdmi_proc_info .type __p2001_proc_info, #object __p2001_proc_info: From 45f6d7e0e634d49744c1a590461ed1bb3d2201ac Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 2 Jun 2011 15:01:36 +0100 Subject: [PATCH 017/327] ARM: 6951/1: include .bss in memory layout information The "Virtual memory kernel layout" message at startup already prints .text and .data. Print .bss too. Signed-off-by: Rabin Vincent Signed-off-by: Russell King --- arch/arm/mm/init.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 2c2cce9cd8c8..b2cf9460ea60 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -635,7 +635,8 @@ void __init mem_init(void) " modules : 0x%08lx - 0x%08lx (%4ld MB)\n" " .init : 0x%p" " - 0x%p" " (%4d kB)\n" " .text : 0x%p" " - 0x%p" " (%4d kB)\n" - " .data : 0x%p" " - 0x%p" " (%4d kB)\n", + " .data : 0x%p" " - 0x%p" " (%4d kB)\n" + " .bss : 0x%p" " - 0x%p" " (%4d kB)\n", MLK(UL(CONFIG_VECTORS_BASE), UL(CONFIG_VECTORS_BASE) + (PAGE_SIZE)), @@ -657,7 +658,8 @@ void __init mem_init(void) MLK_ROUNDUP(__init_begin, __init_end), MLK_ROUNDUP(_text, _etext), - MLK_ROUNDUP(_sdata, _edata)); + MLK_ROUNDUP(_sdata, _edata), + MLK_ROUNDUP(__bss_start, __bss_stop)); #undef MLK #undef MLM From 9fc2552a68eb28f95f367156cf46a3da7843ff37 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 5 Jun 2011 02:24:58 +0100 Subject: [PATCH 018/327] ARM: 6952/1: fix lockdep warning of "unannotated irqs-off" This patch fixes the lockdep warning of "unannotated irqs-off"[1]. After entering __irq_usr, arm core will disable interrupt automatically, but __irq_usr does not annotate the irq disable, so lockdep may complain the warning if it has chance to check this in irq handler. This patch adds trace_hardirqs_off in __irq_usr before entering irq_handler to handle the irq, also calls ret_to_user_from_irq to avoid calling disable_irq again. This is also a fix for irq off tracer. [1], lockdep warning log of "unannotated irqs-off" [ 13.804687] ------------[ cut here ]------------ [ 13.809570] WARNING: at kernel/lockdep.c:3335 check_flags+0x78/0x1d0() [ 13.816467] Modules linked in: [ 13.819732] Backtrace: [ 13.822357] [] (dump_backtrace+0x0/0x100) from [] (dump_stack+0x20/0x24) [ 13.831268] r6:c07d8c2c r5:00000d07 r4:00000000 r3:00000000 [ 13.837280] [] (dump_stack+0x0/0x24) from [] (warn_slowpath_common+0x5c/0x74) [ 13.846649] [] (warn_slowpath_common+0x0/0x74) from [] (warn_slowpath_null+0x2c/0x34) [ 13.856781] r8:00000000 r7:00000000 r6:c18b8194 r5:60000093 r4:ef182000 [ 13.863708] r3:00000009 [ 13.866485] [] (warn_slowpath_null+0x0/0x34) from [] (check_flags+0x78/0x1d0) [ 13.875823] [] (check_flags+0x0/0x1d0) from [] (lock_acquire+0x4c/0x150) [ 13.884704] [] (lock_acquire+0x0/0x150) from [] (_raw_spin_lock+0x4c/0x84) [ 13.893798] [] (_raw_spin_lock+0x0/0x84) from [] (sched_ttwu_pending+0x58/0x8c) [ 13.903320] r6:ef92d040 r5:00000003 r4:c18b8180 [ 13.908233] [] (sched_ttwu_pending+0x0/0x8c) from [] (scheduler_ipi+0x18/0x1c) [ 13.917663] r6:ef183fb0 r5:00000003 r4:00000000 r3:00000001 [ 13.923645] [] (scheduler_ipi+0x0/0x1c) from [] (do_IPI+0x9c/0xfc) [ 13.932006] [] (do_IPI+0x0/0xfc) from [] (__irq_usr+0x48/0xe0) [ 13.939971] Exception stack(0xef183fb0 to 0xef183ff8) [ 13.945281] 3fa0: ffffffc3 0001500c 00000001 0001500c [ 13.953948] 3fc0: 00000050 400b45f0 400d9000 00000000 00000001 400d9600 6474e552 bea05b3c [ 13.962585] 3fe0: 400d96c0 bea059c0 400b6574 400b65d8 20000010 ffffffff [ 13.969573] r6:00000403 r5:fa240100 r4:ffffffff r3:20000010 [ 13.975585] ---[ end trace efc4896ab0fb62cb ]--- [ 13.980468] possible reason: unannotated irqs-off. [ 13.985534] irq event stamp: 1610 [ 13.989044] hardirqs last enabled at (1610): [] no_work_pending+0x8/0x2c [ 13.997131] hardirqs last disabled at (1609): [] ret_slow_syscall+0xc/0x1c [ 14.005371] softirqs last enabled at (0): [] copy_process+0x2cc/0xa24 [ 14.013183] softirqs last disabled at (0): [< (null)>] (null) Signed-off-by: Ming Lei Signed-off-by: Russell King --- arch/arm/kernel/entry-armv.S | 6 +++++- arch/arm/kernel/entry-common.S | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index e8d885676807..90c62cd51ca9 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -435,6 +435,10 @@ __irq_usr: usr_entry kuser_cmpxchg_check +#ifdef CONFIG_IRQSOFF_TRACER + bl trace_hardirqs_off +#endif + get_thread_info tsk #ifdef CONFIG_PREEMPT ldr r8, [tsk, #TI_PREEMPT] @ get preempt count @@ -453,7 +457,7 @@ __irq_usr: #endif mov why, #0 - b ret_to_user + b ret_to_user_from_irq UNWIND(.fnend ) ENDPROC(__irq_usr) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 1e7b04a40a31..b2a27b6b0046 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -64,6 +64,7 @@ work_resched: ENTRY(ret_to_user) ret_slow_syscall: disable_irq @ disable interrupts +ENTRY(ret_to_user_from_irq) ldr r1, [tsk, #TI_FLAGS] tst r1, #_TIF_WORK_MASK bne work_pending @@ -75,6 +76,7 @@ no_work_pending: arch_ret_to_user r1, lr restore_user_regs fast = 0, offset = 0 +ENDPROC(ret_to_user_from_irq) ENDPROC(ret_to_user) /* From 17ee083b7897ab27b4949c42de805889ebd2b4c5 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 5 May 2011 17:23:10 +0100 Subject: [PATCH 019/327] ARM: 6894/1: mmci: trigger card detect IRQs on falling and rising edges Right now the card detect IRQ for MMCI is requested without any flags which will give some default machine-specified IRQ behaviour. However on the U300 rising+falling edges (such as can be expected from a simple GPIO to generate when inserting/removing a card) need to be requested explicitly. Cc: Rabin Vincent Cc: Ulf Hansson Cc: Sebastian Rasmussen Signed-off-by: Linus Walleij Signed-off-by: Russell King --- drivers/mmc/host/mmci.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 5da5bea0f9f0..7721de942c69 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -1144,9 +1144,17 @@ static int __devinit mmci_probe(struct amba_device *dev, else if (ret != -ENOSYS) goto err_gpio_cd; + /* + * A gpio pin that will detect cards when inserted and removed + * will most likely want to trigger on the edges if it is + * 0 when ejected and 1 when inserted (or mutatis mutandis + * for the inverted case) so we request triggers on both + * edges. + */ ret = request_any_context_irq(gpio_to_irq(plat->gpio_cd), - mmci_cd_irq, 0, - DRIVER_NAME " (cd)", host); + mmci_cd_irq, + IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, + DRIVER_NAME " (cd)", host); if (ret >= 0) host->gpio_cd_irq = gpio_to_irq(plat->gpio_cd); } From a9c667f8f0656631ee5438baaf21bf30d5f67375 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 6 Jun 2011 09:51:52 -0400 Subject: [PATCH 020/327] ext4: fixed tracepoints cleanup While creating fixed tracepoints for ext3, basically by porting them from ext4, I found a lot of useless retyping, wrong type usage, useless variable passing and other inconsistencies in the ext4 fixed tracepoint code. This patch cleans the fixed tracepoint code for ext4 and also simplify some of them. Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 2 +- fs/ext4/mballoc.c | 6 +- include/trace/events/ext4.h | 179 +++++++++++++++--------------------- 3 files changed, 80 insertions(+), 107 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a5763e3505ba..e3126c051006 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2634,7 +2634,7 @@ static int ext4_writepage(struct page *page, struct buffer_head *page_bufs = NULL; struct inode *inode = page->mapping->host; - trace_ext4_writepage(inode, page); + trace_ext4_writepage(page); size = i_size_read(inode); if (page->index == size >> PAGE_CACHE_SHIFT) len = size & ~PAGE_CACHE_MASK; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index eb71dd59f2e4..6ed859d56850 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3578,8 +3578,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, free += next - bit; trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); - trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa, - grp_blk_start + bit, next - bit); + trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit, + next - bit); mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); bit = next + 1; } @@ -3608,7 +3608,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, ext4_group_t group; ext4_grpblk_t bit; - trace_ext4_mb_release_group_pa(sb, pa); + trace_ext4_mb_release_group_pa(pa); BUG_ON(pa->pa_deleted == 0); ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); BUG_ON(group != e4b->bd_group && pa->pa_len != 0); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index e09592d2f916..5ce2b2f5f524 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -26,7 +26,7 @@ TRACE_EVENT(ext4_free_inode, __field( umode_t, mode ) __field( uid_t, uid ) __field( gid_t, gid ) - __field( blkcnt_t, blocks ) + __field( __u64, blocks ) ), TP_fast_assign( @@ -40,9 +40,8 @@ TRACE_EVENT(ext4_free_inode, TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->mode, __entry->uid, __entry->gid, - (unsigned long long) __entry->blocks) + (unsigned long) __entry->ino, __entry->mode, + __entry->uid, __entry->gid, __entry->blocks) ); TRACE_EVENT(ext4_request_inode, @@ -178,7 +177,7 @@ TRACE_EVENT(ext4_begin_ordered_truncate, TP_printk("dev %d,%d ino %lu new_size %lld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (long long) __entry->new_size) + __entry->new_size) ); DECLARE_EVENT_CLASS(ext4__write_begin, @@ -204,7 +203,7 @@ DECLARE_EVENT_CLASS(ext4__write_begin, __entry->flags = flags; ), - TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u", + TP_printk("dev %d,%d ino %lu pos %lld len %u flags %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pos, __entry->len, __entry->flags) @@ -248,7 +247,7 @@ DECLARE_EVENT_CLASS(ext4__write_end, __entry->copied = copied; ), - TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u", + TP_printk("dev %d,%d ino %lu pos %lld len %u copied %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pos, __entry->len, __entry->copied) @@ -286,29 +285,6 @@ DEFINE_EVENT(ext4__write_end, ext4_da_write_end, TP_ARGS(inode, pos, len, copied) ); -TRACE_EVENT(ext4_writepage, - TP_PROTO(struct inode *inode, struct page *page), - - TP_ARGS(inode, page), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( pgoff_t, index ) - - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->index = page->index; - ), - - TP_printk("dev %d,%d ino %lu page_index %lu", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, __entry->index) -); - TRACE_EVENT(ext4_da_writepages, TP_PROTO(struct inode *inode, struct writeback_control *wbc), @@ -341,7 +317,7 @@ TRACE_EVENT(ext4_da_writepages, ), TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld " - "range_start %llu range_end %llu sync_mode %d" + "range_start %lld range_end %lld sync_mode %d" "for_kupdate %d range_cyclic %d writeback_index %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->nr_to_write, @@ -449,7 +425,14 @@ DECLARE_EVENT_CLASS(ext4__page_op, TP_printk("dev %d,%d ino %lu page_index %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->index) + (unsigned long) __entry->index) +); + +DEFINE_EVENT(ext4__page_op, ext4_writepage, + + TP_PROTO(struct page *page), + + TP_ARGS(page) ); DEFINE_EVENT(ext4__page_op, ext4_readpage, @@ -489,7 +472,7 @@ TRACE_EVENT(ext4_invalidatepage, TP_printk("dev %d,%d ino %lu page_index %lu offset %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->index, __entry->offset) + (unsigned long) __entry->index, __entry->offset) ); TRACE_EVENT(ext4_discard_blocks, @@ -562,12 +545,10 @@ DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_group_pa, ); TRACE_EVENT(ext4_mb_release_inode_pa, - TP_PROTO(struct super_block *sb, - struct inode *inode, - struct ext4_prealloc_space *pa, + TP_PROTO(struct ext4_prealloc_space *pa, unsigned long long block, unsigned int count), - TP_ARGS(sb, inode, pa, block, count), + TP_ARGS(pa, block, count), TP_STRUCT__entry( __field( dev_t, dev ) @@ -578,8 +559,8 @@ TRACE_EVENT(ext4_mb_release_inode_pa, ), TP_fast_assign( - __entry->dev = sb->s_dev; - __entry->ino = inode->i_ino; + __entry->dev = pa->pa_inode->i_sb->s_dev; + __entry->ino = pa->pa_inode->i_ino; __entry->block = block; __entry->count = count; ), @@ -591,10 +572,9 @@ TRACE_EVENT(ext4_mb_release_inode_pa, ); TRACE_EVENT(ext4_mb_release_group_pa, - TP_PROTO(struct super_block *sb, - struct ext4_prealloc_space *pa), + TP_PROTO(struct ext4_prealloc_space *pa), - TP_ARGS(sb, pa), + TP_ARGS(pa), TP_STRUCT__entry( __field( dev_t, dev ) @@ -604,7 +584,7 @@ TRACE_EVENT(ext4_mb_release_group_pa, ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev = pa->pa_inode->i_sb->s_dev; __entry->pa_pstart = pa->pa_pstart; __entry->pa_len = pa->pa_len; ), @@ -666,10 +646,10 @@ TRACE_EVENT(ext4_request_blocks, __field( ino_t, ino ) __field( unsigned int, flags ) __field( unsigned int, len ) - __field( __u64, logical ) + __field( __u32, logical ) + __field( __u32, lleft ) + __field( __u32, lright ) __field( __u64, goal ) - __field( __u64, lleft ) - __field( __u64, lright ) __field( __u64, pleft ) __field( __u64, pright ) ), @@ -687,17 +667,13 @@ TRACE_EVENT(ext4_request_blocks, __entry->pright = ar->pright; ), - TP_printk("dev %d,%d ino %lu flags %u len %u lblk %llu goal %llu " - "lleft %llu lright %llu pleft %llu pright %llu ", + TP_printk("dev %d,%d ino %lu flags %u len %u lblk %u goal %llu " + "lleft %u lright %u pleft %llu pright %llu ", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->flags, __entry->len, - (unsigned long long) __entry->logical, - (unsigned long long) __entry->goal, - (unsigned long long) __entry->lleft, - (unsigned long long) __entry->lright, - (unsigned long long) __entry->pleft, - (unsigned long long) __entry->pright) + (unsigned long) __entry->ino, __entry->flags, + __entry->len, __entry->logical, __entry->goal, + __entry->lleft, __entry->lright, __entry->pleft, + __entry->pright) ); TRACE_EVENT(ext4_allocate_blocks, @@ -711,10 +687,10 @@ TRACE_EVENT(ext4_allocate_blocks, __field( __u64, block ) __field( unsigned int, flags ) __field( unsigned int, len ) - __field( __u64, logical ) + __field( __u32, logical ) + __field( __u32, lleft ) + __field( __u32, lright ) __field( __u64, goal ) - __field( __u64, lleft ) - __field( __u64, lright ) __field( __u64, pleft ) __field( __u64, pright ) ), @@ -733,17 +709,13 @@ TRACE_EVENT(ext4_allocate_blocks, __entry->pright = ar->pright; ), - TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %llu " - "goal %llu lleft %llu lright %llu pleft %llu pright %llu", + TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %u " + "goal %llu lleft %u lright %u pleft %llu pright %llu", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->flags, __entry->len, __entry->block, - (unsigned long long) __entry->logical, - (unsigned long long) __entry->goal, - (unsigned long long) __entry->lleft, - (unsigned long long) __entry->lright, - (unsigned long long) __entry->pleft, - (unsigned long long) __entry->pright) + (unsigned long) __entry->ino, __entry->flags, + __entry->len, __entry->block, __entry->logical, + __entry->goal, __entry->lleft, __entry->lright, + __entry->pleft, __entry->pright) ); TRACE_EVENT(ext4_free_blocks, @@ -755,10 +727,10 @@ TRACE_EVENT(ext4_free_blocks, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( umode_t, mode ) + __field( umode_t, mode ) __field( __u64, block ) __field( unsigned long, count ) - __field( int, flags ) + __field( int, flags ) ), TP_fast_assign( @@ -798,7 +770,7 @@ TRACE_EVENT(ext4_sync_file_enter, __entry->parent = dentry->d_parent->d_inode->i_ino; ), - TP_printk("dev %d,%d ino %ld parent %ld datasync %d ", + TP_printk("dev %d,%d ino %lu parent %lu datasync %d ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long) __entry->parent, __entry->datasync) @@ -821,7 +793,7 @@ TRACE_EVENT(ext4_sync_file_exit, __entry->dev = inode->i_sb->s_dev; ), - TP_printk("dev %d,%d ino %ld ret %d", + TP_printk("dev %d,%d ino %lu ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->ret) @@ -1005,7 +977,7 @@ DECLARE_EVENT_CLASS(ext4__mballoc, __entry->result_len = len; ), - TP_printk("dev %d,%d inode %lu extent %u/%d/%u ", + TP_printk("dev %d,%d inode %lu extent %u/%d/%d ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->result_group, __entry->result_start, @@ -1093,7 +1065,7 @@ TRACE_EVENT(ext4_da_update_reserve_space, "allocated_meta_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->mode, (unsigned long long) __entry->i_blocks, + __entry->mode, __entry->i_blocks, __entry->used_blocks, __entry->reserved_data_blocks, __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) ); @@ -1127,7 +1099,7 @@ TRACE_EVENT(ext4_da_reserve_space, "reserved_data_blocks %d reserved_meta_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->mode, (unsigned long long) __entry->i_blocks, + __entry->mode, __entry->i_blocks, __entry->md_needed, __entry->reserved_data_blocks, __entry->reserved_meta_blocks) ); @@ -1164,7 +1136,7 @@ TRACE_EVENT(ext4_da_release_space, "allocated_meta_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->mode, (unsigned long long) __entry->i_blocks, + __entry->mode, __entry->i_blocks, __entry->freed_blocks, __entry->reserved_data_blocks, __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) ); @@ -1239,14 +1211,15 @@ TRACE_EVENT(ext4_direct_IO_enter, __entry->rw = rw; ), - TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d", + TP_printk("dev %d,%d ino %lu pos %lld len %lu rw %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (unsigned long long) __entry->pos, __entry->len, __entry->rw) + __entry->pos, __entry->len, __entry->rw) ); TRACE_EVENT(ext4_direct_IO_exit, - TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw, int ret), + TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, + int rw, int ret), TP_ARGS(inode, offset, len, rw, ret), @@ -1268,10 +1241,10 @@ TRACE_EVENT(ext4_direct_IO_exit, __entry->ret = ret; ), - TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d ret %d", + TP_printk("dev %d,%d ino %lu pos %lld len %lu rw %d ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (unsigned long long) __entry->pos, __entry->len, + __entry->pos, __entry->len, __entry->rw, __entry->ret) ); @@ -1296,15 +1269,15 @@ TRACE_EVENT(ext4_fallocate_enter, __entry->mode = mode; ), - TP_printk("dev %d,%d ino %ld pos %llu len %llu mode %d", + TP_printk("dev %d,%d ino %lu pos %lld len %lld mode %d", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - (unsigned long long) __entry->pos, - (unsigned long long) __entry->len, __entry->mode) + (unsigned long) __entry->ino, __entry->pos, + __entry->len, __entry->mode) ); TRACE_EVENT(ext4_fallocate_exit, - TP_PROTO(struct inode *inode, loff_t offset, unsigned int max_blocks, int ret), + TP_PROTO(struct inode *inode, loff_t offset, + unsigned int max_blocks, int ret), TP_ARGS(inode, offset, max_blocks, ret), @@ -1312,7 +1285,7 @@ TRACE_EVENT(ext4_fallocate_exit, __field( ino_t, ino ) __field( dev_t, dev ) __field( loff_t, pos ) - __field( unsigned, blocks ) + __field( unsigned int, blocks ) __field( int, ret ) ), @@ -1324,10 +1297,10 @@ TRACE_EVENT(ext4_fallocate_exit, __entry->ret = ret; ), - TP_printk("dev %d,%d ino %ld pos %llu blocks %d ret %d", + TP_printk("dev %d,%d ino %lu pos %lld blocks %u ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (unsigned long long) __entry->pos, __entry->blocks, + __entry->pos, __entry->blocks, __entry->ret) ); @@ -1350,7 +1323,7 @@ TRACE_EVENT(ext4_unlink_enter, __entry->dev = dentry->d_inode->i_sb->s_dev; ), - TP_printk("dev %d,%d ino %ld size %lld parent %ld", + TP_printk("dev %d,%d ino %lu size %lld parent %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->size, (unsigned long) __entry->parent) @@ -1373,7 +1346,7 @@ TRACE_EVENT(ext4_unlink_exit, __entry->ret = ret; ), - TP_printk("dev %d,%d ino %ld ret %d", + TP_printk("dev %d,%d ino %lu ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->ret) @@ -1387,7 +1360,7 @@ DECLARE_EVENT_CLASS(ext4__truncate, TP_STRUCT__entry( __field( ino_t, ino ) __field( dev_t, dev ) - __field( blkcnt_t, blocks ) + __field( __u64, blocks ) ), TP_fast_assign( @@ -1396,9 +1369,9 @@ DECLARE_EVENT_CLASS(ext4__truncate, __entry->blocks = inode->i_blocks; ), - TP_printk("dev %d,%d ino %lu blocks %lu", + TP_printk("dev %d,%d ino %lu blocks %llu", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, (unsigned long) __entry->blocks) + (unsigned long) __entry->ino, __entry->blocks) ); DEFINE_EVENT(ext4__truncate, ext4_truncate_enter, @@ -1417,7 +1390,7 @@ DEFINE_EVENT(ext4__truncate, ext4_truncate_exit, DECLARE_EVENT_CLASS(ext4__map_blocks_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, - unsigned len, unsigned flags), + unsigned int len, unsigned int flags), TP_ARGS(inode, lblk, len, flags), @@ -1425,8 +1398,8 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_enter, __field( ino_t, ino ) __field( dev_t, dev ) __field( ext4_lblk_t, lblk ) - __field( unsigned, len ) - __field( unsigned, flags ) + __field( unsigned int, len ) + __field( unsigned int, flags ) ), TP_fast_assign( @@ -1440,7 +1413,7 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_enter, TP_printk("dev %d,%d ino %lu lblk %u len %u flags %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (unsigned) __entry->lblk, __entry->len, __entry->flags) + __entry->lblk, __entry->len, __entry->flags) ); DEFINE_EVENT(ext4__map_blocks_enter, ext4_ext_map_blocks_enter, @@ -1459,7 +1432,7 @@ DEFINE_EVENT(ext4__map_blocks_enter, ext4_ind_map_blocks_enter, DECLARE_EVENT_CLASS(ext4__map_blocks_exit, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, - ext4_fsblk_t pblk, unsigned len, int ret), + ext4_fsblk_t pblk, unsigned int len, int ret), TP_ARGS(inode, lblk, pblk, len, ret), @@ -1468,7 +1441,7 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_exit, __field( dev_t, dev ) __field( ext4_lblk_t, lblk ) __field( ext4_fsblk_t, pblk ) - __field( unsigned, len ) + __field( unsigned int, len ) __field( int, ret ) ), @@ -1484,7 +1457,7 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_exit, TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (unsigned) __entry->lblk, (unsigned long long) __entry->pblk, + __entry->lblk, __entry->pblk, __entry->len, __entry->ret) ); @@ -1524,7 +1497,7 @@ TRACE_EVENT(ext4_ext_load_extent, TP_printk("dev %d,%d ino %lu lblk %u pblk %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (unsigned) __entry->lblk, (unsigned long long) __entry->pblk) + __entry->lblk, __entry->pblk) ); TRACE_EVENT(ext4_load_inode, From b084f598df36b62dfae83c10ed17f0b66b50f442 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 31 May 2011 12:24:58 -0400 Subject: [PATCH 021/327] nfsd: fix dependency of nfsd on auth_rpcgss Commit b0b0c0a26e84 "nfsd: add proc file listing kernel's gss_krb5 enctypes" added an nunnecessary dependency of nfsd on the auth_rpcgss module. It's a little ad hoc, but since the only piece of information nfsd needs from rpcsec_gss_krb5 is a single static string, one solution is just to share it with an include file. Cc: stable@kernel.org Reported-by: Michael Guntsche Cc: Kevin Coffman Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 19 ++++++------------- include/linux/sunrpc/gss_krb5_enctypes.h | 4 ++++ net/sunrpc/auth_gss/gss_krb5_mech.c | 3 ++- 3 files changed, 12 insertions(+), 14 deletions(-) create mode 100644 include/linux/sunrpc/gss_krb5_enctypes.h diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 1f5eae40f34e..2b1449dd2f49 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "idmap.h" #include "nfsd.h" @@ -189,18 +190,10 @@ static struct file_operations export_features_operations = { .release = single_release, }; -#ifdef CONFIG_SUNRPC_GSS +#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) static int supported_enctypes_show(struct seq_file *m, void *v) { - struct gss_api_mech *k5mech; - - k5mech = gss_mech_get_by_name("krb5"); - if (k5mech == NULL) - goto out; - if (k5mech->gm_upcall_enctypes != NULL) - seq_printf(m, k5mech->gm_upcall_enctypes); - gss_mech_put(k5mech); -out: + seq_printf(m, KRB5_SUPPORTED_ENCTYPES); return 0; } @@ -215,7 +208,7 @@ static struct file_operations supported_enctypes_ops = { .llseek = seq_lseek, .release = single_release, }; -#endif /* CONFIG_SUNRPC_GSS */ +#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); @@ -1427,9 +1420,9 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, -#ifdef CONFIG_SUNRPC_GSS +#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, -#endif /* CONFIG_SUNRPC_GSS */ +#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, diff --git a/include/linux/sunrpc/gss_krb5_enctypes.h b/include/linux/sunrpc/gss_krb5_enctypes.h new file mode 100644 index 000000000000..ec6234eee89c --- /dev/null +++ b/include/linux/sunrpc/gss_krb5_enctypes.h @@ -0,0 +1,4 @@ +/* + * Dumb way to share this static piece of information with nfsd + */ +#define KRB5_SUPPORTED_ENCTYPES "18,17,16,23,3,1,2" diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 0a9a2ec2e469..c3b75333b821 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -750,7 +751,7 @@ static struct gss_api_mech gss_kerberos_mech = { .gm_ops = &gss_kerberos_ops, .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), .gm_pfs = gss_kerberos_pfs, - .gm_upcall_enctypes = "18,17,16,23,3,1,2", + .gm_upcall_enctypes = KRB5_SUPPORTED_ENCTYPES, }; static int __init init_kerberos_module(void) From be1f4084b4824301e640e81d63b6275cd99ee6a1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 6 Jun 2011 11:22:17 -0700 Subject: [PATCH 022/327] nfsd: v4 support requires CRYPTO nfsd V4 support uses crypto interfaces, so select CRYPTO to fix build errors in 2.6.39: ERROR: "crypto_destroy_tfm" [fs/nfsd/nfsd.ko] undefined! ERROR: "crypto_alloc_base" [fs/nfsd/nfsd.ko] undefined! Reported-by: Wakko Warner Signed-off-by: Randy Dunlap Cc: stable@kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 18b3e8975fe0..fbb2a5ef5817 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -82,6 +82,7 @@ config NFSD_V4 select NFSD_V3 select FS_POSIX_ACL select SUNRPC_GSS + select CRYPTO help This option enables support in your system's NFS server for version 4 of the NFS protocol (RFC 3530). From 7d751f6f8c679f51b73d01a1b5269347a929004c Mon Sep 17 00:00:00 2001 From: Casey Bodley Date: Fri, 3 Jun 2011 12:21:23 -0400 Subject: [PATCH 023/327] nfsd: link returns nfserr_delay when breaking lease fix for commit 4795bb37effb7b8fe77e2d2034545d062d3788a8, nfsd: break lease on unlink, link, and rename if the LINK operation breaks a delegation, it returns NFS4ERR_NOENT (which is not a valid error in rfc 5661) instead of NFS4ERR_DELAY. the return value of nfsd_break_lease() in nfsd_link() must be converted from host_err to err Signed-off-by: Casey Bodley Cc: stable@kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d5718273bb32..f3fb61b69a1e 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1660,8 +1660,10 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, if (!dold->d_inode) goto out_drop_write; host_err = nfsd_break_lease(dold->d_inode); - if (host_err) + if (host_err) { + err = nfserrno(host_err); goto out_drop_write; + } host_err = vfs_link(dold, dirp, dnew); if (!host_err) { err = nfserrno(commit_metadata(ffhp)); From c8fb13d04ad0d08772f637bee873e620fcc064c2 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 27 May 2011 13:56:12 -0700 Subject: [PATCH 024/327] OMAP: PM debug: fix section mismatch warnings WARNING: arch/arm/mach-omap2/built-in.o(.text+0x423c): Section mismatch in reference from the function pm_dbg_regset_init() to the function .init.text:pm_dbg_init() The function pm_dbg_regset_init() references the function __init pm_dbg_init(). This is often because pm_dbg_regset_init lacks a __init annotation or the annotation of pm_dbg_init is wrong. Signed-off-by: Russell King Signed-off-by: Kevin Hilman --- arch/arm/mach-omap2/pm-debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-omap2/pm-debug.c b/arch/arm/mach-omap2/pm-debug.c index a5a83b358ddd..e01da45c0537 100644 --- a/arch/arm/mach-omap2/pm-debug.c +++ b/arch/arm/mach-omap2/pm-debug.c @@ -189,7 +189,7 @@ static struct dentry *pm_dbg_dir; static int pm_dbg_init_done; -static int __init pm_dbg_init(void); +static int pm_dbg_init(void); enum { DEBUG_FILE_COUNTERS = 0, @@ -595,7 +595,7 @@ static int option_set(void *data, u64 val) DEFINE_SIMPLE_ATTRIBUTE(pm_dbg_option_fops, option_get, option_set, "%llu\n"); -static int __init pm_dbg_init(void) +static int pm_dbg_init(void) { int i; struct dentry *d; From 345f79b3de7f6d651e4dba794af7c7303bdfd649 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Tue, 31 May 2011 16:08:09 -0700 Subject: [PATCH 025/327] OMAP: PM: omap_device: fix device power domain callbacks After commit 4d27e9dcff00a6425d779b065ec8892e4f391661 (PM: Make power domain callbacks take precedence over subsystem ones), the power domain callbacks need to call the driver callbacks instead of relying on the default subsystem (in this case, platform_bus) to handle the driver callbacks. Validated on 3430/n900, 3530/Overo. Signed-off-by: Kevin Hilman --- arch/arm/plat-omap/omap_device.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/arm/plat-omap/omap_device.c b/arch/arm/plat-omap/omap_device.c index a37b8eb65b76..49fc0df0c21f 100644 --- a/arch/arm/plat-omap/omap_device.c +++ b/arch/arm/plat-omap/omap_device.c @@ -84,6 +84,7 @@ #include #include #include +#include #include #include @@ -539,20 +540,34 @@ int omap_early_device_register(struct omap_device *od) static int _od_runtime_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); + int ret; - return omap_device_idle(pdev); + ret = pm_generic_runtime_suspend(dev); + + if (!ret) + omap_device_idle(pdev); + + return ret; +} + +static int _od_runtime_idle(struct device *dev) +{ + return pm_generic_runtime_idle(dev); } static int _od_runtime_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); - return omap_device_enable(pdev); + omap_device_enable(pdev); + + return pm_generic_runtime_resume(dev); } static struct dev_power_domain omap_device_power_domain = { .ops = { .runtime_suspend = _od_runtime_suspend, + .runtime_idle = _od_runtime_idle, .runtime_resume = _od_runtime_resume, USE_PLATFORM_PM_SLEEP_OPS } From 942fd4225f72826b31d893582b6ae7e172bb3202 Mon Sep 17 00:00:00 2001 From: Austin Zhang Date: Sat, 28 May 2011 02:03:47 +0800 Subject: [PATCH 026/327] HID: hid-multitouch: add support for Chunghwa multi-touch panel Added Chunghwa hid multitouch panel support into hid-multitouch. Signed-off-by: Austin Zhang Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 1 + drivers/hid/hid-core.c | 1 + drivers/hid/hid-ids.h | 3 +++ drivers/hid/hid-multitouch.c | 5 +++++ 4 files changed, 10 insertions(+) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 67d2a7585934..36ca465c00ce 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -305,6 +305,7 @@ config HID_MULTITOUCH - 3M PCT touch screens - ActionStar dual touch panels - Cando dual touch panels + - Chunghwa panels - CVTouch panels - Cypress TrueTouch panels - Elo TouchSystems IntelliTouch Plus panels diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index c957c4b4fe70..f7440e8ce3e7 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1359,6 +1359,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION_SOLAR) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_TACTICAL_PAD) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CHUNGHWAT, USB_DEVICE_ID_CHUNGHWAT_MULTITOUCH) }, { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) }, { HID_USB_DEVICE(USB_VENDOR_ID_CVTOUCH, USB_DEVICE_ID_CVTOUCH_SCREEN) }, { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 2bbaad70d76d..aecb5a4b8d6d 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -173,6 +173,9 @@ #define USB_DEVICE_ID_CHICONY_MULTI_TOUCH 0xb19d #define USB_DEVICE_ID_CHICONY_WIRELESS 0x0618 +#define USB_VENDOR_ID_CHUNGHWAT 0x2247 +#define USB_DEVICE_ID_CHUNGHWAT_MULTITOUCH 0x0001 + #define USB_VENDOR_ID_CIDC 0x1677 #define USB_VENDOR_ID_CMEDIA 0x0d8c diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index ecd4d2db9e80..8bc32a0fa00e 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -593,6 +593,11 @@ static const struct hid_device_id mt_devices[] = { HID_USB_DEVICE(USB_VENDOR_ID_CANDO, USB_DEVICE_ID_CANDO_MULTI_TOUCH_15_6) }, + /* Chunghwa Telecom touch panels */ + { .driver_data = MT_CLS_DEFAULT, + HID_USB_DEVICE(USB_VENDOR_ID_CHUNGHWAT, + USB_DEVICE_ID_CHUNGHWAT_MULTITOUCH) }, + /* CVTouch panels */ { .driver_data = MT_CLS_DEFAULT, HID_USB_DEVICE(USB_VENDOR_ID_CVTOUCH, From c2f019713df67e09d32e1b3c12f147a83a579d25 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 1 Jun 2011 10:59:13 -0700 Subject: [PATCH 027/327] HID: MAINTAINERS: Update USB HID/HIDBP DRIVERS pattern It was moved by commit 1a978c50c6cf ("HID: Move hiddev.txt to the new Documentation/hid directory") so update the MAINTAINERS pattern too. cc: Jiri Kosina cc: Alan Ott Signed-off-by: Joe Perches Signed-off-by: Jiri Kosina --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0b415248ae25..8bd7caefd855 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6423,7 +6423,7 @@ M: Jiri Kosina L: linux-usb@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid.git S: Maintained -F: Documentation/usb/hiddev.txt +F: Documentation/hid/hiddev.txt F: drivers/hid/usbhid/ USB ISP116X DRIVER From 5bdbd4fa4df6891a6644d588c9a30d30e7c0af8e Mon Sep 17 00:00:00 2001 From: Srinivas KANDAGATLA Date: Wed, 8 Jun 2011 15:22:39 +0900 Subject: [PATCH 028/327] sh: Fix up xchg/cmpxchg corruption with gUSA RB. gUSA special cases r15 for part of its login/out sequence, meaning that any parameters need to be explicitly prohibited from accidentally being assigned that particular register, and the compiler ultimately needs to use a temporary instead. Certain configurations have begun generating code paths that do indeed get allocated r15, resulting in immediate corruption of the exchanged value. This was observed in (amongst others) exit_mm() code generation where the xchg_u32 call was immediately corrupting a structure address. As this is a general gUSA restriction, the rest of the users likewise need to be updated to ensure sensible constraints. References: https://bugzilla.stlinux.com/show_bug.cgi?id=11229 Signed-off-by: Srinivas Kandagatla Reviewed-by: Stuart Menefy Signed-off-by: Paul Mundt --- arch/sh/include/asm/cmpxchg-grb.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/sh/include/asm/cmpxchg-grb.h b/arch/sh/include/asm/cmpxchg-grb.h index 4676bf57693a..f848dec9e483 100644 --- a/arch/sh/include/asm/cmpxchg-grb.h +++ b/arch/sh/include/asm/cmpxchg-grb.h @@ -15,8 +15,9 @@ static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val) " mov.l %2, @%1 \n\t" /* store new value */ "1: mov r1, r15 \n\t" /* LOGOUT */ : "=&r" (retval), - "+r" (m) - : "r" (val) + "+r" (m), + "+r" (val) /* inhibit r15 overloading */ + : : "memory", "r0", "r1"); return retval; @@ -36,8 +37,9 @@ static inline unsigned long xchg_u8(volatile u8 *m, unsigned long val) " mov.b %2, @%1 \n\t" /* store new value */ "1: mov r1, r15 \n\t" /* LOGOUT */ : "=&r" (retval), - "+r" (m) - : "r" (val) + "+r" (m), + "+r" (val) /* inhibit r15 overloading */ + : : "memory" , "r0", "r1"); return retval; @@ -54,13 +56,14 @@ static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old, " nop \n\t" " mov r15, r1 \n\t" /* r1 = saved sp */ " mov #-8, r15 \n\t" /* LOGIN */ - " mov.l @%1, %0 \n\t" /* load old value */ - " cmp/eq %0, %2 \n\t" + " mov.l @%3, %0 \n\t" /* load old value */ + " cmp/eq %0, %1 \n\t" " bf 1f \n\t" /* if not equal */ - " mov.l %3, @%1 \n\t" /* store new value */ + " mov.l %2, @%3 \n\t" /* store new value */ "1: mov r1, r15 \n\t" /* LOGOUT */ - : "=&r" (retval) - : "r" (m), "r" (old), "r" (new) + : "=&r" (retval), + "+r" (old), "+r" (new) /* old or new can be r15 */ + : "r" (m) : "memory" , "r0", "r1", "t"); return retval; From 7416401661fad5c7ee9edb17cb54e7bff7a74dfe Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Jun 2011 11:51:43 +0200 Subject: [PATCH 029/327] arm: davinci: Fix fallout from generic irq chip conversion The code which does the chained handler setup was overwriting chip_data. Signed-off-by: Thomas Gleixner Tested-by: Holger Hans Peter Freyther Signed-off-by: Kevin Hilman Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/gpio.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-davinci/gpio.c b/arch/arm/mach-davinci/gpio.c index a0b838894ac9..e7221398e5af 100644 --- a/arch/arm/mach-davinci/gpio.c +++ b/arch/arm/mach-davinci/gpio.c @@ -252,9 +252,11 @@ static struct irq_chip gpio_irqchip = { static void gpio_irq_handler(unsigned irq, struct irq_desc *desc) { - struct davinci_gpio_regs __iomem *g = irq2regs(irq); + struct davinci_gpio_regs __iomem *g; u32 mask = 0xffff; + g = (__force struct davinci_gpio_regs __iomem *) irq_desc_get_handler_data(desc); + /* we only care about one bank */ if (irq & 1) mask <<= 16; @@ -422,8 +424,7 @@ static int __init davinci_gpio_irq_setup(void) /* set up all irqs in this bank */ irq_set_chained_handler(bank_irq, gpio_irq_handler); - irq_set_chip_data(bank_irq, (__force void *)g); - irq_set_handler_data(bank_irq, (void *)irq); + irq_set_handler_data(bank_irq, (__force void *)g); for (i = 0; i < 16 && gpio < ngpio; i++, irq++, gpio++) { irq_set_chip(irq, &gpio_irqchip); From 7e9f1945213cdd7cd11f29346ded07a81854b5af Mon Sep 17 00:00:00 2001 From: Sekhar Nori Date: Sun, 5 Jun 2011 17:33:33 +0530 Subject: [PATCH 030/327] davinci: make PCM platform devices static Make the PCM device structures used in devices.c and devices-da8xx.c static as they are used only in the respective files. This was found when trying to build a single image for DaVinci and DA8x devices using runtime P2V support. Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/devices-da8xx.c | 2 +- arch/arm/mach-davinci/devices.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c index 4e66881c7aee..fc4e98ea7543 100644 --- a/arch/arm/mach-davinci/devices-da8xx.c +++ b/arch/arm/mach-davinci/devices-da8xx.c @@ -494,7 +494,7 @@ static struct platform_device da850_mcasp_device = { .resource = da850_mcasp_resources, }; -struct platform_device davinci_pcm_device = { +static struct platform_device davinci_pcm_device = { .name = "davinci-pcm-audio", .id = -1, }; diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c index 8f4f736aa267..806a2f02b980 100644 --- a/arch/arm/mach-davinci/devices.c +++ b/arch/arm/mach-davinci/devices.c @@ -298,7 +298,7 @@ static void davinci_init_wdt(void) /*-------------------------------------------------------------------------*/ -struct platform_device davinci_pcm_device = { +static struct platform_device davinci_pcm_device = { .name = "davinci-pcm-audio", .id = -1, }; From 07989b7ad63af424886ff922fd3bcca9e00ffa78 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 9 Jun 2011 10:10:27 +0100 Subject: [PATCH 031/327] Revert "ARM: 6943/1: mm: use TTBR1 instead of reserved context ID" This reverts commit 52af9c6cd863fe37d1103035ec7ee22ac1296458. Will Deacon reports that: In 52af9c6c ("ARM: 6943/1: mm: use TTBR1 instead of reserved context ID") I updated the ASID rollover code to use only the kernel page tables whilst updating the ASID. Unfortunately, the code to restore the user page tables was part of a later patch which isn't yet in mainline, so this leaves the code quite broken. We're also in the process of eliminating __ARCH_WANT_INTERRUPTS_ON_CTXSW from ARM, so lets revert these until we can properly sort out what we're doing with the ARM context switching. Signed-off-by: Russell King --- arch/arm/mm/context.c | 11 +++++------ arch/arm/mm/proc-v7.S | 10 ++++++---- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 8bfae964b133..b6c776ae4039 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -24,7 +24,9 @@ DEFINE_PER_CPU(struct mm_struct *, current_mm); /* * We fork()ed a process, and we need a new context for the child - * to run in. + * to run in. We reserve version 0 for initial tasks so we will + * always allocate an ASID. The ASID 0 is reserved for the TTBR + * register changing sequence. */ void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) { @@ -34,11 +36,8 @@ void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) static void flush_context(void) { - u32 ttb; - /* Copy TTBR1 into TTBR0 */ - asm volatile("mrc p15, 0, %0, c2, c0, 1\n" - "mcr p15, 0, %0, c2, c0, 0" - : "=r" (ttb)); + /* set the reserved ASID before flushing the TLB */ + asm("mcr p15, 0, %0, c13, c0, 1\n" : : "r" (0)); isb(); local_flush_tlb_all(); if (icache_is_vivt_asid_tagged()) { diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index b3b566ec83d3..3c3867850a30 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -108,16 +108,18 @@ ENTRY(cpu_v7_switch_mm) #ifdef CONFIG_ARM_ERRATA_430973 mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB #endif - mrc p15, 0, r2, c2, c0, 1 @ load TTB 1 - mcr p15, 0, r2, c2, c0, 0 @ into TTB 0 +#ifdef CONFIG_ARM_ERRATA_754322 + dsb +#endif + mcr p15, 0, r2, c13, c0, 1 @ set reserved context ID + isb +1: mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 isb #ifdef CONFIG_ARM_ERRATA_754322 dsb #endif mcr p15, 0, r1, c13, c0, 1 @ set context ID isb - mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 - isb #endif mov pc, lr ENDPROC(cpu_v7_switch_mm) From a0a54d37b4b1d1f55d1e81e8ffc223bb85472fa3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 9 Jun 2011 10:12:41 +0100 Subject: [PATCH 032/327] Revert "ARM: 6944/1: mm: allow ASID 0 to be allocated to tasks" This reverts commit 45b95235b0ac86cef2ad4480b0618b8778847479. Will Deacon reports that: In 52af9c6c ("ARM: 6943/1: mm: use TTBR1 instead of reserved context ID") I updated the ASID rollover code to use only the kernel page tables whilst updating the ASID. Unfortunately, the code to restore the user page tables was part of a later patch which isn't yet in mainline, so this leaves the code quite broken. We're also in the process of eliminating __ARCH_WANT_INTERRUPTS_ON_CTXSW from ARM, so lets revert these until we can properly sort out what we're doing with the context switching. Signed-off-by: Russell King --- arch/arm/mm/context.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index b6c776ae4039..b0ee9ba3cfab 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -93,7 +93,7 @@ static void reset_context(void *info) return; smp_rmb(); - asid = cpu_last_asid + cpu; + asid = cpu_last_asid + cpu + 1; flush_context(); set_mm_context(mm, asid); @@ -143,13 +143,13 @@ void __new_context(struct mm_struct *mm) * to start a new version and flush the TLB. */ if (unlikely((asid & ~ASID_MASK) == 0)) { - asid = cpu_last_asid + smp_processor_id(); + asid = cpu_last_asid + smp_processor_id() + 1; flush_context(); #ifdef CONFIG_SMP smp_wmb(); smp_call_function(reset_context, NULL, 1); #endif - cpu_last_asid += NR_CPUS - 1; + cpu_last_asid += NR_CPUS; } set_mm_context(mm, asid); From 6075e9df471e35f2ebf4c73c95c304d0473bd4b2 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 7 Jun 2011 09:40:51 +0100 Subject: [PATCH 033/327] ARM: 6949/2: mach-u300: fix compilaton warning in IO accessors The IO accessors for U300 were using u32 rather than the nominal void __iomem * type, rectify this by properly defining the virtual base for statically mapped peripherals to be void __iomem *. Requires fixing a field in struct clk as well. Signed-off-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/mach-u300/clock.h | 2 +- arch/arm/mach-u300/include/mach/u300-regs.h | 22 ++++++++++----------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/arm/mach-u300/clock.h b/arch/arm/mach-u300/clock.h index c34f3ea3017c..4f50ca8f901e 100644 --- a/arch/arm/mach-u300/clock.h +++ b/arch/arm/mach-u300/clock.h @@ -31,7 +31,7 @@ struct clk { bool reset; __u16 clk_val; __s8 usecount; - __u32 res_reg; + void __iomem * res_reg; __u16 res_mask; bool hw_ctrld; diff --git a/arch/arm/mach-u300/include/mach/u300-regs.h b/arch/arm/mach-u300/include/mach/u300-regs.h index 8b85df4c8d8f..035fdc9dbdb0 100644 --- a/arch/arm/mach-u300/include/mach/u300-regs.h +++ b/arch/arm/mach-u300/include/mach/u300-regs.h @@ -18,6 +18,12 @@ * the defines are used for setting up the I/O memory mapping. */ +#ifdef __ASSEMBLER__ +#define IOMEM(a) (a) +#else +#define IOMEM(a) (void __iomem *) a +#endif + /* NAND Flash CS0 */ #define U300_NAND_CS0_PHYS_BASE 0x80000000 @@ -47,13 +53,6 @@ #define U300_SEMI_CONFIG_BASE 0x30000000 #endif -/* - * All the following peripherals are specified at their PHYSICAL address, - * so if you need to access them (in the kernel), you MUST use the macros - * defined in to map to the IO_ADDRESS_AHB() IO_ADDRESS_FAST() - * etc. - */ - /* * AHB peripherals */ @@ -63,11 +62,11 @@ /* Vectored Interrupt Controller 0, servicing 32 interrupts */ #define U300_INTCON0_BASE (U300_AHB_PER_PHYS_BASE+0x1000) -#define U300_INTCON0_VBASE (U300_AHB_PER_VIRT_BASE+0x1000) +#define U300_INTCON0_VBASE IOMEM(U300_AHB_PER_VIRT_BASE+0x1000) /* Vectored Interrupt Controller 1, servicing 32 interrupts */ #define U300_INTCON1_BASE (U300_AHB_PER_PHYS_BASE+0x2000) -#define U300_INTCON1_VBASE (U300_AHB_PER_VIRT_BASE+0x2000) +#define U300_INTCON1_VBASE IOMEM(U300_AHB_PER_VIRT_BASE+0x2000) /* Memory Stick Pro (MSPRO) controller */ #define U300_MSPRO_BASE (U300_AHB_PER_PHYS_BASE+0x3000) @@ -115,7 +114,7 @@ /* SYSCON */ #define U300_SYSCON_BASE (U300_SLOW_PER_PHYS_BASE+0x1000) -#define U300_SYSCON_VBASE (U300_SLOW_PER_VIRT_BASE+0x1000) +#define U300_SYSCON_VBASE IOMEM(U300_SLOW_PER_VIRT_BASE+0x1000) /* Watchdog */ #define U300_WDOG_BASE (U300_SLOW_PER_PHYS_BASE+0x2000) @@ -125,7 +124,7 @@ /* APP side special timer */ #define U300_TIMER_APP_BASE (U300_SLOW_PER_PHYS_BASE+0x4000) -#define U300_TIMER_APP_VBASE (U300_SLOW_PER_VIRT_BASE+0x4000) +#define U300_TIMER_APP_VBASE IOMEM(U300_SLOW_PER_VIRT_BASE+0x4000) /* Keypad */ #define U300_KEYPAD_BASE (U300_SLOW_PER_PHYS_BASE+0x5000) @@ -181,5 +180,4 @@ * Virtual accessor macros for static devices */ - #endif From f506cd48a4236b7045d092c9b92709ae6b4bdaf0 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 9 Jun 2011 04:58:36 +0100 Subject: [PATCH 034/327] ARM: 6953/1: DT: don't try to access physical address zero If the DT physical address is zero, this is equivalent to no DT. Especially when the actual RAM physical address is not located at zero, the result of phys_to_virt() would point to la-la-land and crash the kernel, which crash is completely silent this early during boot. Signed-off-by: Nicolas Pitre Acked-by: Grant Likely Signed-off-by: Russell King --- arch/arm/kernel/devtree.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index a701e4226a6c..0cdd7b456cb2 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -76,6 +76,9 @@ struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys) unsigned long dt_root; const char *model; + if (!dt_phys) + return NULL; + devtree = phys_to_virt(dt_phys); /* check device tree validity */ From 720c60e1943a06cfd9472ad5a9967dec304e4394 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 9 Jun 2011 05:05:27 +0100 Subject: [PATCH 035/327] ARM: 6954/1: zImage: fix Thumb2 breakage Commit af3e4fd37a "ARM: 6859/1: Add writethrough dcache support for ARM926EJS processor" broke Thumb2 compilation by omitting to maintain the wide encoding for the added branch instructions which made the ARM926EJ-S record smaller than expected, breaking the record walk code. Signed-off-by: Nicolas Pitre Cc: Mark A. Greer Signed-off-by: Russell King --- arch/arm/boot/compressed/head.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index f9da41921c52..942fad97e447 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -691,9 +691,9 @@ proc_types: .word 0x41069260 @ ARM926EJ-S (v5TEJ) .word 0xff0ffff0 - b __arm926ejs_mmu_cache_on - b __armv4_mmu_cache_off - b __armv5tej_mmu_cache_flush + W(b) __arm926ejs_mmu_cache_on + W(b) __armv4_mmu_cache_off + W(b) __armv5tej_mmu_cache_flush .word 0x00007000 @ ARM7 IDs .word 0x0000f000 From 373ce3020b03fb6199415ab866595c7d627bbc97 Mon Sep 17 00:00:00 2001 From: Po-Yu Chuang Date: Thu, 9 Jun 2011 08:42:17 +0100 Subject: [PATCH 036/327] ARM: 6955/1: cmpxchg syscall should data abort if page not write If the page to cmpxchg is user mode read only (not write), we should simulate a data abort first. Signed-off-by: Po-Yu Chuang Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index d52eec268b47..c5ead3c2c618 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -563,7 +563,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) if (!pmd_present(*pmd)) goto bad_access; pte = pte_offset_map_lock(mm, pmd, addr, &ptl); - if (!pte_present(*pte) || !pte_dirty(*pte)) { + if (!pte_present(*pte) || !pte_write(*pte) || !pte_dirty(*pte)) { pte_unmap_unlock(pte, ptl); goto bad_access; } From db5e7ecc4abc91b9f26f0c0d79ef88a51e987d90 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 9 Jun 2011 08:40:59 -0400 Subject: [PATCH 037/327] tracing: Fix regression in printk_formats file The fix to fix the printk_formats of modules broke the printk_formats of trace_printks in the kernel. The update of what to show via the seq_file was only updated if the passed in fmt was NULL, which happens only on the first iteration. The result was showing the first format every time instead of iterating through the available formats. Signed-off-by: Steven Rostedt --- kernel/trace/trace_printk.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index dff763b7baf1..1f06468a10d7 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -240,13 +240,10 @@ static const char **find_next(void *v, loff_t *pos) const char **fmt = v; int start_index; - if (!fmt) - fmt = __start___trace_bprintk_fmt + *pos; - start_index = __stop___trace_bprintk_fmt - __start___trace_bprintk_fmt; if (*pos < start_index) - return fmt; + return __start___trace_bprintk_fmt + *pos; return find_next_mod_format(start_index, v, fmt, pos); } From a91d92875ee94e4703fd017ccaadb48cfb344994 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 3 Jun 2011 09:51:34 +0000 Subject: [PATCH 038/327] xen: partially revert "xen: set max_pfn_mapped to the last pfn mapped" We only need to set max_pfn_mapped to the last pfn mapped on x86_64 to make sure that cleanup_highmap doesn't remove important mappings at _end. We don't need to do this on x86_32 because cleanup_highmap is not called on x86_32. Besides lowering max_pfn_mapped on x86_32 has the unwanted side effect of limiting the amount of memory available for the 1:1 kernel pagetable allocation. This patch reverts the x86_32 part of the original patch. CC: stable@kernel.org Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index dc708dcc62f1..afe1d54f980c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1599,6 +1599,11 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { pte_t pte; +#ifdef CONFIG_X86_32 + if (pfn > max_pfn_mapped) + max_pfn_mapped = pfn; +#endif + if (!pte_none(pte_page[pteidx])) continue; @@ -1766,7 +1771,9 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, initial_kernel_pmd = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); + max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + + xen_start_info->nr_pt_frames * PAGE_SIZE + + 512*1024); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); From 8d03e971cf403305217b8e62db3a2e5ad2d6263f Mon Sep 17 00:00:00 2001 From: Filip Palian Date: Thu, 12 May 2011 19:32:46 +0200 Subject: [PATCH 039/327] Bluetooth: l2cap and rfcomm: fix 1 byte infoleak to userspace. Structures "l2cap_conninfo" and "rfcomm_conninfo" have one padding byte each. This byte in "cinfo" is copied to userspace uninitialized. Signed-off-by: Filip Palian Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_sock.c | 1 + net/bluetooth/rfcomm/sock.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 18dc9888d8c2..8248303f44e8 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -413,6 +413,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __us break; } + memset(&cinfo, 0, sizeof(cinfo)); cinfo.hci_handle = chan->conn->hcon->handle; memcpy(cinfo.dev_class, chan->conn->hcon->dev_class, 3); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 386cfaffd4b7..1b10727ce523 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -788,6 +788,7 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u l2cap_sk = rfcomm_pi(sk)->dlc->session->sock->sk; + memset(&cinfo, 0, sizeof(cinfo)); cinfo.hci_handle = conn->hcon->handle; memcpy(cinfo.dev_class, conn->hcon->dev_class, 3); From a9011580a99cd4f21546381782582bfaf9e40675 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 9 Jun 2011 23:21:11 +0100 Subject: [PATCH 040/327] ARM: extend Code: line by one 16-bit quantity for Thumb instructions Dump out the following 16-bit instruction to the faulting instruction in the Code: line. This allows Thumb-2 instructions to be properly encoded. Tested-by: Kevin Hilman Signed-off-by: Russell King --- arch/arm/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index c5ead3c2c618..6807cb1e76dd 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -139,7 +139,7 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) fs = get_fs(); set_fs(KERNEL_DS); - for (i = -4; i < 1; i++) { + for (i = -4; i < 1 + !!thumb; i++) { unsigned int val, bad; if (thumb) From 0da67bed835fdde68ca0e924d2a2d6ac82c70833 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 12 May 2011 11:13:15 +0300 Subject: [PATCH 041/327] Bluetooth: fix shutdown on SCO sockets shutdown should wait for SCO link to be properly disconnected before detroying the socket, otherwise an application using the socket may assume link is properly disconnected before it really happens which can be a problem when e.g synchronizing profile switch. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Gustavo F. Padovan --- net/bluetooth/sco.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 42fdffd1d76c..cb4fb7837e5c 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -369,6 +369,15 @@ static void __sco_sock_close(struct sock *sk) case BT_CONNECTED: case BT_CONFIG: + if (sco_pi(sk)->conn) { + sk->sk_state = BT_DISCONN; + sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT); + hci_conn_put(sco_pi(sk)->conn->hcon); + sco_pi(sk)->conn->hcon = NULL; + } else + sco_chan_del(sk, ECONNRESET); + break; + case BT_CONNECT: case BT_DISCONN: sco_chan_del(sk, ECONNRESET); @@ -819,7 +828,9 @@ static void sco_chan_del(struct sock *sk, int err) conn->sk = NULL; sco_pi(sk)->conn = NULL; sco_conn_unlock(conn); - hci_conn_put(conn->hcon); + + if (conn->hcon) + hci_conn_put(conn->hcon); } sk->sk_state = BT_CLOSED; From 26e7acf315ba5fc5ac4e7cbdb422a345e59898bd Mon Sep 17 00:00:00 2001 From: David Miller Date: Thu, 19 May 2011 17:37:45 -0400 Subject: [PATCH 042/327] Bluetooth: Do not ignore errors returned from strict_strtol() Signed-off-by: David S. Miller Signed-off-by: Gustavo F. Padovan --- drivers/bluetooth/btmrvl_debugfs.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/bluetooth/btmrvl_debugfs.c b/drivers/bluetooth/btmrvl_debugfs.c index fd6305bf953e..8ecf4c6c2874 100644 --- a/drivers/bluetooth/btmrvl_debugfs.c +++ b/drivers/bluetooth/btmrvl_debugfs.c @@ -64,6 +64,8 @@ static ssize_t btmrvl_hscfgcmd_write(struct file *file, return -EFAULT; ret = strict_strtol(buf, 10, &result); + if (ret) + return ret; priv->btmrvl_dev.hscfgcmd = result; @@ -108,6 +110,8 @@ static ssize_t btmrvl_psmode_write(struct file *file, const char __user *ubuf, return -EFAULT; ret = strict_strtol(buf, 10, &result); + if (ret) + return ret; priv->btmrvl_dev.psmode = result; @@ -147,6 +151,8 @@ static ssize_t btmrvl_pscmd_write(struct file *file, const char __user *ubuf, return -EFAULT; ret = strict_strtol(buf, 10, &result); + if (ret) + return ret; priv->btmrvl_dev.pscmd = result; @@ -191,6 +197,8 @@ static ssize_t btmrvl_gpiogap_write(struct file *file, const char __user *ubuf, return -EFAULT; ret = strict_strtol(buf, 16, &result); + if (ret) + return ret; priv->btmrvl_dev.gpio_gap = result; @@ -230,6 +238,8 @@ static ssize_t btmrvl_hscmd_write(struct file *file, const char __user *ubuf, return -EFAULT; ret = strict_strtol(buf, 10, &result); + if (ret) + return ret; priv->btmrvl_dev.hscmd = result; if (priv->btmrvl_dev.hscmd) { @@ -272,6 +282,8 @@ static ssize_t btmrvl_hsmode_write(struct file *file, const char __user *ubuf, return -EFAULT; ret = strict_strtol(buf, 10, &result); + if (ret) + return ret; priv->btmrvl_dev.hsmode = result; From 7f4f0572df6c8eaa6a587bc212b0806ff37380dd Mon Sep 17 00:00:00 2001 From: Ville Tervo Date: Fri, 27 May 2011 11:16:21 +0300 Subject: [PATCH 043/327] Bluetooth: Do not send SET_EVENT_MASK for 1.1 and earlier devices Some old hci controllers do not accept any mask so leave the default mask on for these devices. < HCI Command: Set Event Mask (0x03|0x0001) plen 8 Mask: 0xfffffbff00000000 > HCI Event: Command Complete (0x0e) plen 4 Set Event Mask (0x03|0x0001) ncmd 1 status 0x12 Error: Invalid HCI Command Parameters Signed-off-by: Ville Tervo Tested-by: Corey Boyle Tested-by: Ed Tomlinson Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_event.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index f13ddbf858ba..77930aa522e3 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -477,14 +477,16 @@ static void hci_setup_event_mask(struct hci_dev *hdev) * command otherwise */ u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 }; - /* Events for 1.2 and newer controllers */ - if (hdev->lmp_ver > 1) { - events[4] |= 0x01; /* Flow Specification Complete */ - events[4] |= 0x02; /* Inquiry Result with RSSI */ - events[4] |= 0x04; /* Read Remote Extended Features Complete */ - events[5] |= 0x08; /* Synchronous Connection Complete */ - events[5] |= 0x10; /* Synchronous Connection Changed */ - } + /* CSR 1.1 dongles does not accept any bitfield so don't try to set + * any event mask for pre 1.2 devices */ + if (hdev->lmp_ver <= 1) + return; + + events[4] |= 0x01; /* Flow Specification Complete */ + events[4] |= 0x02; /* Inquiry Result with RSSI */ + events[4] |= 0x04; /* Read Remote Extended Features Complete */ + events[5] |= 0x08; /* Synchronous Connection Complete */ + events[5] |= 0x10; /* Synchronous Connection Changed */ if (hdev->features[3] & LMP_RSSI_INQ) events[4] |= 0x04; /* Inquiry Result with RSSI */ From 8f4b8c7613928d5c6da43715fedc00a7b1ee53be Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 11 Jun 2011 00:43:21 +0100 Subject: [PATCH 044/327] ARM: initrd: disable initrds outside of memory We can't cope with initrds outside of memory, so check that the initrd is within some declared memory to the kernel before using it. Otherwise we're likely to OOPS during boot. Signed-off-by: Russell King --- arch/arm/mm/init.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index b2cf9460ea60..c19571c40a21 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -330,6 +330,12 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc) memblock_reserve(__pa(_stext), _end - _stext); #endif #ifdef CONFIG_BLK_DEV_INITRD + if (phys_initrd_size && + !memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) { + pr_err("INITRD: 0x%08lx+0x%08lx is not a memory region - disabling initrd\n", + phys_initrd_start, phys_initrd_size); + phys_initrd_start = phys_initrd_size = 0; + } if (phys_initrd_size && memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) { pr_err("INITRD: 0x%08lx+0x%08lx overlaps in-use memory region - disabling initrd\n", From e172dedd89a342b30fddfafd820d2ef40c039cea Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 11 Jun 2011 00:44:35 +0100 Subject: [PATCH 045/327] ARM: footbridge: fix debug macros More of the same of 5f2c1b30 (ARM: footbridge: fix debug macros), this time for the DC21285-based debugging code rather than the 8250- based debugging code. Signed-off-by: Russell King --- arch/arm/mach-footbridge/include/mach/debug-macro.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-footbridge/include/mach/debug-macro.S b/arch/arm/mach-footbridge/include/mach/debug-macro.S index 30b971d65815..1be2eeb7a0a0 100644 --- a/arch/arm/mach-footbridge/include/mach/debug-macro.S +++ b/arch/arm/mach-footbridge/include/mach/debug-macro.S @@ -26,6 +26,7 @@ #include #else +#include /* For EBSA285 debugging */ .equ dc21285_high, ARMCSR_BASE & 0xff000000 .equ dc21285_low, ARMCSR_BASE & 0x00ffffff @@ -36,8 +37,8 @@ .else mov \rp, #0 .endif - orr \rv, \rp, #0x42000000 - orr \rp, \rp, #dc21285_high + orr \rv, \rp, #dc21285_high + orr \rp, \rp, #0x42000000 .endm .macro senduart,rd,rx From 7d7975a0e1da7d6e558211b6296a96f1d6bf60ce Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 11 Jun 2011 00:46:17 +0100 Subject: [PATCH 046/327] ARM: footbridge: fix clock event support 4e8d7637 (ARM: footbridge: convert to clockevents/clocksource) did not set the cpumask for the clock event device. This causes boot to fail. Add the necessary initialization. Signed-off-by: Russell King --- arch/arm/mach-footbridge/dc21285-timer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-footbridge/dc21285-timer.c b/arch/arm/mach-footbridge/dc21285-timer.c index 5f1f9867fc70..121ad1d4fa39 100644 --- a/arch/arm/mach-footbridge/dc21285-timer.c +++ b/arch/arm/mach-footbridge/dc21285-timer.c @@ -103,6 +103,7 @@ static void __init footbridge_timer_init(void) clockevents_calc_mult_shift(ce, mem_fclk_21285, 5); ce->max_delta_ns = clockevent_delta2ns(0xffffff, ce); ce->min_delta_ns = clockevent_delta2ns(0x000004, ce); + ce->cpumask = cpumask_of(smp_processor_id()); clockevents_register_device(ce); } From c0da00145f9a32ef33b14508e6fd90fc130afbdc Mon Sep 17 00:00:00 2001 From: Adrian Knoth Date: Sun, 12 Jun 2011 17:26:17 +0200 Subject: [PATCH 047/327] ALSA: hdspm - Fix locking in snd_hdspm_midi_input_read For the MIDI part, we need to acquire (and release) the hmidi->lock, access to the global hdspm structure is serialized through hmidi->hdspm->lock instead. Signed-off-by: Adrian Knoth Signed-off-by: Takashi Iwai --- sound/pci/rme9652/hdspm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index 949691a876d3..32d80af012cc 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -1639,12 +1639,14 @@ static int snd_hdspm_midi_input_read (struct hdspm_midi *hmidi) } } hmidi->pending = 0; + spin_unlock_irqrestore(&hmidi->lock, flags); + spin_lock_irqsave(&hmidi->hdspm->lock, flags); hmidi->hdspm->control_register |= hmidi->ie; hdspm_write(hmidi->hdspm, HDSPM_controlRegister, hmidi->hdspm->control_register); + spin_unlock_irqrestore(&hmidi->hdspm->lock, flags); - spin_unlock_irqrestore (&hmidi->lock, flags); return snd_hdspm_midi_output_write (hmidi); } From fedf1535ab5ee02acbbc235c2272d84bb9334758 Mon Sep 17 00:00:00 2001 From: Adrian Knoth Date: Sun, 12 Jun 2011 17:26:18 +0200 Subject: [PATCH 048/327] ALSA: hdspm - Fix jumping external wordclock frequency in AutoSync mode When using Word Clock on RME MADI cards, AutoSync mode was alternating betweeen MADI and WC due to a typo: AutoSync is indicated in the second status register (status2), not the first one (status). While the proc output was always correct, the reported WC frequency to ALSA was unstable as mentioned in http://mailman.alsa-project.org/pipermail/alsa-devel/2008-March/006723.html Signed-off-by: Adrian Knoth Signed-off-by: Takashi Iwai --- sound/pci/rme9652/hdspm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index 32d80af012cc..d03ef94d570e 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -1143,7 +1143,7 @@ static int hdspm_external_sample_rate(struct hdspm *hdspm) /* if wordclock has synced freq and wordclock is valid */ if ((status2 & HDSPM_wcLock) != 0 && - (status & HDSPM_SelSyncRef0) == 0) { + (status2 & HDSPM_SelSyncRef0) == 0) { rate_bits = status2 & HDSPM_wcFreqMask; From efef054e8c4bc4fd48a0b4deb5491116d9f557c7 Mon Sep 17 00:00:00 2001 From: Adrian Knoth Date: Sun, 12 Jun 2011 17:26:19 +0200 Subject: [PATCH 049/327] ALSA: hdspm - Add firmware revision ID for RME MADI PCI version The PCI version of the RME HDSP MADI card uses 0xcf as revision ID. Just add this to the list of supported cards. Signed-off-by: Adrian Knoth Signed-off-by: Takashi Iwai --- sound/pci/rme9652/hdspm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index d03ef94d570e..3f08afc0f0d3 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -521,6 +521,7 @@ MODULE_SUPPORTED_DEVICE("{{RME HDSPM-MADI}}"); #define HDSPM_DMA_AREA_KILOBYTES (HDSPM_DMA_AREA_BYTES/1024) /* revisions >= 230 indicate AES32 card */ +#define HDSPM_MADI_OLD_REV 207 #define HDSPM_MADI_REV 210 #define HDSPM_RAYDAT_REV 211 #define HDSPM_AIO_REV 212 @@ -6379,6 +6380,7 @@ static int __devinit snd_hdspm_create(struct snd_card *card, switch (hdspm->firmware_rev) { case HDSPM_MADI_REV: + case HDSPM_MADI_OLD_REV: hdspm->io_type = MADI; hdspm->card_name = "RME MADI"; hdspm->midiPorts = 3; From ac5d4b404e78bd7eb67fc70c2acb437a25497e98 Mon Sep 17 00:00:00 2001 From: Florian Zeitz Date: Sun, 12 Jun 2011 01:15:42 +0200 Subject: [PATCH 050/327] ALSA: emu10k1: Add details for E-mu 0404 PCIe version This patch adds the necessary details to support the PCIe version of E-MU's 0404 card. From comparing the PCBs it seems the PCIe version just added a PCIe chipset and left all other components pretty much in place. For anyone intrigued to take a look at the PCB there are pictures I took at . Signed-off-by: Florian Zeitz Signed-off-by: Takashi Iwai --- sound/pci/emu10k1/emu10k1_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index 5e619a84da06..15f0161ce4a2 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -1440,6 +1440,14 @@ static struct snd_emu_chip_details emu_chip_details[] = { .ca0102_chip = 1, .spk71 = 1, .emu_model = EMU_MODEL_EMU0404}, /* EMU 0404 */ + /* EMU0404 PCIe */ + {.vendor = 0x1102, .device = 0x0008, .subsystem = 0x40051102, + .driver = "Audigy2", .name = "E-mu 0404 PCIe [MAEM8984]", + .id = "EMU0404", + .emu10k2_chip = 1, + .ca0108_chip = 1, + .spk71 = 1, + .emu_model = EMU_MODEL_EMU0404}, /* EMU 0404 PCIe ver_03 */ /* Note that all E-mu cards require kernel 2.6 or newer. */ {.vendor = 0x1102, .device = 0x0008, .driver = "Audigy2", .name = "SB Audigy 2 Value [Unknown]", From ff78fca2a03c08436535d3f7152a30752d8131d1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Jun 2011 09:42:17 -0400 Subject: [PATCH 051/327] fix leak in proc_set_super() set_anon_super() can fail... Signed-off-by: Al Viro --- fs/proc/root.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/proc/root.c b/fs/proc/root.c index a9000e9cfee5..d6c3b416529b 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -28,11 +28,12 @@ static int proc_test_super(struct super_block *sb, void *data) static int proc_set_super(struct super_block *sb, void *data) { - struct pid_namespace *ns; - - ns = (struct pid_namespace *)data; - sb->s_fs_info = get_pid_ns(ns); - return set_anon_super(sb, NULL); + int err = set_anon_super(sb, NULL); + if (!err) { + struct pid_namespace *ns = (struct pid_namespace *)data; + sb->s_fs_info = get_pid_ns(ns); + } + return err; } static struct dentry *proc_mount(struct file_system_type *fs_type, From b1c27ab3f93daede979f804afc38b189c2f17c60 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Jun 2011 10:07:03 -0400 Subject: [PATCH 052/327] ubifs: split allocation of ubifs_info into a separate function preparation to ubifs sget() race fixes Signed-off-by: Al Viro --- fs/ubifs/super.c | 87 ++++++++++++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 39 deletions(-) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index b5aeb5a8ebed..ddc3b02e8cf0 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1971,6 +1971,53 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode) return ERR_PTR(-EINVAL); } +static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi) +{ + struct ubifs_info *c; + + c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL); + if (c) { + spin_lock_init(&c->cnt_lock); + spin_lock_init(&c->cs_lock); + spin_lock_init(&c->buds_lock); + spin_lock_init(&c->space_lock); + spin_lock_init(&c->orphan_lock); + init_rwsem(&c->commit_sem); + mutex_init(&c->lp_mutex); + mutex_init(&c->tnc_mutex); + mutex_init(&c->log_mutex); + mutex_init(&c->mst_mutex); + mutex_init(&c->umount_mutex); + mutex_init(&c->bu_mutex); + mutex_init(&c->write_reserve_mutex); + init_waitqueue_head(&c->cmt_wq); + c->buds = RB_ROOT; + c->old_idx = RB_ROOT; + c->size_tree = RB_ROOT; + c->orph_tree = RB_ROOT; + INIT_LIST_HEAD(&c->infos_list); + INIT_LIST_HEAD(&c->idx_gc); + INIT_LIST_HEAD(&c->replay_list); + INIT_LIST_HEAD(&c->replay_buds); + INIT_LIST_HEAD(&c->uncat_list); + INIT_LIST_HEAD(&c->empty_list); + INIT_LIST_HEAD(&c->freeable_list); + INIT_LIST_HEAD(&c->frdi_idx_list); + INIT_LIST_HEAD(&c->unclean_leb_list); + INIT_LIST_HEAD(&c->old_buds); + INIT_LIST_HEAD(&c->orph_list); + INIT_LIST_HEAD(&c->orph_new); + c->no_chk_data_crc = 1; + + c->highest_inum = UBIFS_FIRST_INO; + c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; + + ubi_get_volume_info(ubi, &c->vi); + ubi_get_device_info(c->vi.ubi_num, &c->di); + } + return c; +} + static int ubifs_fill_super(struct super_block *sb, void *data, int silent) { struct ubi_volume_desc *ubi = sb->s_fs_info; @@ -1978,49 +2025,11 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) struct inode *root; int err; - c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL); + c = alloc_ubifs_info(ubi); if (!c) return -ENOMEM; - spin_lock_init(&c->cnt_lock); - spin_lock_init(&c->cs_lock); - spin_lock_init(&c->buds_lock); - spin_lock_init(&c->space_lock); - spin_lock_init(&c->orphan_lock); - init_rwsem(&c->commit_sem); - mutex_init(&c->lp_mutex); - mutex_init(&c->tnc_mutex); - mutex_init(&c->log_mutex); - mutex_init(&c->mst_mutex); - mutex_init(&c->umount_mutex); - mutex_init(&c->bu_mutex); - mutex_init(&c->write_reserve_mutex); - init_waitqueue_head(&c->cmt_wq); - c->buds = RB_ROOT; - c->old_idx = RB_ROOT; - c->size_tree = RB_ROOT; - c->orph_tree = RB_ROOT; - INIT_LIST_HEAD(&c->infos_list); - INIT_LIST_HEAD(&c->idx_gc); - INIT_LIST_HEAD(&c->replay_list); - INIT_LIST_HEAD(&c->replay_buds); - INIT_LIST_HEAD(&c->uncat_list); - INIT_LIST_HEAD(&c->empty_list); - INIT_LIST_HEAD(&c->freeable_list); - INIT_LIST_HEAD(&c->frdi_idx_list); - INIT_LIST_HEAD(&c->unclean_leb_list); - INIT_LIST_HEAD(&c->old_buds); - INIT_LIST_HEAD(&c->orph_list); - INIT_LIST_HEAD(&c->orph_new); - c->no_chk_data_crc = 1; - c->vfs_sb = sb; - c->highest_inum = UBIFS_FIRST_INO; - c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; - - ubi_get_volume_info(ubi, &c->vi); - ubi_get_device_info(c->vi.ubi_num, &c->di); - /* Re-open the UBI device in read-write mode */ c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE); if (IS_ERR(c->ubi)) { From d251ed271d528afb407cc2ede30923e34cb209a5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Jun 2011 10:24:33 -0400 Subject: [PATCH 053/327] ubifs: fix sget races * allocate ubifs_info in ->mount(), fill it enough for sb_test() and set ->s_fs_info to it in set() callback passed to sget(). * do *not* free it in ->put_super(); do that in ->kill_sb() after we'd done kill_anon_super(). * don't free it in ubifs_fill_super() either - deactivate_locked_super() done by caller when ubifs_fill_super() returns an error will take care of that sucker. * get rid of kludge with passing ubi to ubifs_fill_super() in ->s_fs_info; we only need it in alloc_ubifs_info(), so ubifs_fill_super() will need only ubifs_info. Which it will find in ->s_fs_info just fine, no need to reassign anything... As the result, sb_test() becomes safe to apply to all superblocks that can be found by sget() (and a kludge with temporary use of ->s_fs_info to store a pointer to very different structure goes away). Signed-off-by: Al Viro --- fs/ubifs/super.c | 54 +++++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index ddc3b02e8cf0..8c892c2d5300 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1848,7 +1848,6 @@ static void ubifs_put_super(struct super_block *sb) bdi_destroy(&c->bdi); ubi_close_volume(c->ubi); mutex_unlock(&c->umount_mutex); - kfree(c); } static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) @@ -2020,21 +2019,16 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi) static int ubifs_fill_super(struct super_block *sb, void *data, int silent) { - struct ubi_volume_desc *ubi = sb->s_fs_info; - struct ubifs_info *c; + struct ubifs_info *c = sb->s_fs_info; struct inode *root; int err; - c = alloc_ubifs_info(ubi); - if (!c) - return -ENOMEM; - c->vfs_sb = sb; /* Re-open the UBI device in read-write mode */ c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE); if (IS_ERR(c->ubi)) { err = PTR_ERR(c->ubi); - goto out_free; + goto out; } /* @@ -2100,24 +2094,29 @@ out_bdi: bdi_destroy(&c->bdi); out_close: ubi_close_volume(c->ubi); -out_free: - kfree(c); +out: return err; } static int sb_test(struct super_block *sb, void *data) { - dev_t *dev = data; + struct ubifs_info *c1 = data; struct ubifs_info *c = sb->s_fs_info; - return c->vi.cdev == *dev; + return c->vi.cdev == c1->vi.cdev; +} + +static int sb_set(struct super_block *sb, void *data) +{ + sb->s_fs_info = data; + return set_anon_super(sb, NULL); } static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, const char *name, void *data) { struct ubi_volume_desc *ubi; - struct ubi_volume_info vi; + struct ubifs_info *c; struct super_block *sb; int err; @@ -2134,19 +2133,24 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, name, (int)PTR_ERR(ubi)); return ERR_CAST(ubi); } - ubi_get_volume_info(ubi, &vi); - dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id); + c = alloc_ubifs_info(ubi); + if (!c) { + err = -ENOMEM; + goto out_close; + } - sb = sget(fs_type, &sb_test, &set_anon_super, &vi.cdev); + dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); + + sb = sget(fs_type, sb_test, sb_set, c); if (IS_ERR(sb)) { err = PTR_ERR(sb); - goto out_close; + kfree(c); } if (sb->s_root) { struct ubifs_info *c1 = sb->s_fs_info; - + kfree(c); /* A new mount point for already mounted UBIFS */ dbg_gen("this ubi volume is already mounted"); if (!!(flags & MS_RDONLY) != c1->ro_mount) { @@ -2155,11 +2159,6 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, } } else { sb->s_flags = flags; - /* - * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is - * replaced by 'c'. - */ - sb->s_fs_info = ubi; err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (err) goto out_deact; @@ -2179,11 +2178,18 @@ out_close: return ERR_PTR(err); } +static void kill_ubifs_super(struct super_block *s) +{ + struct ubifs_info *c = s->s_fs_info; + kill_anon_super(s); + kfree(c); +} + static struct file_system_type ubifs_fs_type = { .name = "ubifs", .owner = THIS_MODULE, .mount = ubifs_mount, - .kill_sb = kill_anon_super, + .kill_sb = kill_ubifs_super, }; /* From dde194a64bb5c3fd05d965775dc92e8a4920a53a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Jun 2011 16:01:21 -0400 Subject: [PATCH 054/327] afs: fix sget() races, close leak on umount * set ->s_fs_info in set() callback passed to sget() * allocate the thing and set it up enough for afs_test_super() before making it visible * have it freed in ->kill_sb() (current tree simply leaks it) * have ->put_super() leave ->s_fs_info->volume alone; it's too early for dropping it; do that from ->kill_sb() after having called kill_anon_super(). Signed-off-by: Al Viro --- fs/afs/super.c | 73 ++++++++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 41 deletions(-) diff --git a/fs/afs/super.c b/fs/afs/super.c index fb240e8766d6..b7d48d7eaa1e 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -31,8 +31,8 @@ static void afs_i_init_once(void *foo); static struct dentry *afs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data); +static void afs_kill_super(struct super_block *sb); static struct inode *afs_alloc_inode(struct super_block *sb); -static void afs_put_super(struct super_block *sb); static void afs_destroy_inode(struct inode *inode); static int afs_statfs(struct dentry *dentry, struct kstatfs *buf); @@ -40,7 +40,7 @@ struct file_system_type afs_fs_type = { .owner = THIS_MODULE, .name = "afs", .mount = afs_mount, - .kill_sb = kill_anon_super, + .kill_sb = afs_kill_super, .fs_flags = 0, }; @@ -50,7 +50,6 @@ static const struct super_operations afs_super_ops = { .drop_inode = afs_drop_inode, .destroy_inode = afs_destroy_inode, .evict_inode = afs_evict_inode, - .put_super = afs_put_super, .show_options = generic_show_options, }; @@ -282,19 +281,25 @@ static int afs_parse_device_name(struct afs_mount_params *params, */ static int afs_test_super(struct super_block *sb, void *data) { - struct afs_mount_params *params = data; + struct afs_super_info *as1 = data; struct afs_super_info *as = sb->s_fs_info; - return as->volume == params->volume; + return as->volume == as1->volume; +} + +static int afs_set_super(struct super_block *sb, void *data) +{ + sb->s_fs_info = data; + return set_anon_super(sb, NULL); } /* * fill in the superblock */ -static int afs_fill_super(struct super_block *sb, void *data) +static int afs_fill_super(struct super_block *sb, + struct afs_mount_params *params) { - struct afs_mount_params *params = data; - struct afs_super_info *as = NULL; + struct afs_super_info *as = sb->s_fs_info; struct afs_fid fid; struct dentry *root = NULL; struct inode *inode = NULL; @@ -302,22 +307,11 @@ static int afs_fill_super(struct super_block *sb, void *data) _enter(""); - /* allocate a superblock info record */ - as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); - if (!as) { - _leave(" = -ENOMEM"); - return -ENOMEM; - } - - afs_get_volume(params->volume); - as->volume = params->volume; - /* fill in the superblock */ sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = AFS_FS_MAGIC; sb->s_op = &afs_super_ops; - sb->s_fs_info = as; sb->s_bdi = &as->volume->bdi; /* allocate the root inode and dentry */ @@ -326,7 +320,7 @@ static int afs_fill_super(struct super_block *sb, void *data) fid.unique = 1; inode = afs_iget(sb, params->key, &fid, NULL, NULL); if (IS_ERR(inode)) - goto error_inode; + return PTR_ERR(inode); if (params->autocell) set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); @@ -342,16 +336,8 @@ static int afs_fill_super(struct super_block *sb, void *data) _leave(" = 0"); return 0; -error_inode: - ret = PTR_ERR(inode); - inode = NULL; error: iput(inode); - afs_put_volume(as->volume); - kfree(as); - - sb->s_fs_info = NULL; - _leave(" = %d", ret); return ret; } @@ -367,6 +353,7 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, struct afs_volume *vol; struct key *key; char *new_opts = kstrdup(options, GFP_KERNEL); + struct afs_super_info *as; int ret; _enter(",,%s,%p", dev_name, options); @@ -399,12 +386,22 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, ret = PTR_ERR(vol); goto error; } - params.volume = vol; + + /* allocate a superblock info record */ + as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); + if (!as) { + ret = -ENOMEM; + afs_put_volume(vol); + goto error; + } + as->volume = vol; /* allocate a deviceless superblock */ - sb = sget(fs_type, afs_test_super, set_anon_super, ¶ms); + sb = sget(fs_type, afs_test_super, afs_set_super, as); if (IS_ERR(sb)) { ret = PTR_ERR(sb); + afs_put_volume(vol); + kfree(as); goto error; } @@ -422,16 +419,16 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, } else { _debug("reuse"); ASSERTCMP(sb->s_flags, &, MS_ACTIVE); + afs_put_volume(vol); + kfree(as); } - afs_put_volume(params.volume); afs_put_cell(params.cell); kfree(new_opts); _leave(" = 0 [%p]", sb); return dget(sb->s_root); error: - afs_put_volume(params.volume); afs_put_cell(params.cell); key_put(params.key); kfree(new_opts); @@ -439,18 +436,12 @@ error: return ERR_PTR(ret); } -/* - * finish the unmounting process on the superblock - */ -static void afs_put_super(struct super_block *sb) +static void afs_kill_super(struct super_block *sb) { struct afs_super_info *as = sb->s_fs_info; - - _enter(""); - + kill_anon_super(sb); afs_put_volume(as->volume); - - _leave(""); + kfree(as); } /* From a685e08987d1edf1995b76511d4c98ea0e905377 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 8 Jun 2011 21:13:01 -0400 Subject: [PATCH 055/327] Delay struct net freeing while there's a sysfs instance refering to it * new refcount in struct net, controlling actual freeing of the memory * new method in kobj_ns_type_operations (->drop_ns()) * ->current_ns() semantics change - it's supposed to be followed by corresponding ->drop_ns(). For struct net in case of CONFIG_NET_NS it bumps the new refcount; net_drop_ns() decrements it and calls net_free() if the last reference has been dropped. Method renamed to ->grab_current_ns(). * old net_free() callers call net_drop_ns() instead. * sysfs_exit_ns() is gone, along with a large part of callchain leading to it; now that the references stored in ->ns[...] stay valid we do not need to hunt them down and replace them with NULL. That fixes problems in sysfs_lookup() and sysfs_readdir(), along with getting rid of sb->s_instances abuse. Note that struct net *shutdown* logics has not changed - net_cleanup() is called exactly when it used to be called. The only thing postponed by having a sysfs instance refering to that struct net is actual freeing of memory occupied by struct net. Signed-off-by: Al Viro --- fs/sysfs/mount.c | 37 +++++++++++-------------------------- fs/sysfs/sysfs.h | 2 +- include/linux/kobject_ns.h | 10 ++++++---- include/linux/sysfs.h | 7 ------- include/net/net_namespace.h | 10 +++++++++- lib/kobject.c | 26 +++++++++----------------- net/core/net-sysfs.c | 23 +++++++++-------------- net/core/net_namespace.c | 12 ++++++++++-- 8 files changed, 55 insertions(+), 72 deletions(-) diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 266895783b47..e34f0d99ea4e 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -95,6 +95,14 @@ static int sysfs_set_super(struct super_block *sb, void *data) return error; } +static void free_sysfs_super_info(struct sysfs_super_info *info) +{ + int type; + for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) + kobj_ns_drop(type, info->ns[type]); + kfree(info); +} + static struct dentry *sysfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { @@ -108,11 +116,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, return ERR_PTR(-ENOMEM); for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) - info->ns[type] = kobj_ns_current(type); + info->ns[type] = kobj_ns_grab_current(type); sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); if (IS_ERR(sb) || sb->s_fs_info != info) - kfree(info); + free_sysfs_super_info(info); if (IS_ERR(sb)) return ERR_CAST(sb); if (!sb->s_root) { @@ -131,12 +139,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, static void sysfs_kill_sb(struct super_block *sb) { struct sysfs_super_info *info = sysfs_info(sb); - /* Remove the superblock from fs_supers/s_instances * so we can't find it, before freeing sysfs_super_info. */ kill_anon_super(sb); - kfree(info); + free_sysfs_super_info(info); } static struct file_system_type sysfs_fs_type = { @@ -145,28 +152,6 @@ static struct file_system_type sysfs_fs_type = { .kill_sb = sysfs_kill_sb, }; -void sysfs_exit_ns(enum kobj_ns_type type, const void *ns) -{ - struct super_block *sb; - - mutex_lock(&sysfs_mutex); - spin_lock(&sb_lock); - list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) { - struct sysfs_super_info *info = sysfs_info(sb); - /* - * If we see a superblock on the fs_supers/s_instances - * list the unmount has not completed and sb->s_fs_info - * points to a valid struct sysfs_super_info. - */ - /* Ignore superblocks with the wrong ns */ - if (info->ns[type] != ns) - continue; - info->ns[type] = NULL; - } - spin_unlock(&sb_lock); - mutex_unlock(&sysfs_mutex); -} - int __init sysfs_init(void) { int err = -ENOMEM; diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 3d28af31d863..2ed2404f3113 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -136,7 +136,7 @@ struct sysfs_addrm_cxt { * instance). */ struct sysfs_super_info { - const void *ns[KOBJ_NS_TYPES]; + void *ns[KOBJ_NS_TYPES]; }; #define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info)) extern struct sysfs_dirent sysfs_root; diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index 82cb5bf461fb..f66b065a8b5f 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -32,15 +32,17 @@ enum kobj_ns_type { /* * Callbacks so sysfs can determine namespaces - * @current_ns: return calling task's namespace + * @grab_current_ns: return a new reference to calling task's namespace * @netlink_ns: return namespace to which a sock belongs (right?) * @initial_ns: return the initial namespace (i.e. init_net_ns) + * @drop_ns: drops a reference to namespace */ struct kobj_ns_type_operations { enum kobj_ns_type type; - const void *(*current_ns)(void); + void *(*grab_current_ns)(void); const void *(*netlink_ns)(struct sock *sk); const void *(*initial_ns)(void); + void (*drop_ns)(void *); }; int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); @@ -48,9 +50,9 @@ int kobj_ns_type_registered(enum kobj_ns_type type); const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); -const void *kobj_ns_current(enum kobj_ns_type type); +void *kobj_ns_grab_current(enum kobj_ns_type type); const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); const void *kobj_ns_initial(enum kobj_ns_type type); -void kobj_ns_exit(enum kobj_ns_type type, const void *ns); +void kobj_ns_drop(enum kobj_ns_type type, void *ns); #endif /* _LINUX_KOBJECT_NS_H */ diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index c3acda60eee0..e2696d76a599 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -177,9 +177,6 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd); void sysfs_put(struct sysfs_dirent *sd); -/* Called to clear a ns tag when it is no longer valid */ -void sysfs_exit_ns(enum kobj_ns_type type, const void *tag); - int __must_check sysfs_init(void); #else /* CONFIG_SYSFS */ @@ -338,10 +335,6 @@ static inline void sysfs_put(struct sysfs_dirent *sd) { } -static inline void sysfs_exit_ns(int type, const void *tag) -{ -} - static inline int __must_check sysfs_init(void) { return 0; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 2bf9ed9ef26b..aef430d779bd 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -35,8 +35,11 @@ struct netns_ipvs; #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) struct net { + atomic_t passive; /* To decided when the network + * namespace should be freed. + */ atomic_t count; /* To decided when the network - * namespace should be freed. + * namespace should be shut down. */ #ifdef NETNS_REFCNT_DEBUG atomic_t use_count; /* To track references we @@ -154,6 +157,9 @@ int net_eq(const struct net *net1, const struct net *net2) { return net1 == net2; } + +extern void net_drop_ns(void *); + #else static inline struct net *get_net(struct net *net) @@ -175,6 +181,8 @@ int net_eq(const struct net *net1, const struct net *net2) { return 1; } + +#define net_drop_ns NULL #endif diff --git a/lib/kobject.c b/lib/kobject.c index 82dc34c095c2..640bd98a4c8a 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -948,14 +948,14 @@ const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj) } -const void *kobj_ns_current(enum kobj_ns_type type) +void *kobj_ns_grab_current(enum kobj_ns_type type) { - const void *ns = NULL; + void *ns = NULL; spin_lock(&kobj_ns_type_lock); if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && kobj_ns_ops_tbl[type]) - ns = kobj_ns_ops_tbl[type]->current_ns(); + ns = kobj_ns_ops_tbl[type]->grab_current_ns(); spin_unlock(&kobj_ns_type_lock); return ns; @@ -987,23 +987,15 @@ const void *kobj_ns_initial(enum kobj_ns_type type) return ns; } -/* - * kobj_ns_exit - invalidate a namespace tag - * - * @type: the namespace type (i.e. KOBJ_NS_TYPE_NET) - * @ns: the actual namespace being invalidated - * - * This is called when a tag is no longer valid. For instance, - * when a network namespace exits, it uses this helper to - * make sure no sb's sysfs_info points to the now-invalidated - * netns. - */ -void kobj_ns_exit(enum kobj_ns_type type, const void *ns) +void kobj_ns_drop(enum kobj_ns_type type, void *ns) { - sysfs_exit_ns(type, ns); + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type] && kobj_ns_ops_tbl[type]->drop_ns) + kobj_ns_ops_tbl[type]->drop_ns(ns); + spin_unlock(&kobj_ns_type_lock); } - EXPORT_SYMBOL(kobject_get); EXPORT_SYMBOL(kobject_put); EXPORT_SYMBOL(kobject_del); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 11b98bc2aa8f..33d2a1fba131 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1179,9 +1179,14 @@ static void remove_queue_kobjects(struct net_device *net) #endif } -static const void *net_current_ns(void) +static void *net_grab_current_ns(void) { - return current->nsproxy->net_ns; + struct net *ns = current->nsproxy->net_ns; +#ifdef CONFIG_NET_NS + if (ns) + atomic_inc(&ns->passive); +#endif + return ns; } static const void *net_initial_ns(void) @@ -1196,22 +1201,13 @@ static const void *net_netlink_ns(struct sock *sk) struct kobj_ns_type_operations net_ns_type_operations = { .type = KOBJ_NS_TYPE_NET, - .current_ns = net_current_ns, + .grab_current_ns = net_grab_current_ns, .netlink_ns = net_netlink_ns, .initial_ns = net_initial_ns, + .drop_ns = net_drop_ns, }; EXPORT_SYMBOL_GPL(net_ns_type_operations); -static void net_kobj_ns_exit(struct net *net) -{ - kobj_ns_exit(KOBJ_NS_TYPE_NET, net); -} - -static struct pernet_operations kobj_net_ops = { - .exit = net_kobj_ns_exit, -}; - - #ifdef CONFIG_HOTPLUG static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) { @@ -1339,6 +1335,5 @@ EXPORT_SYMBOL(netdev_class_remove_file); int netdev_kobject_init(void) { kobj_ns_type_register(&net_ns_type_operations); - register_pernet_subsys(&kobj_net_ops); return class_register(&net_class); } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 6c6b86d0da15..cdcbc3cb00a9 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -128,6 +128,7 @@ static __net_init int setup_net(struct net *net) LIST_HEAD(net_exit_list); atomic_set(&net->count, 1); + atomic_set(&net->passive, 1); #ifdef NETNS_REFCNT_DEBUG atomic_set(&net->use_count, 0); @@ -210,6 +211,13 @@ static void net_free(struct net *net) kmem_cache_free(net_cachep, net); } +void net_drop_ns(void *p) +{ + struct net *ns = p; + if (ns && atomic_dec_and_test(&ns->passive)) + net_free(ns); +} + struct net *copy_net_ns(unsigned long flags, struct net *old_net) { struct net *net; @@ -230,7 +238,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) } mutex_unlock(&net_mutex); if (rv < 0) { - net_free(net); + net_drop_ns(net); return ERR_PTR(rv); } return net; @@ -286,7 +294,7 @@ static void cleanup_net(struct work_struct *work) /* Finally it is safe to free my network namespace structure */ list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list); - net_free(net); + net_drop_ns(net); } } static DECLARE_WORK(net_cleanup_work, cleanup_net); From b84bd27fe70206f9253c395958134e4e4b7e55f0 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Sun, 12 Jun 2011 08:22:08 +0200 Subject: [PATCH 056/327] HID: hid-multitouch: fix broken eGalax Since the inclusion of eGalax devices in 2.6.39, I've got some bug reports for 480d and other devices. The problem lies in the reports descriptors: eGalax supports both pen and fingers, and so the reports descriptors contained both. But hid-multitouch relies on them to detect the last item in each field to send the multitouch events. In 480d, the last item is not Y as it should but Pressure. That means that the fields are not aligned and X,Y are at 0,0 (the other touch coordinates of the report). With this patch, the detection is made only when the field ContactID has been detected inside the collection. There is still a problem with the detections of the range as stylus and fingers may not have the same min/max, but it's a start. Signed-off-by: Benjamin Tissoires Reviewed-by: Henrik Rydberg Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 57 ++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 8bc32a0fa00e..0b2dcd0ee591 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -64,6 +64,7 @@ struct mt_device { struct mt_class *mtclass; /* our mt device class */ unsigned last_field_index; /* last field index of the report */ unsigned last_slot_field; /* the last field of a slot */ + int last_mt_collection; /* last known mt-related collection */ __s8 inputmode; /* InputMode HID feature, -1 if non-existent */ __u8 num_received; /* how many contacts we received */ __u8 num_expected; /* expected last contact index */ @@ -225,8 +226,10 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, cls->sn_move); /* touchscreen emulation */ set_abs(hi->input, ABS_X, field, cls->sn_move); - td->last_slot_field = usage->hid; - td->last_field_index = field->index; + if (td->last_mt_collection == usage->collection_index) { + td->last_slot_field = usage->hid; + td->last_field_index = field->index; + } return 1; case HID_GD_Y: if (quirks & MT_QUIRK_EGALAX_XYZ_FIXUP) @@ -237,8 +240,10 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, cls->sn_move); /* touchscreen emulation */ set_abs(hi->input, ABS_Y, field, cls->sn_move); - td->last_slot_field = usage->hid; - td->last_field_index = field->index; + if (td->last_mt_collection == usage->collection_index) { + td->last_slot_field = usage->hid; + td->last_field_index = field->index; + } return 1; } return 0; @@ -246,31 +251,40 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, case HID_UP_DIGITIZER: switch (usage->hid) { case HID_DG_INRANGE: - td->last_slot_field = usage->hid; - td->last_field_index = field->index; + if (td->last_mt_collection == usage->collection_index) { + td->last_slot_field = usage->hid; + td->last_field_index = field->index; + } return 1; case HID_DG_CONFIDENCE: - td->last_slot_field = usage->hid; - td->last_field_index = field->index; + if (td->last_mt_collection == usage->collection_index) { + td->last_slot_field = usage->hid; + td->last_field_index = field->index; + } return 1; case HID_DG_TIPSWITCH: hid_map_usage(hi, usage, bit, max, EV_KEY, BTN_TOUCH); input_set_capability(hi->input, EV_KEY, BTN_TOUCH); - td->last_slot_field = usage->hid; - td->last_field_index = field->index; + if (td->last_mt_collection == usage->collection_index) { + td->last_slot_field = usage->hid; + td->last_field_index = field->index; + } return 1; case HID_DG_CONTACTID: input_mt_init_slots(hi->input, td->maxcontacts); td->last_slot_field = usage->hid; td->last_field_index = field->index; + td->last_mt_collection = usage->collection_index; return 1; case HID_DG_WIDTH: hid_map_usage(hi, usage, bit, max, EV_ABS, ABS_MT_TOUCH_MAJOR); set_abs(hi->input, ABS_MT_TOUCH_MAJOR, field, cls->sn_width); - td->last_slot_field = usage->hid; - td->last_field_index = field->index; + if (td->last_mt_collection == usage->collection_index) { + td->last_slot_field = usage->hid; + td->last_field_index = field->index; + } return 1; case HID_DG_HEIGHT: hid_map_usage(hi, usage, bit, max, @@ -279,8 +293,10 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, cls->sn_height); input_set_abs_params(hi->input, ABS_MT_ORIENTATION, 0, 1, 0, 0); - td->last_slot_field = usage->hid; - td->last_field_index = field->index; + if (td->last_mt_collection == usage->collection_index) { + td->last_slot_field = usage->hid; + td->last_field_index = field->index; + } return 1; case HID_DG_TIPPRESSURE: if (quirks & MT_QUIRK_EGALAX_XYZ_FIXUP) @@ -292,16 +308,20 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, /* touchscreen emulation */ set_abs(hi->input, ABS_PRESSURE, field, cls->sn_pressure); - td->last_slot_field = usage->hid; - td->last_field_index = field->index; + if (td->last_mt_collection == usage->collection_index) { + td->last_slot_field = usage->hid; + td->last_field_index = field->index; + } return 1; case HID_DG_CONTACTCOUNT: - td->last_field_index = field->index; + if (td->last_mt_collection == usage->collection_index) + td->last_field_index = field->index; return 1; case HID_DG_CONTACTMAX: /* we don't set td->last_slot_field as contactcount and * contact max are global to the report */ - td->last_field_index = field->index; + if (td->last_mt_collection == usage->collection_index) + td->last_field_index = field->index; return -1; } /* let hid-input decide for the others */ @@ -516,6 +536,7 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) } td->mtclass = mtclass; td->inputmode = -1; + td->last_mt_collection = -1; hid_set_drvdata(hdev, td); ret = hid_parse(hdev); From 1fb74cda1b5e9c6207225fda5ef7504e815ce0e0 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sun, 12 Jun 2011 22:44:10 -0400 Subject: [PATCH 057/327] jbd2: Remove obsolete parameters in the comments for some jbd2 functions credits isn't a parameter for jbd2_journal_get_write_access and jbd2_journal_get_undo_access. So remove the corresponding comments. Acked-by: Jan Kara Cc: Randy Dunlap Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/jbd2/transaction.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 3eec82d32fd4..547b101049e5 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -814,7 +814,6 @@ out: * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. * @handle: transaction to add buffer modifications to * @bh: bh to be used for metadata writes - * @credits: variable that will receive credits for the buffer * * Returns an error code or 0 on success. * @@ -932,7 +931,6 @@ out: * non-rewindable consequences * @handle: transaction * @bh: buffer to undo - * @credits: store the number of taken credits here (if not NULL) * * Sometimes there is a need to distinguish between metadata which has * been committed to disk and that which has not. The ext3fs code uses From 54463a66b91cf491a7c9af612b0e310babc5fa24 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 13 Jun 2011 08:32:06 +0200 Subject: [PATCH 058/327] ALSA: hda - Fix wrong auto-mute type for Acer Aspire-one The auto-mute setup for Acer Aspire-one with ALC268 was set wrongly during the clean-up of auto-mute function. Fixed now. Tested-by: Borislav Petkov Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 43fcfbd32847..61a774b3d3cb 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -13316,9 +13316,8 @@ static void alc268_acer_lc_setup(struct hda_codec *codec) struct alc_spec *spec = codec->spec; spec->autocfg.hp_pins[0] = 0x15; spec->autocfg.speaker_pins[0] = 0x14; - spec->automute_mixer_nid[0] = 0x0f; spec->automute = 1; - spec->automute_mode = ALC_AUTOMUTE_MIXER; + spec->automute_mode = ALC_AUTOMUTE_AMP; spec->ext_mic.pin = 0x18; spec->ext_mic.mux_idx = 0; spec->int_mic.pin = 0x12; From 2308f4add3de9f6c9c9f02e49461e94d84bb200a Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 12 Jun 2011 13:02:43 -0700 Subject: [PATCH 059/327] ALSA: hda - Fix beep_device compilation warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using static inline functions can reduce compilation messages and macro misuse. sound/pci/hda/patch_conexant.c: In function ‘patch_cxt5045’: sound/pci/hda/patch_conexant.c:1232:3: warning: statement with no effect Signed-off-by: Joe Perches Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_beep.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_beep.h b/sound/pci/hda/hda_beep.h index f1de1bac042c..4967eabe774e 100644 --- a/sound/pci/hda/hda_beep.h +++ b/sound/pci/hda/hda_beep.h @@ -50,7 +50,12 @@ int snd_hda_enable_beep_device(struct hda_codec *codec, int enable); int snd_hda_attach_beep_device(struct hda_codec *codec, int nid); void snd_hda_detach_beep_device(struct hda_codec *codec); #else -#define snd_hda_attach_beep_device(...) 0 -#define snd_hda_detach_beep_device(...) +static inline int snd_hda_attach_beep_device(struct hda_codec *codec, int nid) +{ + return 0; +} +void snd_hda_detach_beep_device(struct hda_codec *codec) +{ +} #endif #endif From 8f4e0a18682d91abfad72ede3d3cb5f3ebdf54b4 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 13 Jun 2011 09:06:57 +0200 Subject: [PATCH 060/327] IPVS netns exit causes crash in conntrack Quote from Patric Mc Hardy "This looks like nfnetlink.c excited and destroyed the nfnl socket, but ip_vs was still holding a reference to a conntrack. When the conntrack got destroyed it created a ctnetlink event, causing an oops in netlink_has_listeners when trying to use the destroyed nfnetlink socket." If nf_conntrack_netlink is loaded before ip_vs this is not a problem. This patch simply avoids calling ip_vs_conn_drop_conntrack() when netns is dying as suggested by Julian. Signed-off-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_conn.c | 10 +++++++++- net/netfilter/ipvs/ip_vs_core.c | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index bf28ac2fc99b..782db275ac53 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -776,8 +776,16 @@ static void ip_vs_conn_expire(unsigned long data) if (cp->control) ip_vs_control_del(cp); - if (cp->flags & IP_VS_CONN_F_NFCT) + if (cp->flags & IP_VS_CONN_F_NFCT) { ip_vs_conn_drop_conntrack(cp); + /* Do not access conntracks during subsys cleanup + * because nf_conntrack_find_get can not be used after + * conntrack cleanup for the net. + */ + smp_rmb(); + if (ipvs->enable) + ip_vs_conn_drop_conntrack(cp); + } ip_vs_pe_put(cp->pe); kfree(cp->pe_data); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 55af2242bccd..24c28d238dcb 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1945,6 +1945,7 @@ static void __net_exit __ip_vs_dev_cleanup(struct net *net) { EnterFunction(2); net_ipvs(net)->enable = 0; /* Disable packet reception */ + smp_wmb(); __ip_vs_sync_cleanup(net); LeaveFunction(2); } From 9d5ae7cd6cb9ead43336fec1094184d1dc740fbd Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Fri, 3 Jun 2011 20:24:03 +0000 Subject: [PATCH 061/327] omap: pandora: fix NAND support Commit d5ce2b65 "omap3630: nand: fix device size to work in polled mode" changed values for .devsize in nand platform data, now we have to pass NAND_BUSWIDTH_16 instead of '1' to select 16bit NAND. Update pandora's platform data accordingly, also specify appropriate transfer type. Signed-off-by: Grazvydas Ignotas Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/board-omap3pandora.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/board-omap3pandora.c b/arch/arm/mach-omap2/board-omap3pandora.c index 1d10736c6d3c..a3d655c0a49b 100644 --- a/arch/arm/mach-omap2/board-omap3pandora.c +++ b/arch/arm/mach-omap2/board-omap3pandora.c @@ -86,7 +86,8 @@ static struct mtd_partition omap3pandora_nand_partitions[] = { static struct omap_nand_platform_data pandora_nand_data = { .cs = 0, - .devsize = 1, /* '0' for 8-bit, '1' for 16-bit device */ + .devsize = NAND_BUSWIDTH_16, + .xfer_type = NAND_OMAP_PREFETCH_DMA, .parts = omap3pandora_nand_partitions, .nr_parts = ARRAY_SIZE(omap3pandora_nand_partitions), }; From ac08aedfa5d3de0dcb3825b598d16c2e57991f54 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 13 Jun 2011 11:28:50 -0400 Subject: [PATCH 062/327] Btrfs: check the return value from set_anon_super Al Viro noticed we weren't checking for set_anon_super failures. This adds the required checks. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9f68c6898653..20c111b3fa0d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1312,7 +1312,9 @@ again: spin_lock_init(&root->cache_lock); init_waitqueue_head(&root->cache_wait); - set_anon_super(&root->anon_super, NULL); + ret = set_anon_super(&root->anon_super, NULL); + if (ret) + goto fail; if (btrfs_root_refs(&root->root_item) == 0) { ret = -ENOENT; From f4c44016218a6fce357715b9bbabbbbe1f69853c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 13 Jun 2011 11:30:47 -0400 Subject: [PATCH 063/327] Btrfs: drop the delalloc_bytes check in shrink_delalloc Even when delalloc_bytes is zero, we may need to sleep while waiting for delalloc space. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b42efc2ded51..1f61bf5b4960 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3314,10 +3314,6 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, if (reserved == 0) return 0; - /* nothing to shrink - nothing to reclaim */ - if (root->fs_info->delalloc_bytes == 0) - return 0; - max_reclaim = min(reserved, to_reclaim); while (loops < 1024) { From de1b794130b130e77ffa975bb58cb843744f9ae5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 13 Jun 2011 15:38:22 -0400 Subject: [PATCH 064/327] jbd2: Fix oops in jbd2_journal_remove_journal_head() jbd2_journal_remove_journal_head() can oops when trying to access journal_head returned by bh2jh(). This is caused for example by the following race: TASK1 TASK2 jbd2_journal_commit_transaction() ... processing t_forget list __jbd2_journal_refile_buffer(jh); if (!jh->b_transaction) { jbd_unlock_bh_state(bh); jbd2_journal_try_to_free_buffers() jbd2_journal_grab_journal_head(bh) jbd_lock_bh_state(bh) __journal_try_to_free_buffer() jbd2_journal_put_journal_head(jh) jbd2_journal_remove_journal_head(bh); jbd2_journal_put_journal_head() in TASK2 sees that b_jcount == 0 and buffer is not part of any transaction and thus frees journal_head before TASK1 gets to doing so. Note that even buffer_head can be released by try_to_free_buffers() after jbd2_journal_put_journal_head() which adds even larger opportunity for oops (but I didn't see this happen in reality). Fix the problem by making transactions hold their own journal_head reference (in b_jcount). That way we don't have to remove journal_head explicitely via jbd2_journal_remove_journal_head() and instead just remove journal_head when b_jcount drops to zero. The result of this is that [__]jbd2_journal_refile_buffer(), [__]jbd2_journal_unfile_buffer(), and __jdb2_journal_remove_checkpoint() can free journal_head which needs modification of a few callers. Also we have to be careful because once journal_head is removed, buffer_head might be freed as well. So we have to get our own buffer_head reference where it matters. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/checkpoint.c | 28 +++++++------ fs/jbd2/commit.c | 33 +++++++++------- fs/jbd2/journal.c | 91 ++++++++++++++----------------------------- fs/jbd2/transaction.c | 65 ++++++++++++++++--------------- include/linux/jbd2.h | 2 - 5 files changed, 98 insertions(+), 121 deletions(-) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 6a79fd0a1a32..2c62c5aae82f 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -97,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh) if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh) && !buffer_write_io_error(bh)) { + /* + * Get our reference so that bh cannot be freed before + * we unlock it + */ + get_bh(bh); JBUFFER_TRACE(jh, "remove from checkpoint list"); ret = __jbd2_journal_remove_checkpoint(jh) + 1; jbd_unlock_bh_state(bh); - jbd2_journal_remove_journal_head(bh); BUFFER_TRACE(bh, "release"); __brelse(bh); } else { @@ -223,8 +227,8 @@ restart: spin_lock(&journal->j_list_lock); goto restart; } + get_bh(bh); if (buffer_locked(bh)) { - atomic_inc(&bh->b_count); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); wait_on_buffer(bh); @@ -243,7 +247,6 @@ restart: */ released = __jbd2_journal_remove_checkpoint(jh); jbd_unlock_bh_state(bh); - jbd2_journal_remove_journal_head(bh); __brelse(bh); } @@ -284,7 +287,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, int ret = 0; if (buffer_locked(bh)) { - atomic_inc(&bh->b_count); + get_bh(bh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); wait_on_buffer(bh); @@ -316,12 +319,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, ret = 1; if (unlikely(buffer_write_io_error(bh))) ret = -EIO; + get_bh(bh); J_ASSERT_JH(jh, !buffer_jbddirty(bh)); BUFFER_TRACE(bh, "remove from checkpoint"); __jbd2_journal_remove_checkpoint(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); - jbd2_journal_remove_journal_head(bh); __brelse(bh); } else { /* @@ -554,7 +557,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal) /* * journal_clean_one_cp_list * - * Find all the written-back checkpoint buffers in the given list and release them. + * Find all the written-back checkpoint buffers in the given list and + * release them. * * Called with the journal locked. * Called with j_list_lock held. @@ -663,8 +667,8 @@ out: * checkpoint lists. * * The function returns 1 if it frees the transaction, 0 otherwise. + * The function can free jh and bh. * - * This function is called with the journal locked. * This function is called with j_list_lock held. * This function is called with jbd_lock_bh_state(jh2bh(jh)) */ @@ -684,13 +688,14 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) } journal = transaction->t_journal; + JBUFFER_TRACE(jh, "removing from transaction"); __buffer_unlink(jh); jh->b_cp_transaction = NULL; + jbd2_journal_put_journal_head(jh); if (transaction->t_checkpoint_list != NULL || transaction->t_checkpoint_io_list != NULL) goto out; - JBUFFER_TRACE(jh, "transaction has no more buffers"); /* * There is one special case to worry about: if we have just pulled the @@ -701,10 +706,8 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) * The locking here around t_state is a bit sleazy. * See the comment at the end of jbd2_journal_commit_transaction(). */ - if (transaction->t_state != T_FINISHED) { - JBUFFER_TRACE(jh, "belongs to running/committing transaction"); + if (transaction->t_state != T_FINISHED) goto out; - } /* OK, that was the last buffer for the transaction: we can now safely remove this transaction from the log */ @@ -723,7 +726,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) wake_up(&journal->j_wait_logspace); ret = 1; out: - JBUFFER_TRACE(jh, "exit"); return ret; } @@ -742,6 +744,8 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *jh, J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); + /* Get reference for checkpointing transaction */ + jbd2_journal_grab_journal_head(jh2bh(jh)); jh->b_cp_transaction = transaction; if (!transaction->t_checkpoint_list) { diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7f21cf3aaf92..eef6979821a4 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -848,10 +848,16 @@ restart_loop: while (commit_transaction->t_forget) { transaction_t *cp_transaction; struct buffer_head *bh; + int try_to_free = 0; jh = commit_transaction->t_forget; spin_unlock(&journal->j_list_lock); bh = jh2bh(jh); + /* + * Get a reference so that bh cannot be freed before we are + * done with it. + */ + get_bh(bh); jbd_lock_bh_state(bh); J_ASSERT_JH(jh, jh->b_transaction == commit_transaction); @@ -914,28 +920,27 @@ restart_loop: __jbd2_journal_insert_checkpoint(jh, commit_transaction); if (is_journal_aborted(journal)) clear_buffer_jbddirty(bh); - JBUFFER_TRACE(jh, "refile for checkpoint writeback"); - __jbd2_journal_refile_buffer(jh); - jbd_unlock_bh_state(bh); } else { J_ASSERT_BH(bh, !buffer_dirty(bh)); - /* The buffer on BJ_Forget list and not jbddirty means + /* + * The buffer on BJ_Forget list and not jbddirty means * it has been freed by this transaction and hence it * could not have been reallocated until this * transaction has committed. *BUT* it could be * reallocated once we have written all the data to * disk and before we process the buffer on BJ_Forget - * list. */ - JBUFFER_TRACE(jh, "refile or unfile freed buffer"); - __jbd2_journal_refile_buffer(jh); - if (!jh->b_transaction) { - jbd_unlock_bh_state(bh); - /* needs a brelse */ - jbd2_journal_remove_journal_head(bh); - release_buffer_page(bh); - } else - jbd_unlock_bh_state(bh); + * list. + */ + if (!jh->b_next_transaction) + try_to_free = 1; } + JBUFFER_TRACE(jh, "refile or unfile buffer"); + __jbd2_journal_refile_buffer(jh); + jbd_unlock_bh_state(bh); + if (try_to_free) + release_buffer_page(bh); /* Drops bh reference */ + else + __brelse(bh); cond_resched_lock(&journal->j_list_lock); } spin_unlock(&journal->j_list_lock); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 9a7826990304..0dfa5b598e68 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2078,10 +2078,9 @@ static void journal_free_journal_head(struct journal_head *jh) * When a buffer has its BH_JBD bit set it is immune from being released by * core kernel code, mainly via ->b_count. * - * A journal_head may be detached from its buffer_head when the journal_head's - * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. - * Various places in JBD call jbd2_journal_remove_journal_head() to indicate that the - * journal_head can be dropped if needed. + * A journal_head is detached from its buffer_head when the journal_head's + * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint + * transaction (b_cp_transaction) hold their references to b_jcount. * * Various places in the kernel want to attach a journal_head to a buffer_head * _before_ attaching the journal_head to a transaction. To protect the @@ -2094,17 +2093,16 @@ static void journal_free_journal_head(struct journal_head *jh) * (Attach a journal_head if needed. Increments b_jcount) * struct journal_head *jh = jbd2_journal_add_journal_head(bh); * ... + * (Get another reference for transaction) + * jbd2_journal_grab_journal_head(bh); * jh->b_transaction = xxx; + * (Put original reference) * jbd2_journal_put_journal_head(jh); - * - * Now, the journal_head's b_jcount is zero, but it is safe from being released - * because it has a non-zero b_transaction. */ /* * Give a buffer_head a journal_head. * - * Doesn't need the journal lock. * May sleep. */ struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) @@ -2168,61 +2166,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh) struct journal_head *jh = bh2jh(bh); J_ASSERT_JH(jh, jh->b_jcount >= 0); - - get_bh(bh); - if (jh->b_jcount == 0) { - if (jh->b_transaction == NULL && - jh->b_next_transaction == NULL && - jh->b_cp_transaction == NULL) { - J_ASSERT_JH(jh, jh->b_jlist == BJ_None); - J_ASSERT_BH(bh, buffer_jbd(bh)); - J_ASSERT_BH(bh, jh2bh(jh) == bh); - BUFFER_TRACE(bh, "remove journal_head"); - if (jh->b_frozen_data) { - printk(KERN_WARNING "%s: freeing " - "b_frozen_data\n", - __func__); - jbd2_free(jh->b_frozen_data, bh->b_size); - } - if (jh->b_committed_data) { - printk(KERN_WARNING "%s: freeing " - "b_committed_data\n", - __func__); - jbd2_free(jh->b_committed_data, bh->b_size); - } - bh->b_private = NULL; - jh->b_bh = NULL; /* debug, really */ - clear_buffer_jbd(bh); - __brelse(bh); - journal_free_journal_head(jh); - } else { - BUFFER_TRACE(bh, "journal_head was locked"); - } + J_ASSERT_JH(jh, jh->b_transaction == NULL); + J_ASSERT_JH(jh, jh->b_next_transaction == NULL); + J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); + J_ASSERT_JH(jh, jh->b_jlist == BJ_None); + J_ASSERT_BH(bh, buffer_jbd(bh)); + J_ASSERT_BH(bh, jh2bh(jh) == bh); + BUFFER_TRACE(bh, "remove journal_head"); + if (jh->b_frozen_data) { + printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); + jbd2_free(jh->b_frozen_data, bh->b_size); } + if (jh->b_committed_data) { + printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); + jbd2_free(jh->b_committed_data, bh->b_size); + } + bh->b_private = NULL; + jh->b_bh = NULL; /* debug, really */ + clear_buffer_jbd(bh); + journal_free_journal_head(jh); } /* - * jbd2_journal_remove_journal_head(): if the buffer isn't attached to a transaction - * and has a zero b_jcount then remove and release its journal_head. If we did - * see that the buffer is not used by any transaction we also "logically" - * decrement ->b_count. - * - * We in fact take an additional increment on ->b_count as a convenience, - * because the caller usually wants to do additional things with the bh - * after calling here. - * The caller of jbd2_journal_remove_journal_head() *must* run __brelse(bh) at some - * time. Once the caller has run __brelse(), the buffer is eligible for - * reaping by try_to_free_buffers(). - */ -void jbd2_journal_remove_journal_head(struct buffer_head *bh) -{ - jbd_lock_bh_journal_head(bh); - __journal_remove_journal_head(bh); - jbd_unlock_bh_journal_head(bh); -} - -/* - * Drop a reference on the passed journal_head. If it fell to zero then try to + * Drop a reference on the passed journal_head. If it fell to zero then * release the journal_head from the buffer_head. */ void jbd2_journal_put_journal_head(struct journal_head *jh) @@ -2232,11 +2198,12 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) jbd_lock_bh_journal_head(bh); J_ASSERT_JH(jh, jh->b_jcount > 0); --jh->b_jcount; - if (!jh->b_jcount && !jh->b_transaction) { + if (!jh->b_jcount) { __journal_remove_journal_head(bh); + jbd_unlock_bh_journal_head(bh); __brelse(bh); - } - jbd_unlock_bh_journal_head(bh); + } else + jbd_unlock_bh_journal_head(bh); } /* diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 547b101049e5..2d7109414cdd 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -30,6 +30,7 @@ #include static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); +static void __jbd2_journal_unfile_buffer(struct journal_head *jh); /* * jbd2_get_transaction: obtain a new transaction_t object. @@ -764,7 +765,6 @@ repeat: if (!jh->b_transaction) { JBUFFER_TRACE(jh, "no transaction"); J_ASSERT_JH(jh, !jh->b_next_transaction); - jh->b_transaction = transaction; JBUFFER_TRACE(jh, "file as BJ_Reserved"); spin_lock(&journal->j_list_lock); __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); @@ -895,8 +895,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) * committed and so it's safe to clear the dirty bit. */ clear_buffer_dirty(jh2bh(jh)); - jh->b_transaction = transaction; - /* first access by this transaction */ jh->b_modified = 0; @@ -1230,8 +1228,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); } else { __jbd2_journal_unfile_buffer(jh); - jbd2_journal_remove_journal_head(bh); - __brelse(bh); if (!buffer_jbd(bh)) { spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); @@ -1554,19 +1550,32 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) mark_buffer_dirty(bh); /* Expose it to the VM */ } -void __jbd2_journal_unfile_buffer(struct journal_head *jh) +/* + * Remove buffer from all transactions. + * + * Called with bh_state lock and j_list_lock + * + * jh and bh may be already freed when this function returns. + */ +static void __jbd2_journal_unfile_buffer(struct journal_head *jh) { __jbd2_journal_temp_unlink_buffer(jh); jh->b_transaction = NULL; + jbd2_journal_put_journal_head(jh); } void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) { - jbd_lock_bh_state(jh2bh(jh)); + struct buffer_head *bh = jh2bh(jh); + + /* Get reference so that buffer cannot be freed before we unlock it */ + get_bh(bh); + jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); __jbd2_journal_unfile_buffer(jh); spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(jh2bh(jh)); + jbd_unlock_bh_state(bh); + __brelse(bh); } /* @@ -1593,8 +1602,6 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) if (jh->b_jlist == BJ_None) { JBUFFER_TRACE(jh, "remove from checkpoint list"); __jbd2_journal_remove_checkpoint(jh); - jbd2_journal_remove_journal_head(bh); - __brelse(bh); } } spin_unlock(&journal->j_list_lock); @@ -1657,7 +1664,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, /* * We take our own ref against the journal_head here to avoid * having to add tons of locking around each instance of - * jbd2_journal_remove_journal_head() and * jbd2_journal_put_journal_head(). */ jh = jbd2_journal_grab_journal_head(bh); @@ -1695,10 +1701,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) int may_free = 1; struct buffer_head *bh = jh2bh(jh); - __jbd2_journal_unfile_buffer(jh); - if (jh->b_cp_transaction) { JBUFFER_TRACE(jh, "on running+cp transaction"); + __jbd2_journal_temp_unlink_buffer(jh); /* * We don't want to write the buffer anymore, clear the * bit so that we don't confuse checks in @@ -1709,8 +1714,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) may_free = 0; } else { JBUFFER_TRACE(jh, "on running transaction"); - jbd2_journal_remove_journal_head(bh); - __brelse(bh); + __jbd2_journal_unfile_buffer(jh); } return may_free; } @@ -1988,6 +1992,8 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, if (jh->b_transaction) __jbd2_journal_temp_unlink_buffer(jh); + else + jbd2_journal_grab_journal_head(bh); jh->b_transaction = transaction; switch (jlist) { @@ -2039,9 +2045,10 @@ void jbd2_journal_file_buffer(struct journal_head *jh, * already started to be used by a subsequent transaction, refile the * buffer on that transaction's metadata list. * - * Called under journal->j_list_lock - * + * Called under j_list_lock * Called under jbd_lock_bh_state(jh2bh(jh)) + * + * jh and bh may be already free when this function returns */ void __jbd2_journal_refile_buffer(struct journal_head *jh) { @@ -2065,6 +2072,11 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) was_dirty = test_clear_buffer_jbddirty(bh); __jbd2_journal_temp_unlink_buffer(jh); + /* + * We set b_transaction here because b_next_transaction will inherit + * our jh reference and thus __jbd2_journal_file_buffer() must not + * take a new one. + */ jh->b_transaction = jh->b_next_transaction; jh->b_next_transaction = NULL; if (buffer_freed(bh)) @@ -2081,30 +2093,21 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) } /* - * For the unlocked version of this call, also make sure that any - * hanging journal_head is cleaned up if necessary. + * __jbd2_journal_refile_buffer() with necessary locking added. We take our + * bh reference so that we can safely unlock bh. * - * __jbd2_journal_refile_buffer is usually called as part of a single locked - * operation on a buffer_head, in which the caller is probably going to - * be hooking the journal_head onto other lists. In that case it is up - * to the caller to remove the journal_head if necessary. For the - * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be - * doing anything else to the buffer so we need to do the cleanup - * ourselves to avoid a jh leak. - * - * *** The journal_head may be freed by this call! *** + * The jh and bh may be freed by this call. */ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) { struct buffer_head *bh = jh2bh(jh); + /* Get reference so that buffer cannot be freed before we unlock it */ + get_bh(bh); jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); - __jbd2_journal_refile_buffer(jh); jbd_unlock_bh_state(bh); - jbd2_journal_remove_journal_head(bh); - spin_unlock(&journal->j_list_lock); __brelse(bh); } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 4ecb7b16b278..d087c2e7b2aa 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1024,7 +1024,6 @@ struct journal_s /* Filing buffers */ extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); -extern void __jbd2_journal_unfile_buffer(struct journal_head *); extern void __jbd2_journal_refile_buffer(struct journal_head *); extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *); extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); @@ -1165,7 +1164,6 @@ extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_in */ struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh); struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh); -void jbd2_journal_remove_journal_head(struct buffer_head *bh); void jbd2_journal_put_journal_head(struct journal_head *jh); /* From b9cabe52c27cf834137f3aaa46da23bcf32284e8 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sun, 12 Jun 2011 04:28:16 +0000 Subject: [PATCH 065/327] ieee802154: Don't leak memory in ieee802154_nl_fill_phy In net/ieee802154/nl-phy.c::ieee802154_nl_fill_phy() I see two small issues. 1) If the allocation of 'buf' fails we may just as well return -EMSGSIZE directly rather than jumping to 'out:' and do a pointless kfree(0). 2) We do not free 'buf' unless we jump to one of the error labels and this leaks memory. This patch should address both. Signed-off-by: Jesper Juhl Acked-by: Dmitry Eremin-Solenikov Signed-off-by: David S. Miller --- net/ieee802154/nl-phy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index ed0eab39f531..02548b292b53 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -44,7 +44,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid, pr_debug("%s\n", __func__); if (!buf) - goto out; + return -EMSGSIZE; hdr = genlmsg_put(msg, 0, seq, &nl802154_family, flags, IEEE802154_LIST_PHY); @@ -65,6 +65,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid, pages * sizeof(uint32_t), buf); mutex_unlock(&phy->pib_lock); + kfree(buf); return genlmsg_end(msg, hdr); nla_put_failure: From 1ffde03d2aa112750468cff07efc9e0a504517dd Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Sun, 12 Jun 2011 09:40:49 +0000 Subject: [PATCH 066/327] 3c503: fix broken IRQ autoprobing Fix broken IRQ autoprobing in 3c503 driver: - improper IRQ freeing (does not free IRQs causes WARN) - missing break when an working IRQ is found The driver works with this patch. Signed-off-by: Ondrej Zary Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/3c503.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/3c503.c b/drivers/net/3c503.c index d84f6e8903a5..5b732988d493 100644 --- a/drivers/net/3c503.c +++ b/drivers/net/3c503.c @@ -412,7 +412,7 @@ el2_open(struct net_device *dev) outb_p(0x04 << ((*irqp == 9) ? 2 : *irqp), E33G_IDCFR); outb_p(0x00, E33G_IDCFR); msleep(1); - free_irq(*irqp, el2_probe_interrupt); + free_irq(*irqp, &seen); if (!seen) continue; @@ -422,6 +422,7 @@ el2_open(struct net_device *dev) continue; if (retval < 0) goto err_disable; + break; } while (*++irqp); if (*irqp == 0) { From 9281b2a2e2e02ad4bcc2fdd11797709b815d5f8e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 13 Jun 2011 08:17:36 +0000 Subject: [PATCH 067/327] net/hplance: hplance_init() should be __devinit WARNING: vmlinux.o(.devinit.text+0x253e): Section mismatch in reference from the function hplance_init_one() to the function .init.text:hplance_init() The forward declaration had the correct attribute, but the actual function definition hadn't. Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- drivers/net/hplance.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hplance.c b/drivers/net/hplance.c index b6060f7538df..a900d5bf2948 100644 --- a/drivers/net/hplance.c +++ b/drivers/net/hplance.c @@ -135,7 +135,7 @@ static void __devexit hplance_remove_one(struct dio_dev *d) } /* Initialise a single lance board at the given DIO device */ -static void __init hplance_init(struct net_device *dev, struct dio_dev *d) +static void __devinit hplance_init(struct net_device *dev, struct dio_dev *d) { unsigned long va = (d->resource.start + DIO_VIRADDRBASE); struct hplance_private *lp; From 37f7ec38ea5c31180461f82e895e13fdd549b595 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Mon, 13 Jun 2011 23:52:02 +0200 Subject: [PATCH 068/327] ALSA: 6fire: Fix double-free bug in usb6fire_fw_ezusb_upload() We have a double-free bug in sound/usb/6fire/firmware.c::usb6fire_fw_ezusb_upload(). We already call release_firmware(fw) on line 258, so when we then do it again after usb6fire_fw_ezusb_write() returns <0, we have a double-free. Easily fixed by just removing the last call to release_firmware(). Signed-off-by: Jesper Juhl Signed-off-by: Takashi Iwai --- sound/usb/6fire/firmware.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/usb/6fire/firmware.c b/sound/usb/6fire/firmware.c index a91719d5918b..1e3ae3327dd3 100644 --- a/sound/usb/6fire/firmware.c +++ b/sound/usb/6fire/firmware.c @@ -270,7 +270,6 @@ static int usb6fire_fw_ezusb_upload( data = 0x00; /* resume ezusb cpu */ ret = usb6fire_fw_ezusb_write(device, 0xa0, 0xe600, &data, 1); if (ret < 0) { - release_firmware(fw); snd_printk(KERN_ERR PREFIX "unable to upload ezusb " "firmware %s: end message.\n", fwname); return ret; From dcee0bb713d0ba0d32c5ce6fe0c5aa22e6fc274a Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 9 Jun 2011 06:35:08 +0000 Subject: [PATCH 069/327] dmaengine: shdma: SH_DMAC_MAX_CHANNELS message fix Fix the recently added SH_DMAC_MAX_CHANNELS handling code in 300e5f9 dmaengine: shdma: Fix SH_DMAC_MAX_CHANNELS handling Without this fix the shdma driver outputs silly messages in case SH_DMAC_MAX_CHANNELS happens to match the platform data: sh-dma-engine sh-dma-engine.0: Attempting to register 20 DMA channels when a max imum of 20 are supported. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/dma/shdma.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c index 2a638f9f09a2..028330044201 100644 --- a/drivers/dma/shdma.c +++ b/drivers/dma/shdma.c @@ -1221,6 +1221,11 @@ static int __init sh_dmae_probe(struct platform_device *pdev) } else { do { for (i = chanirq_res->start; i <= chanirq_res->end; i++) { + if (irq_cnt >= SH_DMAC_MAX_CHANNELS) { + irq_cap = 1; + break; + } + if ((errirq_res->flags & IORESOURCE_BITS) == IORESOURCE_IRQ_SHAREABLE) chan_flag[irq_cnt] = IRQF_SHARED; @@ -1230,15 +1235,11 @@ static int __init sh_dmae_probe(struct platform_device *pdev) "Found IRQ %d for channel %d\n", i, irq_cnt); chan_irq[irq_cnt++] = i; - - if (irq_cnt >= SH_DMAC_MAX_CHANNELS) - break; } - if (irq_cnt >= SH_DMAC_MAX_CHANNELS) { - irq_cap = 1; + if (irq_cnt >= SH_DMAC_MAX_CHANNELS) break; - } + chanirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, ++irqres); } while (irq_cnt < pdata->channel_num && chanirq_res); From 5294206053f9ab522fbd6bfde1cf7629dc564d5e Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 9 Jun 2011 09:02:25 +0000 Subject: [PATCH 070/327] ARM: mach-shmobile: AG5EVM SDHI1 platform data update Add a flag for SDHI1 to enable SDIO IRQ, and remove DMA Engine slave id:s to disable DMA as a workaround. Tested on sh73a0/AG5EVM with a BCM4318-based SDIO card. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-ag5evm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/mach-shmobile/board-ag5evm.c b/arch/arm/mach-shmobile/board-ag5evm.c index c95258c274c1..1e2aba23e0d6 100644 --- a/arch/arm/mach-shmobile/board-ag5evm.c +++ b/arch/arm/mach-shmobile/board-ag5evm.c @@ -382,10 +382,8 @@ void ag5evm_sdhi1_set_pwr(struct platform_device *pdev, int state) } static struct sh_mobile_sdhi_info sh_sdhi1_platdata = { - .dma_slave_tx = SHDMA_SLAVE_SDHI1_TX, - .dma_slave_rx = SHDMA_SLAVE_SDHI1_RX, .tmio_flags = TMIO_MMC_WRPROTECT_DISABLE, - .tmio_caps = MMC_CAP_NONREMOVABLE, + .tmio_caps = MMC_CAP_NONREMOVABLE | MMC_CAP_SDIO_IRQ, .tmio_ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, .set_pwr = ag5evm_sdhi1_set_pwr, }; From e2a53b7c5bcfb63114c02c32117ed62eae463dc2 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 9 Jun 2011 07:03:37 +0000 Subject: [PATCH 071/327] ARM: mach-shmobile: Mackerel USB platform data update This patch updates the board specific USB support code for the sh7372 Mackerel board. With this patch applied port CN22 is driven by the recently added renesas_usbhs driver using the first USB controller included in sh7372 aka USBHS0. Hotplugging of USBHS0 unfortunately has to be handled by software polling. The sh7372 SoC itself obviously supports hotplug notification by IRQ but on the Mackerel board this IRQ happens to be used for the touch screen. Also fix the pinmux configuration to avoid setting up unused pins and fix minor spelling errors. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-mackerel.c | 223 +++++++++++++++++------- 1 file changed, 159 insertions(+), 64 deletions(-) diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index 776f20560e72..1037bd2ffdb9 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -126,7 +126,7 @@ * ------+--------------------+--------------------+------- * IRQ0 | ICR1A.IRQ0SA=0010 | SDHI2 card detect | Low * IRQ6 | ICR1A.IRQ6SA=0011 | Ether(LAN9220) | High - * IRQ7 | ICR1A.IRQ7SA=0010 | LCD Tuch Panel | Low + * IRQ7 | ICR1A.IRQ7SA=0010 | LCD Touch Panel | Low * IRQ8 | ICR2A.IRQ8SA=0010 | MMC/SD card detect | Low * IRQ9 | ICR2A.IRQ9SA=0010 | KEY(TCA6408) | Low * IRQ21 | ICR4A.IRQ21SA=0011 | Sensor(ADXL345) | High @@ -165,10 +165,10 @@ * USB1 can become Host by r8a66597, and become Function by renesas_usbhs. * But don't select both drivers in same time. * These uses same IRQ number for request_irq(), and aren't supporting - * IRQF_SHARD / IORESOURCE_IRQ_SHAREABLE. + * IRQF_SHARED / IORESOURCE_IRQ_SHAREABLE. * * Actually these are old/new version of USB driver. - * This mean its register will be broken if it supports SHARD IRQ, + * This mean its register will be broken if it supports shared IRQ, */ /* @@ -562,7 +562,136 @@ out: clk_put(hdmi_ick); } -/* USB1 (Host) */ +/* USBHS0 is connected to CN22 which takes a USB Mini-B plug + * + * The sh7372 SoC has IRQ7 set aside for USBHS0 hotplug, + * but on this particular board IRQ7 is already used by + * the touch screen. This leaves us with software polling. + */ +#define USBHS0_POLL_INTERVAL (HZ * 5) + +struct usbhs_private { + unsigned int usbphyaddr; + unsigned int usbcrcaddr; + struct renesas_usbhs_platform_info info; + struct delayed_work work; + struct platform_device *pdev; +}; + +#define usbhs_get_priv(pdev) \ + container_of(renesas_usbhs_get_info(pdev), \ + struct usbhs_private, info) + +#define usbhs_is_connected(priv) \ + (!((1 << 7) & __raw_readw(priv->usbcrcaddr))) + +static int usbhs_get_vbus(struct platform_device *pdev) +{ + return usbhs_is_connected(usbhs_get_priv(pdev)); +} + +static void usbhs_phy_reset(struct platform_device *pdev) +{ + struct usbhs_private *priv = usbhs_get_priv(pdev); + + /* init phy */ + __raw_writew(0x8a0a, priv->usbcrcaddr); +} + +static int usbhs0_get_id(struct platform_device *pdev) +{ + return USBHS_GADGET; +} + +static void usbhs0_work_function(struct work_struct *work) +{ + struct usbhs_private *priv = container_of(work, struct usbhs_private, + work.work); + + renesas_usbhs_call_notify_hotplug(priv->pdev); + schedule_delayed_work(&priv->work, USBHS0_POLL_INTERVAL); +} + +static int usbhs0_hardware_init(struct platform_device *pdev) +{ + struct usbhs_private *priv = usbhs_get_priv(pdev); + + priv->pdev = pdev; + INIT_DELAYED_WORK(&priv->work, usbhs0_work_function); + schedule_delayed_work(&priv->work, USBHS0_POLL_INTERVAL); + return 0; +} + +static void usbhs0_hardware_exit(struct platform_device *pdev) +{ + struct usbhs_private *priv = usbhs_get_priv(pdev); + + cancel_delayed_work_sync(&priv->work); +} + +static u32 usbhs0_pipe_cfg[] = { + USB_ENDPOINT_XFER_CONTROL, + USB_ENDPOINT_XFER_ISOC, + USB_ENDPOINT_XFER_ISOC, + USB_ENDPOINT_XFER_BULK, + USB_ENDPOINT_XFER_BULK, + USB_ENDPOINT_XFER_BULK, + USB_ENDPOINT_XFER_INT, + USB_ENDPOINT_XFER_INT, + USB_ENDPOINT_XFER_INT, + USB_ENDPOINT_XFER_BULK, +}; + +static struct usbhs_private usbhs0_private = { + .usbcrcaddr = 0xe605810c, /* USBCR2 */ + .info = { + .platform_callback = { + .hardware_init = usbhs0_hardware_init, + .hardware_exit = usbhs0_hardware_exit, + .phy_reset = usbhs_phy_reset, + .get_id = usbhs0_get_id, + .get_vbus = usbhs_get_vbus, + }, + .driver_param = { + .buswait_bwait = 4, + .pipe_type = usbhs0_pipe_cfg, + .pipe_size = ARRAY_SIZE(usbhs0_pipe_cfg), + }, + }, +}; + +static struct resource usbhs0_resources[] = { + [0] = { + .name = "USBHS0", + .start = 0xe6890000, + .end = 0xe68900e6 - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = evt2irq(0x1ca0) /* USB0_USB0I0 */, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device usbhs0_device = { + .name = "renesas_usbhs", + .id = 0, + .dev = { + .platform_data = &usbhs0_private.info, + }, + .num_resources = ARRAY_SIZE(usbhs0_resources), + .resource = usbhs0_resources, +}; + +/* USBHS1 is connected to CN31 which takes a USB Mini-AB plug + * + * Use J30 to select between Host and Function. This setting + * can however not be detected by software. Hotplug of USBHS1 + * is provided via IRQ8. + */ +#define IRQ8 evt2irq(0x0300) + +/* USBHS1 USB Host support via r8a66597_hcd */ static void usb1_host_port_power(int port, int power) { if (!power) /* only power-on is supported for now */ @@ -579,9 +708,9 @@ static struct r8a66597_platdata usb1_host_data = { static struct resource usb1_host_resources[] = { [0] = { - .name = "USBHS", - .start = 0xE68B0000, - .end = 0xE68B00E6 - 1, + .name = "USBHS1", + .start = 0xe68b0000, + .end = 0xe68b00e6 - 1, .flags = IORESOURCE_MEM, }, [1] = { @@ -602,37 +731,14 @@ static struct platform_device usb1_host_device = { .resource = usb1_host_resources, }; -/* USB1 (Function) */ +/* USBHS1 USB Function support via renesas_usbhs */ + #define USB_PHY_MODE (1 << 4) #define USB_PHY_INT_EN ((1 << 3) | (1 << 2)) #define USB_PHY_ON (1 << 1) #define USB_PHY_OFF (1 << 0) #define USB_PHY_INT_CLR (USB_PHY_ON | USB_PHY_OFF) -struct usbhs_private { - unsigned int irq; - unsigned int usbphyaddr; - unsigned int usbcrcaddr; - struct renesas_usbhs_platform_info info; -}; - -#define usbhs_get_priv(pdev) \ - container_of(renesas_usbhs_get_info(pdev), \ - struct usbhs_private, info) - -#define usbhs_is_connected(priv) \ - (!((1 << 7) & __raw_readw(priv->usbcrcaddr))) - -static int usbhs1_get_id(struct platform_device *pdev) -{ - return USBHS_GADGET; -} - -static int usbhs1_get_vbus(struct platform_device *pdev) -{ - return usbhs_is_connected(usbhs_get_priv(pdev)); -} - static irqreturn_t usbhs1_interrupt(int irq, void *data) { struct platform_device *pdev = data; @@ -654,12 +760,10 @@ static int usbhs1_hardware_init(struct platform_device *pdev) struct usbhs_private *priv = usbhs_get_priv(pdev); int ret; - irq_set_irq_type(priv->irq, IRQ_TYPE_LEVEL_HIGH); - /* clear interrupt status */ __raw_writew(USB_PHY_MODE | USB_PHY_INT_CLR, priv->usbphyaddr); - ret = request_irq(priv->irq, usbhs1_interrupt, 0, + ret = request_irq(IRQ8, usbhs1_interrupt, IRQF_TRIGGER_HIGH, dev_name(&pdev->dev), pdev); if (ret) { dev_err(&pdev->dev, "request_irq err\n"); @@ -679,15 +783,7 @@ static void usbhs1_hardware_exit(struct platform_device *pdev) /* clear interrupt status */ __raw_writew(USB_PHY_MODE | USB_PHY_INT_CLR, priv->usbphyaddr); - free_irq(priv->irq, pdev); -} - -static void usbhs1_phy_reset(struct platform_device *pdev) -{ - struct usbhs_private *priv = usbhs_get_priv(pdev); - - /* init phy */ - __raw_writew(0x8a0a, priv->usbcrcaddr); + free_irq(IRQ8, pdev); } static u32 usbhs1_pipe_cfg[] = { @@ -710,16 +806,14 @@ static u32 usbhs1_pipe_cfg[] = { }; static struct usbhs_private usbhs1_private = { - .irq = evt2irq(0x0300), /* IRQ8 */ - .usbphyaddr = 0xE60581E2, /* USBPHY1INTAP */ - .usbcrcaddr = 0xE6058130, /* USBCR4 */ + .usbphyaddr = 0xe60581e2, /* USBPHY1INTAP */ + .usbcrcaddr = 0xe6058130, /* USBCR4 */ .info = { .platform_callback = { .hardware_init = usbhs1_hardware_init, .hardware_exit = usbhs1_hardware_exit, - .phy_reset = usbhs1_phy_reset, - .get_id = usbhs1_get_id, - .get_vbus = usbhs1_get_vbus, + .phy_reset = usbhs_phy_reset, + .get_vbus = usbhs_get_vbus, }, .driver_param = { .buswait_bwait = 4, @@ -731,9 +825,9 @@ static struct usbhs_private usbhs1_private = { static struct resource usbhs1_resources[] = { [0] = { - .name = "USBHS", - .start = 0xE68B0000, - .end = 0xE68B00E6 - 1, + .name = "USBHS1", + .start = 0xe68b0000, + .end = 0xe68b00e6 - 1, .flags = IORESOURCE_MEM, }, [1] = { @@ -752,7 +846,6 @@ static struct platform_device usbhs1_device = { .resource = usbhs1_resources, }; - /* LED */ static struct gpio_led mackerel_leds[] = { { @@ -1203,6 +1296,7 @@ static struct platform_device *mackerel_devices[] __initdata = { &nor_flash_device, &smc911x_device, &lcdc_device, + &usbhs0_device, &usb1_host_device, &usbhs1_device, &leds_device, @@ -1301,6 +1395,7 @@ static void __init mackerel_map_io(void) #define GPIO_PORT9CR 0xE6051009 #define GPIO_PORT10CR 0xE605100A +#define GPIO_PORT167CR 0xE60520A7 #define GPIO_PORT168CR 0xE60520A8 #define SRCR4 0xe61580bc #define USCCR1 0xE6058144 @@ -1354,17 +1449,17 @@ static void __init mackerel_init(void) gpio_request(GPIO_PORT151, NULL); /* LCDDON */ gpio_direction_output(GPIO_PORT151, 1); - /* USB enable */ - gpio_request(GPIO_FN_VBUS0_1, NULL); - gpio_request(GPIO_FN_IDIN_1_18, NULL); - gpio_request(GPIO_FN_PWEN_1_115, NULL); - gpio_request(GPIO_FN_OVCN_1_114, NULL); - gpio_request(GPIO_FN_EXTLP_1, NULL); - gpio_request(GPIO_FN_OVCN2_1, NULL); - gpio_pull_down(GPIO_PORT168CR); + /* USBHS0 */ + gpio_request(GPIO_FN_VBUS0_0, NULL); + gpio_pull_down(GPIO_PORT168CR); /* VBUS0_0 pull down */ - /* setup USB phy */ - __raw_writew(0x8a0a, 0xE6058130); /* USBCR4 */ + /* USBHS1 */ + gpio_request(GPIO_FN_VBUS0_1, NULL); + gpio_pull_down(GPIO_PORT167CR); /* VBUS0_1 pull down */ + gpio_request(GPIO_FN_IDIN_1_113, NULL); + + /* USB phy tweak to make the r8a66597_hcd host driver work */ + __raw_writew(0x8a0a, 0xe6058130); /* USBCR4 */ /* enable FSI2 port A (ak4643) */ gpio_request(GPIO_FN_FSIAIBT, NULL); From 485b2ab55477f46cd1f7d16ba9f87cd074051811 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 9 Jun 2011 06:20:03 +0000 Subject: [PATCH 072/327] ARM: mach-shmobile: sh73a0 gic_arch_extn.irq_set_wake() fix Initialize ->irq_set_wake() in gic_arch_extn to unbreak wake up from the KEYSC device on AG5EVM in case of Suspend-to-RAM. Without this patch "echo mem > /sys/power/state" and a key press results in the following message on resume: WARNING: at kernel/irq/manage.c:507 irq_set_irq_wake+0x7c/0xd8() Unbalanced IRQ 103 wake disable Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/intc-sh73a0.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/mach-shmobile/intc-sh73a0.c b/arch/arm/mach-shmobile/intc-sh73a0.c index 5d0e1503ece6..a911a60e7719 100644 --- a/arch/arm/mach-shmobile/intc-sh73a0.c +++ b/arch/arm/mach-shmobile/intc-sh73a0.c @@ -250,6 +250,11 @@ static irqreturn_t sh73a0_intcs_demux(int irq, void *dev_id) return IRQ_HANDLED; } +static int sh73a0_set_wake(struct irq_data *data, unsigned int on) +{ + return 0; /* always allow wakeup */ +} + void __init sh73a0_init_irq(void) { void __iomem *gic_dist_base = __io(0xf0001000); @@ -257,6 +262,7 @@ void __init sh73a0_init_irq(void) void __iomem *intevtsa = ioremap_nocache(0xffd20100, PAGE_SIZE); gic_init(0, 29, gic_dist_base, gic_cpu_base); + gic_arch_extn.irq_set_wake = sh73a0_set_wake; register_intc_controller(&intcs_desc); From 311057250e87f7470ccca8bd68bf9e67f6b6db09 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 14 Jun 2011 15:07:06 +0900 Subject: [PATCH 073/327] ARM: mach-shmobile: Correct SCIF port types for SH7367. While SH7377 and others were updated to properly use SCIFA/B port types, SH7367 was left behind. Fix it up accordingly. Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/setup-sh7367.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/mach-shmobile/setup-sh7367.c b/arch/arm/mach-shmobile/setup-sh7367.c index 2c10190dbb55..e546017f15de 100644 --- a/arch/arm/mach-shmobile/setup-sh7367.c +++ b/arch/arm/mach-shmobile/setup-sh7367.c @@ -38,7 +38,7 @@ static struct plat_sci_port scif0_platform_data = { .flags = UPF_BOOT_AUTOCONF, .scscr = SCSCR_RE | SCSCR_TE, .scbrr_algo_id = SCBRR_ALGO_4, - .type = PORT_SCIF, + .type = PORT_SCIFA, .irqs = { evt2irq(0xc00), evt2irq(0xc00), evt2irq(0xc00), evt2irq(0xc00) }, }; @@ -57,7 +57,7 @@ static struct plat_sci_port scif1_platform_data = { .flags = UPF_BOOT_AUTOCONF, .scscr = SCSCR_RE | SCSCR_TE, .scbrr_algo_id = SCBRR_ALGO_4, - .type = PORT_SCIF, + .type = PORT_SCIFA, .irqs = { evt2irq(0xc20), evt2irq(0xc20), evt2irq(0xc20), evt2irq(0xc20) }, }; @@ -76,7 +76,7 @@ static struct plat_sci_port scif2_platform_data = { .flags = UPF_BOOT_AUTOCONF, .scscr = SCSCR_RE | SCSCR_TE, .scbrr_algo_id = SCBRR_ALGO_4, - .type = PORT_SCIF, + .type = PORT_SCIFA, .irqs = { evt2irq(0xc40), evt2irq(0xc40), evt2irq(0xc40), evt2irq(0xc40) }, }; @@ -95,7 +95,7 @@ static struct plat_sci_port scif3_platform_data = { .flags = UPF_BOOT_AUTOCONF, .scscr = SCSCR_RE | SCSCR_TE, .scbrr_algo_id = SCBRR_ALGO_4, - .type = PORT_SCIF, + .type = PORT_SCIFA, .irqs = { evt2irq(0xc60), evt2irq(0xc60), evt2irq(0xc60), evt2irq(0xc60) }, }; @@ -114,7 +114,7 @@ static struct plat_sci_port scif4_platform_data = { .flags = UPF_BOOT_AUTOCONF, .scscr = SCSCR_RE | SCSCR_TE, .scbrr_algo_id = SCBRR_ALGO_4, - .type = PORT_SCIF, + .type = PORT_SCIFA, .irqs = { evt2irq(0xd20), evt2irq(0xd20), evt2irq(0xd20), evt2irq(0xd20) }, }; @@ -133,7 +133,7 @@ static struct plat_sci_port scif5_platform_data = { .flags = UPF_BOOT_AUTOCONF, .scscr = SCSCR_RE | SCSCR_TE, .scbrr_algo_id = SCBRR_ALGO_4, - .type = PORT_SCIF, + .type = PORT_SCIFA, .irqs = { evt2irq(0xd40), evt2irq(0xd40), evt2irq(0xd40), evt2irq(0xd40) }, }; @@ -152,7 +152,7 @@ static struct plat_sci_port scif6_platform_data = { .flags = UPF_BOOT_AUTOCONF, .scscr = SCSCR_RE | SCSCR_TE, .scbrr_algo_id = SCBRR_ALGO_4, - .type = PORT_SCIF, + .type = PORT_SCIFB, .irqs = { evt2irq(0xd60), evt2irq(0xd60), evt2irq(0xd60), evt2irq(0xd60) }, }; From ca2585afa013ec2cf99a48e46d6b82df2e240493 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 14 Jun 2011 08:14:32 +0200 Subject: [PATCH 074/327] ALSA: hda - Fix missing static inline to beep dummy function The commit 2308f4add3de9f6c9c9f02e49461e94d84bb200a missed static inline thus it resulted in multiple-definitions error at linking. Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_beep.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_beep.h b/sound/pci/hda/hda_beep.h index 4967eabe774e..55f0647458c7 100644 --- a/sound/pci/hda/hda_beep.h +++ b/sound/pci/hda/hda_beep.h @@ -54,7 +54,7 @@ static inline int snd_hda_attach_beep_device(struct hda_codec *codec, int nid) { return 0; } -void snd_hda_detach_beep_device(struct hda_codec *codec) +static inline void snd_hda_detach_beep_device(struct hda_codec *codec) { } #endif From 583af252ab07cd1e8721878463da5b7016c18fdc Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 13 Jun 2011 04:42:15 +0000 Subject: [PATCH 075/327] drivers: sh: resume enabled clocks fix Extend the SH / SH-Mobile ARM clock framework to only resume clocks that have been enabled. Without this fix divide-by-zero is triggering on sh7372 FSIDIV during system wide resume of Suspend-to-RAM. Signed-off-by: Magnus Damm Reviewed-by: Simon Horman Signed-off-by: Paul Mundt --- drivers/sh/clk/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/sh/clk/core.c b/drivers/sh/clk/core.c index 4f64183b27fa..7e9c39951ecb 100644 --- a/drivers/sh/clk/core.c +++ b/drivers/sh/clk/core.c @@ -635,7 +635,7 @@ static void clks_core_resume(void) struct clk *clkp; list_for_each_entry(clkp, &clock_list, node) { - if (likely(clkp->ops)) { + if (likely(clkp->usecount && clkp->ops)) { unsigned long rate = clkp->rate; if (likely(clkp->ops->set_parent)) From 201fbceb258650157fcc4fd746abcdd3a571eada Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Fri, 10 Jun 2011 13:10:48 +0000 Subject: [PATCH 076/327] sh, exec: remove redundant set_fs(USER_DS) The address limit is already set in flush_old_exec() so those calls to set_fs(USER_DS) are redundant. Signed-off-by: Mathias Krause Signed-off-by: Paul Mundt --- arch/sh/include/asm/processor_64.h | 1 - arch/sh/kernel/process_32.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/arch/sh/include/asm/processor_64.h b/arch/sh/include/asm/processor_64.h index 2a541ddb5a1b..e25c4c7d6b63 100644 --- a/arch/sh/include/asm/processor_64.h +++ b/arch/sh/include/asm/processor_64.h @@ -150,7 +150,6 @@ struct thread_struct { #define SR_USER (SR_MMU | SR_FD) #define start_thread(_regs, new_pc, new_sp) \ - set_fs(USER_DS); \ _regs->sr = SR_USER; /* User mode. */ \ _regs->pc = new_pc - 4; /* Compensate syscall exit */ \ _regs->pc |= 1; /* Set SHmedia ! */ \ diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index b473f0c06fbc..aaf6d59c2012 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -102,8 +102,6 @@ EXPORT_SYMBOL(kernel_thread); void start_thread(struct pt_regs *regs, unsigned long new_pc, unsigned long new_sp) { - set_fs(USER_DS); - regs->pr = 0; regs->sr = SR_FD; regs->pc = new_pc; From 1123d93963cbd2546449d4d9f0c568e323cb0ac6 Mon Sep 17 00:00:00 2001 From: Max Asbock Date: Mon, 13 Jun 2011 10:18:32 -0700 Subject: [PATCH 077/327] timerfd: Fix wakeup of processes when timer is cancelled on clock change Currently processes waiting with poll on cancelable timerfd timers are not woken up when the timers are canceled. When the system time is set the clock_was_set() function calls timerfd_clock_was_set() to cancel and wake up processes waiting on potential cancelable timerfd timers. However the wake up currently has no effect because in the case of timerfd_read it is dependent on ctx->ticks not being 0. timerfd_poll also requires ctx->ticks being non zero. As a consequence processes waiting on cancelable timers only get woken up when the timers expire. This patch fixes this by incrementing ctx->ticks before calling wake_up. Signed-off-by: Max Asbock Cc: kay.sievers@vrfy.org Cc: virtuoso@slind.org Cc: johnstul Link: http://lkml.kernel.org/r/1307985512.4710.41.camel@w-amax.beaverton.ibm.com Signed-off-by: Thomas Gleixner --- fs/timerfd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/timerfd.c b/fs/timerfd.c index f67acbdda5e8..dffeb3795af1 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -61,7 +61,9 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) /* * Called when the clock was set to cancel the timers in the cancel - * list. + * list. This will wake up processes waiting on these timers. The + * wake-up requires ctx->ticks to be non zero, therefore we increment + * it before calling wake_up_locked(). */ void timerfd_clock_was_set(void) { @@ -76,6 +78,7 @@ void timerfd_clock_was_set(void) spin_lock_irqsave(&ctx->wqh.lock, flags); if (ctx->moffs.tv64 != moffs.tv64) { ctx->moffs.tv64 = KTIME_MAX; + ctx->ticks++; wake_up_locked(&ctx->wqh); } spin_unlock_irqrestore(&ctx->wqh.lock, flags); From e9e35c5a2b2c803b5e2f25906d8ffe110670ceb6 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Tue, 14 Jun 2011 05:53:18 -0700 Subject: [PATCH 078/327] OMAP1: PM: register notifiers with generic clock ops even when !PM_RUNTIME When runtime PM is disabled, device clocks need to be enabled on device add and disabled on device remove. This currently is not happening because in the !PM_RUNTIME case, no notifiers are registered for OMAP1 devices. Fix this by ensuring notifiers are registered, even in the !PM_RUNTIME case. Reported-by: Janusz Krzysztofik Tested-by: Janusz Krzysztofik Signed-off-by: Kevin Hilman Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/Makefile | 4 ++-- arch/arm/mach-omap1/pm_bus.c | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-omap1/Makefile b/arch/arm/mach-omap1/Makefile index af98117043d2..5b114d1558c8 100644 --- a/arch/arm/mach-omap1/Makefile +++ b/arch/arm/mach-omap1/Makefile @@ -4,14 +4,14 @@ # Common support obj-y := io.o id.o sram.o time.o irq.o mux.o flash.o serial.o devices.o dma.o -obj-y += clock.o clock_data.o opp_data.o reset.o +obj-y += clock.o clock_data.o opp_data.o reset.o pm_bus.o obj-$(CONFIG_OMAP_MCBSP) += mcbsp.o obj-$(CONFIG_OMAP_32K_TIMER) += timer32k.o # Power Management -obj-$(CONFIG_PM) += pm.o sleep.o pm_bus.o +obj-$(CONFIG_PM) += pm.o sleep.o # DSP obj-$(CONFIG_OMAP_MBOX_FWK) += mailbox_mach.o diff --git a/arch/arm/mach-omap1/pm_bus.c b/arch/arm/mach-omap1/pm_bus.c index fe31d933f0ed..334fb8871bc3 100644 --- a/arch/arm/mach-omap1/pm_bus.c +++ b/arch/arm/mach-omap1/pm_bus.c @@ -56,9 +56,13 @@ static struct dev_power_domain default_power_domain = { USE_PLATFORM_PM_SLEEP_OPS }, }; +#define OMAP1_PWR_DOMAIN (&default_power_domain) +#else +#define OMAP1_PWR_DOMAIN NULL +#endif /* CONFIG_PM_RUNTIME */ static struct pm_clk_notifier_block platform_bus_notifier = { - .pwr_domain = &default_power_domain, + .pwr_domain = OMAP1_PWR_DOMAIN, .con_ids = { "ick", "fck", NULL, }, }; @@ -72,4 +76,4 @@ static int __init omap1_pm_runtime_init(void) return 0; } core_initcall(omap1_pm_runtime_init); -#endif /* CONFIG_PM_RUNTIME */ + From c46a131c0c0f4c2457e6b1e430c578a5cb057334 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 5 Jun 2011 11:12:31 +0000 Subject: [PATCH 079/327] xfs: fix ->mknod() return value on xfs_get_acl() failure ->mknod() should return negative on errors and PTR_ERR() gives already negative value... Signed-off-by: Al Viro Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_iops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index dd21784525a8..d44d92cd12b1 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -182,7 +182,7 @@ xfs_vn_mknod( if (IS_POSIXACL(dir)) { default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); if (IS_ERR(default_acl)) - return -PTR_ERR(default_acl); + return PTR_ERR(default_acl); if (!default_acl) mode &= ~current_umask(); From ded509880f6a0213b09f8ae7bef84acb16eaccbf Mon Sep 17 00:00:00 2001 From: "Roy.Li" Date: Fri, 20 May 2011 10:38:06 +0800 Subject: [PATCH 080/327] SELinux: skip file_name_trans_write() when policy downgraded. When policy version is less than POLICYDB_VERSION_FILENAME_TRANS, skip file_name_trans_write(). Signed-off-by: Roy.Li Signed-off-by: Eric Paris --- security/selinux/ss/policydb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 102e9ec1b77a..d246aca3f4fb 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -3222,6 +3222,9 @@ static int filename_trans_write(struct policydb *p, void *fp) __le32 buf[1]; int rc; + if (p->policyvers < POLICYDB_VERSION_FILENAME_TRANS) + return 0; + nel = 0; rc = hashtab_map(p->filename_trans, hashtab_cnt, &nel); if (rc) From 9a432736904d386cda28b987b38ba14dae960ecc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 30 May 2011 20:38:55 -0700 Subject: [PATCH 081/327] rcu: Simplify curing of load woes Make the functions creating the kthreads wake them up. Leverage the fact that the per-node and boost kthreads can run anywhere, thus dispensing with the need to wake them up once the incoming CPU has gone fully online. Signed-off-by: Paul E. McKenney Tested-by: Daniel J Blueman --- kernel/rcutree.c | 65 +++++++++++++---------------------------- kernel/rcutree_plugin.h | 11 +------ 2 files changed, 22 insertions(+), 54 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 89419ff92e99..0a8ec5b2e208 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1635,6 +1635,20 @@ static int rcu_cpu_kthread(void *arg) * to manipulate rcu_cpu_kthread_task. There might be another CPU * attempting to access it during boot, but the locking in kthread_bind() * will enforce sufficient ordering. + * + * Please note that we cannot simply refuse to wake up the per-CPU + * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state, + * which can result in softlockup complaints if the task ends up being + * idle for more than a couple of minutes. + * + * However, please note also that we cannot bind the per-CPU kthread to its + * CPU until that CPU is fully online. We also cannot wait until the + * CPU is fully online before we create its per-CPU kthread, as this would + * deadlock the system when CPU notifiers tried waiting for grace + * periods. So we bind the per-CPU kthread to its CPU only if the CPU + * is online. If its CPU is not yet fully online, then the code in + * rcu_cpu_kthread() will wait until it is fully online, and then do + * the binding. */ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) { @@ -1647,12 +1661,14 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); if (IS_ERR(t)) return PTR_ERR(t); - kthread_bind(t, cpu); + if (cpu_online(cpu)) + kthread_bind(t, cpu); per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); - per_cpu(rcu_cpu_kthread_task, cpu) = t; sp.sched_priority = RCU_KTHREAD_PRIO; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + per_cpu(rcu_cpu_kthread_task, cpu) = t; + wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */ return 0; } @@ -1759,12 +1775,11 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, raw_spin_unlock_irqrestore(&rnp->lock, flags); sp.sched_priority = 99; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ } return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); } -static void rcu_wake_one_boost_kthread(struct rcu_node *rnp); - /* * Spawn all kthreads -- called as soon as the scheduler is running. */ @@ -1772,30 +1787,18 @@ static int __init rcu_spawn_kthreads(void) { int cpu; struct rcu_node *rnp; - struct task_struct *t; rcu_kthreads_spawnable = 1; for_each_possible_cpu(cpu) { per_cpu(rcu_cpu_has_work, cpu) = 0; - if (cpu_online(cpu)) { + if (cpu_online(cpu)) (void)rcu_spawn_one_cpu_kthread(cpu); - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (t) - wake_up_process(t); - } } rnp = rcu_get_root(rcu_state); (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - if (rnp->node_kthread_task) - wake_up_process(rnp->node_kthread_task); if (NUM_RCU_NODES > 1) { - rcu_for_each_leaf_node(rcu_state, rnp) { + rcu_for_each_leaf_node(rcu_state, rnp) (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - t = rnp->node_kthread_task; - if (t) - wake_up_process(t); - rcu_wake_one_boost_kthread(rnp); - } } return 0; } @@ -2220,31 +2223,6 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) } } -/* - * kthread_create() creates threads in TASK_UNINTERRUPTIBLE state, - * but the RCU threads are woken on demand, and if demand is low this - * could be a while triggering the hung task watchdog. - * - * In order to avoid this, poke all tasks once the CPU is fully - * up and running. - */ -static void __cpuinit rcu_online_kthreads(int cpu) -{ - struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); - struct rcu_node *rnp = rdp->mynode; - struct task_struct *t; - - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (t) - wake_up_process(t); - - t = rnp->node_kthread_task; - if (t) - wake_up_process(t); - - rcu_wake_one_boost_kthread(rnp); -} - /* * Handle CPU online/offline notification events. */ @@ -2262,7 +2240,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, rcu_prepare_kthreads(cpu); break; case CPU_ONLINE: - rcu_online_kthreads(cpu); case CPU_DOWN_FAILED: rcu_node_kthread_setaffinity(rnp, -1); rcu_cpu_kthread_setrt(cpu, 1); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c8bff3099a89..ea2e2fb79e81 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1299,15 +1299,10 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, raw_spin_unlock_irqrestore(&rnp->lock, flags); sp.sched_priority = RCU_KTHREAD_PRIO; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ return 0; } -static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) -{ - if (rnp->boost_kthread_task) - wake_up_process(rnp->boost_kthread_task); -} - #else /* #ifdef CONFIG_RCU_BOOST */ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) @@ -1331,10 +1326,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, return 0; } -static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) -{ -} - #endif /* #else #ifdef CONFIG_RCU_BOOST */ #ifndef CONFIG_SMP From 09223371deac67d08ca0b70bd18787920284c967 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 14 Jun 2011 13:26:25 +0800 Subject: [PATCH 082/327] rcu: Use softirq to address performance regression Commit a26ac2455ffcf3(rcu: move TREE_RCU from softirq to kthread) introduced performance regression. In an AIM7 test, this commit degraded performance by about 40%. The commit runs rcu callbacks in a kthread instead of softirq. We observed high rate of context switch which is caused by this. Out test system has 64 CPUs and HZ is 1000, so we saw more than 64k context switch per second which is caused by RCU's per-CPU kthread. A trace showed that most of the time the RCU per-CPU kthread doesn't actually handle any callbacks, but instead just does a very small amount of work handling grace periods. This means that RCU's per-CPU kthreads are making the scheduler do quite a bit of work in order to allow a very small amount of RCU-related processing to be done. Alex Shi's analysis determined that this slowdown is due to lock contention within the scheduler. Unfortunately, as Peter Zijlstra points out, the scheduler's real-time semantics require global action, which means that this contention is inherent in real-time scheduling. (Yes, perhaps someone will come up with a workaround -- otherwise, -rt is not going to do well on large SMP systems -- but this patch will work around this issue in the meantime. And "the meantime" might well be forever.) This patch therefore re-introduces softirq processing to RCU, but only for core RCU work. RCU callbacks are still executed in kthread context, so that only a small amount of RCU work runs in softirq context in the common case. This should minimize ksoftirqd execution, allowing us to skip boosting of ksoftirqd for CONFIG_RCU_BOOST=y kernels. Signed-off-by: Shaohua Li Tested-by: "Alex,Shi" Signed-off-by: Paul E. McKenney --- Documentation/filesystems/proc.txt | 1 + include/linux/interrupt.h | 1 + include/trace/events/irq.h | 3 ++- kernel/rcutree.c | 23 +++++++++++++++++++---- kernel/rcutree.h | 1 + kernel/rcutree_plugin.h | 9 +++++++++ kernel/softirq.c | 2 +- tools/perf/util/trace-event-parse.c | 1 + 8 files changed, 35 insertions(+), 6 deletions(-) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index f48178024067..db3b1aba32a3 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -843,6 +843,7 @@ Provides counts of softirq handlers serviced since boot time, for each cpu. TASKLET: 0 0 0 290 SCHED: 27035 26983 26971 26746 HRTIMER: 0 0 0 0 + RCU: 1678 1769 2178 2250 1.3 IDE devices in /proc/ide diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 6c12989839d9..f6efed0039ed 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -414,6 +414,7 @@ enum TASKLET_SOFTIRQ, SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, + RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS }; diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index ae045ca7d356..1c09820df585 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -20,7 +20,8 @@ struct softirq_action; softirq_name(BLOCK_IOPOLL), \ softirq_name(TASKLET), \ softirq_name(SCHED), \ - softirq_name(HRTIMER)) + softirq_name(HRTIMER), \ + softirq_name(RCU)) /** * irq_handler_entry - called immediately before the irq action handler diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0a8ec5b2e208..ae5c9ea68662 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -100,6 +100,7 @@ static char rcu_kthreads_spawnable; static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); static void invoke_rcu_cpu_kthread(void); +static void __invoke_rcu_cpu_kthread(void); #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ @@ -1442,13 +1443,21 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) } /* If there are callbacks ready, invoke them. */ - rcu_do_batch(rsp, rdp); + if (cpu_has_callbacks_ready_to_invoke(rdp)) + __invoke_rcu_cpu_kthread(); +} + +static void rcu_kthread_do_work(void) +{ + rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); + rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); + rcu_preempt_do_callbacks(); } /* * Do softirq processing for the current CPU. */ -static void rcu_process_callbacks(void) +static void rcu_process_callbacks(struct softirq_action *unused) { __rcu_process_callbacks(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); @@ -1465,7 +1474,7 @@ static void rcu_process_callbacks(void) * the current CPU with interrupts disabled, the rcu_cpu_kthread_task * cannot disappear out from under us. */ -static void invoke_rcu_cpu_kthread(void) +static void __invoke_rcu_cpu_kthread(void) { unsigned long flags; @@ -1479,6 +1488,11 @@ static void invoke_rcu_cpu_kthread(void) local_irq_restore(flags); } +static void invoke_rcu_cpu_kthread(void) +{ + raise_softirq(RCU_SOFTIRQ); +} + /* * Wake up the specified per-rcu_node-structure kthread. * Because the per-rcu_node kthreads are immortal, we don't need @@ -1613,7 +1627,7 @@ static int rcu_cpu_kthread(void *arg) *workp = 0; local_irq_restore(flags); if (work) - rcu_process_callbacks(); + rcu_kthread_do_work(); local_bh_enable(); if (*workp != 0) spincnt++; @@ -2387,6 +2401,7 @@ void __init rcu_init(void) rcu_init_one(&rcu_sched_state, &rcu_sched_data); rcu_init_one(&rcu_bh_state, &rcu_bh_data); __rcu_init_preempt(); + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); /* * We don't need protection against CPU-hotplug here because diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7b9a08b4aaea..0fed6b934d2a 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -439,6 +439,7 @@ static void rcu_preempt_offline_cpu(int cpu); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_preempt_check_callbacks(int cpu); static void rcu_preempt_process_callbacks(void); +static void rcu_preempt_do_callbacks(void); void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index ea2e2fb79e81..38d09c5f2b41 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -602,6 +602,11 @@ static void rcu_preempt_process_callbacks(void) &__get_cpu_var(rcu_preempt_data)); } +static void rcu_preempt_do_callbacks(void) +{ + rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data)); +} + /* * Queue a preemptible-RCU callback for invocation after a grace period. */ @@ -997,6 +1002,10 @@ static void rcu_preempt_process_callbacks(void) { } +static void rcu_preempt_do_callbacks(void) +{ +} + /* * Wait for an rcu-preempt grace period, but make it happen quickly. * But because preemptible RCU does not exist, map to rcu-sched. diff --git a/kernel/softirq.c b/kernel/softirq.c index 13960170cad4..40cf63ddd4b3 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -58,7 +58,7 @@ DEFINE_PER_CPU(struct task_struct *, ksoftirqd); char *softirq_to_name[NR_SOFTIRQS] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", - "TASKLET", "SCHED", "HRTIMER" + "TASKLET", "SCHED", "HRTIMER", "RCU" }; /* diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 1e88485c16a0..0a7ed5b5e281 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -2187,6 +2187,7 @@ static const struct flag flags[] = { { "TASKLET_SOFTIRQ", 6 }, { "SCHED_SOFTIRQ", 7 }, { "HRTIMER_SOFTIRQ", 8 }, + { "RCU_SOFTIRQ", 9 }, { "HRTIMER_NORESTART", 0 }, { "HRTIMER_RESTART", 1 }, From 05a7929f31a0d516a9c19012bf27a1ea5058dc7a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 15 Jun 2011 06:16:35 +0000 Subject: [PATCH 083/327] ARM: mach-shmobile: mackerel: tidyup usbhs driver settings - usb0 pipe is same as default. own pipe config is not needed - usb1 lost get_id function Signed-off-by: Kuninori Morimoto Acked-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-mackerel.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index 1037bd2ffdb9..7e1d37584321 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -629,19 +629,6 @@ static void usbhs0_hardware_exit(struct platform_device *pdev) cancel_delayed_work_sync(&priv->work); } -static u32 usbhs0_pipe_cfg[] = { - USB_ENDPOINT_XFER_CONTROL, - USB_ENDPOINT_XFER_ISOC, - USB_ENDPOINT_XFER_ISOC, - USB_ENDPOINT_XFER_BULK, - USB_ENDPOINT_XFER_BULK, - USB_ENDPOINT_XFER_BULK, - USB_ENDPOINT_XFER_INT, - USB_ENDPOINT_XFER_INT, - USB_ENDPOINT_XFER_INT, - USB_ENDPOINT_XFER_BULK, -}; - static struct usbhs_private usbhs0_private = { .usbcrcaddr = 0xe605810c, /* USBCR2 */ .info = { @@ -654,8 +641,6 @@ static struct usbhs_private usbhs0_private = { }, .driver_param = { .buswait_bwait = 4, - .pipe_type = usbhs0_pipe_cfg, - .pipe_size = ARRAY_SIZE(usbhs0_pipe_cfg), }, }, }; @@ -786,6 +771,11 @@ static void usbhs1_hardware_exit(struct platform_device *pdev) free_irq(IRQ8, pdev); } +static int usbhs1_get_id(struct platform_device *pdev) +{ + return USBHS_GADGET; +} + static u32 usbhs1_pipe_cfg[] = { USB_ENDPOINT_XFER_CONTROL, USB_ENDPOINT_XFER_ISOC, @@ -812,6 +802,7 @@ static struct usbhs_private usbhs1_private = { .platform_callback = { .hardware_init = usbhs1_hardware_init, .hardware_exit = usbhs1_hardware_exit, + .get_id = usbhs1_get_id, .phy_reset = usbhs_phy_reset, .get_vbus = usbhs_get_vbus, }, From 8dd0de8be31b4b966d17750a0b10df2f575c91ac Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Tue, 14 Jun 2011 18:36:24 -0400 Subject: [PATCH 084/327] sched: Fix need_resched() when checking peempt The RT preempt check tests the wrong task if NEED_RESCHED is set. It currently checks the local CPU task. It is supposed to check the task that is running on the runqueue we are about to wake another task on. Signed-off-by: Hillf Danton Reviewed-by: Yong Zhang Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/r/20110614223657.450239027@goodmis.org Signed-off-by: Ingo Molnar --- kernel/sched_rt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 88725c939e0b..9b8d5dce946e 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1096,7 +1096,7 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag * to move current somewhere else, making room for our non-migratable * task. */ - if (p->prio == rq->curr->prio && !need_resched()) + if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr)) check_preempt_equal_prio(rq, p); #endif } From 0da938c44921cfb690283d3b0c9c48a10375db2c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 14 Jun 2011 18:36:25 -0400 Subject: [PATCH 085/327] sched: Check if lowest_mask is initialized in find_lowest_rq() On system boot up, the lowest_mask is initialized with an early_initcall(). But RT tasks may wake up on other early_initcall() callers before the lowest_mask is initialized, causing a system crash. Commit "d72bce0e67 rcu: Cure load woes" was the first commit to wake up RT tasks in early init. Before this commit this bug should not happen. Reported-by: Andrew Theurer Tested-by: Andrew Theurer Tested-by: Paul E. McKenney Signed-off-by: Steven Rostedt Acked-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110614223657.824872966@goodmis.org Signed-off-by: Ingo Molnar --- kernel/sched_rt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 9b8d5dce946e..10d018212bab 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1239,6 +1239,10 @@ static int find_lowest_rq(struct task_struct *task) int this_cpu = smp_processor_id(); int cpu = task_cpu(task); + /* Make sure the mask is initialized first */ + if (unlikely(!lowest_mask)) + return -1; + if (task->rt.nr_cpus_allowed == 1) return -1; /* No other targets possible */ From a7b21165c06f28230768d203285d06cac4f18f0b Mon Sep 17 00:00:00 2001 From: Yogesh Ashok Powar Date: Mon, 13 Jun 2011 09:49:27 +0530 Subject: [PATCH 086/327] mwifiex: Fixing NULL pointer dereference Following OOPS was seen when booting with card inserted BUG: unable to handle kernel NULL pointer dereference at 0000004c IP: [] cfg80211_get_drvinfo+0x21/0x115 [cfg80211] *pde = 00000000 Oops: 0000 [#1] SMP Modules linked in: iwl3945 iwl_legacy mwifiex_sdio mac80211 11 sdhci_pci sdhci pl2303 'ethtool' on the mwifiex device returned this OOPS as wiphy_dev() returned NULL. Adding missing set_wiphy_dev() call to fix the problem. Signed-off-by: Yogesh Ashok Powar Signed-off-by: John W. Linville --- drivers/net/wireless/mwifiex/cfg80211.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index 660831ce293c..687c1f223497 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -1288,6 +1288,8 @@ int mwifiex_register_cfg80211(struct net_device *dev, u8 *mac, *(unsigned long *) wdev_priv = (unsigned long) priv; + set_wiphy_dev(wdev->wiphy, (struct device *) priv->adapter->dev); + ret = wiphy_register(wdev->wiphy); if (ret < 0) { dev_err(priv->adapter->dev, "%s: registering cfg80211 device\n", From 3373b28e5af4a0b3c6cb39372581dcc1e41322ff Mon Sep 17 00:00:00 2001 From: Nishant Sarmukadam Date: Mon, 13 Jun 2011 16:26:15 +0530 Subject: [PATCH 087/327] mwl8k: Tell firmware to generate CCMP header Post commit e4eefec73ea0a740bfe8736e3ac30dfe92fe392b, the stack is not generating the CCMP header for us anymore. This broke the CCMP functionality since firmware was not doing this either. Set a flag to tell the firmware to generate the CCMP header Signed-off-by: Nishant Sarmukadam Signed-off-by: John W. Linville --- drivers/net/wireless/mwl8k.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c index 32261189bcef..aeac3cc4dbe4 100644 --- a/drivers/net/wireless/mwl8k.c +++ b/drivers/net/wireless/mwl8k.c @@ -2474,6 +2474,7 @@ struct mwl8k_cmd_set_hw_spec { * faster client. */ #define MWL8K_SET_HW_SPEC_FLAG_ENABLE_LIFE_TIME_EXPIRY 0x00000400 +#define MWL8K_SET_HW_SPEC_FLAG_GENERATE_CCMP_HDR 0x00000200 #define MWL8K_SET_HW_SPEC_FLAG_HOST_DECR_MGMT 0x00000080 #define MWL8K_SET_HW_SPEC_FLAG_HOSTFORM_PROBERESP 0x00000020 #define MWL8K_SET_HW_SPEC_FLAG_HOSTFORM_BEACON 0x00000010 @@ -2510,7 +2511,8 @@ static int mwl8k_cmd_set_hw_spec(struct ieee80211_hw *hw) cmd->flags = cpu_to_le32(MWL8K_SET_HW_SPEC_FLAG_HOST_DECR_MGMT | MWL8K_SET_HW_SPEC_FLAG_HOSTFORM_PROBERESP | MWL8K_SET_HW_SPEC_FLAG_HOSTFORM_BEACON | - MWL8K_SET_HW_SPEC_FLAG_ENABLE_LIFE_TIME_EXPIRY); + MWL8K_SET_HW_SPEC_FLAG_ENABLE_LIFE_TIME_EXPIRY | + MWL8K_SET_HW_SPEC_FLAG_GENERATE_CCMP_HDR); cmd->num_tx_desc_per_queue = cpu_to_le32(MWL8K_TX_DESCS); cmd->total_rxd = cpu_to_le32(MWL8K_RX_DESCS); From e72888e91cc902ccdc089f237b6eed7587e2b4df Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 15 Jun 2011 15:14:49 +0200 Subject: [PATCH 088/327] ALSA: lola - Fix section mismatch Add missing __devinit. Signed-off-by: Takashi Iwai --- sound/pci/lola/lola.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/lola/lola.c b/sound/pci/lola/lola.c index 34b24286d279..2692e5ae5f2d 100644 --- a/sound/pci/lola/lola.c +++ b/sound/pci/lola/lola.c @@ -445,7 +445,7 @@ static void lola_reset_setups(struct lola *chip) lola_setup_all_analog_gains(chip, PLAY, false); /* output, update */ } -static int lola_parse_tree(struct lola *chip) +static int __devinit lola_parse_tree(struct lola *chip) { unsigned int val; int nid, err; From 9e3bd4e24e94d60d2e0762e919aab6c9a7fc0c5b Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 31 May 2011 21:46:50 -0400 Subject: [PATCH 089/327] NFS: fix umount of pnfs filesystems Unmounting a pnfs filesystem hangs using filelayout and possibly others. This fixes the use of the rcu protected node by making use of a new 'tmpnode' for the temporary purge list. Also, the spinlock shouldn't be held when calling synchronize_rcu(). Signed-off-by: Weston Andros Adamson Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.h | 1 + fs/nfs/pnfs_dev.c | 16 +++++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 48d0a8e4d062..96bf4e6f45be 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -186,6 +186,7 @@ int pnfs_ld_read_done(struct nfs_read_data *); /* pnfs_dev.c */ struct nfs4_deviceid_node { struct hlist_node node; + struct hlist_node tmpnode; const struct pnfs_layoutdriver_type *ld; const struct nfs_client *nfs_client; struct nfs4_deviceid deviceid; diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index c65e133ce9c0..5944d4b369a2 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -174,6 +174,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, const struct nfs4_deviceid *id) { INIT_HLIST_NODE(&d->node); + INIT_HLIST_NODE(&d->tmpnode); d->ld = ld; d->nfs_client = nfs_client; d->deviceid = *id; @@ -238,24 +239,29 @@ static void _deviceid_purge_client(const struct nfs_client *clp, long hash) { struct nfs4_deviceid_node *d; - struct hlist_node *n, *next; + struct hlist_node *n; HLIST_HEAD(tmp); + spin_lock(&nfs4_deviceid_lock); rcu_read_lock(); hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) if (d->nfs_client == clp && atomic_read(&d->ref)) { hlist_del_init_rcu(&d->node); - hlist_add_head(&d->node, &tmp); + hlist_add_head(&d->tmpnode, &tmp); } rcu_read_unlock(); + spin_unlock(&nfs4_deviceid_lock); if (hlist_empty(&tmp)) return; synchronize_rcu(); - hlist_for_each_entry_safe(d, n, next, &tmp, node) + while (!hlist_empty(&tmp)) { + d = hlist_entry(tmp.first, struct nfs4_deviceid_node, tmpnode); + hlist_del(&d->tmpnode); if (atomic_dec_and_test(&d->ref)) d->ld->free_deviceid_node(d); + } } void @@ -263,8 +269,8 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp) { long h; - spin_lock(&nfs4_deviceid_lock); + if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS)) + return; for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) _deviceid_purge_client(clp, h); - spin_unlock(&nfs4_deviceid_lock); } From 0b760113a3a155269a3fba93a409c640031dd68f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 31 May 2011 15:15:34 -0400 Subject: [PATCH 090/327] NLM: Don't hang forever on NLM unlock requests If the NLM daemon is killed on the NFS server, we can currently end up hanging forever on an 'unlock' request, instead of aborting. Basically, if the rpcbind request fails, or the server keeps returning garbage, we really want to quit instead of retrying. Tested-by: Vasily Averin Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/lockd/clntproc.c | 8 +++++++- include/linux/sunrpc/sched.h | 3 ++- net/sunrpc/clnt.c | 3 +++ net/sunrpc/sched.c | 1 + 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index adb45ec9038c..e374050a911c 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -708,7 +708,13 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data) if (task->tk_status < 0) { dprintk("lockd: unlock failed (err = %d)\n", -task->tk_status); - goto retry_rebind; + switch (task->tk_status) { + case -EACCES: + case -EIO: + goto die; + default: + goto retry_rebind; + } } if (status == NLM_LCK_DENIED_GRACE_PERIOD) { rpc_delay(task, NLMCLNT_GRACE_WAIT); diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index f73c482ec9c6..fe2d8e6b923b 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -84,7 +84,8 @@ struct rpc_task { #endif unsigned char tk_priority : 2,/* Task priority */ tk_garb_retry : 2, - tk_cred_retry : 2; + tk_cred_retry : 2, + tk_rebind_retry : 2; }; #define tk_xprt tk_client->cl_xprt diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b84d7395535e..566bcfd067f6 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1175,6 +1175,9 @@ call_bind_status(struct rpc_task *task) status = -EOPNOTSUPP; break; } + if (task->tk_rebind_retry == 0) + break; + task->tk_rebind_retry--; rpc_delay(task, 3*HZ); goto retry_timeout; case -ETIMEDOUT: diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 6b43ee7221d5..a27406b1654f 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -792,6 +792,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta /* Initialize retry counters */ task->tk_garb_retry = 2; task->tk_cred_retry = 2; + task->tk_rebind_retry = 2; task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW; task->tk_owner = current->tgid; From 0f66b5984df2fe1617c05900a39a7ef493ca9de9 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 16 Oct 2010 22:07:46 -0700 Subject: [PATCH 091/327] NFS41: do not update isize if inode needs layoutcommit nfs_update_inode will update isize if there is no queued pages. For pNFS, layoutcommit is supposed to change file size on server, the same effect as queued pages. nfs_update_inode may be called when dirty pages are written back (nfsi->npages==0) but layoutcommit is not sent, and it will change client file size according to server file size. Then client ends up losing what it just writes back in pNFS path. So we should skip updating client file size if file needs layoutcommit. Signed-off-by: Peng Tao Cc: stable@kernel.org [2.6.39] Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 144f2a3c7185..3f1eb8180373 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1294,7 +1294,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (new_isize != cur_isize) { /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ - if (nfsi->npages == 0 || new_isize > cur_isize) { + if ((nfsi->npages == 0 && !test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) || + new_isize > cur_isize) { i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } From c9c30dd5f73dccaa326a54dfcf490316946aea87 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sat, 11 Jun 2011 17:08:39 -0400 Subject: [PATCH 092/327] NFSv4.1: deprecate headerpadsz in CREATE_SESSION We don't support header padding yet so better off ditching it Reported-by: Sid Moore Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 ---- fs/nfs/nfs4xdr.c | 10 ++++++---- include/linux/nfs_xdr.h | 1 - 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d2c4b59c896d..3ca449789545 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5098,7 +5098,6 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) if (mxresp_sz == 0) mxresp_sz = NFS_MAX_FILE_IO_SIZE; /* Fore channel attributes */ - args->fc_attrs.headerpadsz = 0; args->fc_attrs.max_rqst_sz = mxrqst_sz; args->fc_attrs.max_resp_sz = mxresp_sz; args->fc_attrs.max_ops = NFS4_MAX_OPS; @@ -5111,7 +5110,6 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) args->fc_attrs.max_ops, args->fc_attrs.max_reqs); /* Back channel attributes */ - args->bc_attrs.headerpadsz = 0; args->bc_attrs.max_rqst_sz = PAGE_SIZE; args->bc_attrs.max_resp_sz = PAGE_SIZE; args->bc_attrs.max_resp_sz_cached = 0; @@ -5131,8 +5129,6 @@ static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args struct nfs4_channel_attrs *sent = &args->fc_attrs; struct nfs4_channel_attrs *rcvd = &session->fc_attrs; - if (rcvd->headerpadsz > sent->headerpadsz) - return -EINVAL; if (rcvd->max_resp_sz > sent->max_resp_sz) return -EINVAL; /* diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index d869a5e5464b..c4b7d6c04948 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1725,7 +1725,7 @@ static void encode_create_session(struct xdr_stream *xdr, *p++ = cpu_to_be32(args->flags); /*flags */ /* Fore Channel */ - *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */ + *p++ = cpu_to_be32(0); /* header padding size */ *p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */ *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */ *p++ = cpu_to_be32(max_resp_sz_cached); /* Max resp sz cached */ @@ -1734,7 +1734,7 @@ static void encode_create_session(struct xdr_stream *xdr, *p++ = cpu_to_be32(0); /* rdmachannel_attrs */ /* Back Channel */ - *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */ + *p++ = cpu_to_be32(0); /* header padding size */ *p++ = cpu_to_be32(args->bc_attrs.max_rqst_sz); /* max req size */ *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz); /* max resp size */ *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */ @@ -4997,12 +4997,14 @@ static int decode_chan_attrs(struct xdr_stream *xdr, struct nfs4_channel_attrs *attrs) { __be32 *p; - u32 nr_attrs; + u32 nr_attrs, val; p = xdr_inline_decode(xdr, 28); if (unlikely(!p)) goto out_overflow; - attrs->headerpadsz = be32_to_cpup(p++); + val = be32_to_cpup(p++); /* headerpadsz */ + if (val) + return -EINVAL; /* no support for header padding yet */ attrs->max_rqst_sz = be32_to_cpup(p++); attrs->max_resp_sz = be32_to_cpup(p++); attrs->max_resp_sz_cached = be32_to_cpup(p++); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 5e8444a11adf..00848d86ffb2 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -158,7 +158,6 @@ struct nfs_seqid; /* nfs41 sessions channel attributes */ struct nfs4_channel_attrs { - u32 headerpadsz; u32 max_rqst_sz; u32 max_resp_sz; u32 max_resp_sz_cached; From 1d92a08da23848a38eece4df7eaa4e8ec0e6c699 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Jun 2011 12:07:38 -0400 Subject: [PATCH 093/327] NFSv4.1: Fix a refcounting issue in the pNFS device id cache When we add something to the global device id cache, we need to bump the reference count, so that the cache itself holds a reference. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs_dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 5944d4b369a2..f0f8e1e22f6c 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -209,6 +209,7 @@ nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new) hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); spin_unlock(&nfs4_deviceid_lock); + atomic_inc(&new->ref); return new; } From 533eb4611c9eea53072eb6a61d5a6393b6a77ed7 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 13 Jun 2011 18:25:56 -0400 Subject: [PATCH 094/327] NFSv4.1: allow nfs_fhget to succeed with mounted on fileid Commit 28331a46d88459788c8fca72dbb0415cd7f514c9 "Ensure we request the ordinary fileid when doing readdirplus" changed the meaning of NFS_ATTR_FATTR_FILEID which used to be set when FATTR4_WORD1_MOUNTED_ON_FILED was requested. Allow nfs_fhget to succeed with only a mounted on fileid when crossing a mountpoint or a referral. Ask for the fileid of the absent file system if mounted_on_fileid is not supported. Signed-off-by: Andy Adamson cc:stable@kernel.org [2.6.39] Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 3 ++- fs/nfs/internal.h | 11 +++++++++++ fs/nfs/nfs4proc.c | 33 +++++++++++++++++++++++---------- 3 files changed, 36 insertions(+), 11 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 3f1eb8180373..6f4850deb272 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -256,7 +256,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) nfs_attr_check_mountpoint(sb, fattr); - if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0 && (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0) + if (((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0) && + !nfs_attr_use_mounted_on_fileid(fattr)) goto out_no_inode; if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0) goto out_no_inode; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b9056cbe68d6..2a55347a2daa 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -45,6 +45,17 @@ static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct fattr->valid |= NFS_ATTR_FATTR_MOUNTPOINT; } +static inline int nfs_attr_use_mounted_on_fileid(struct nfs_fattr *fattr) +{ + if (((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) == 0) || + (((fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0) && + ((fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) == 0))) + return 0; + + fattr->fileid = fattr->mounted_on_fileid; + return 1; +} + struct nfs_clone_mount { const struct super_block *sb; const struct dentry *dentry; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3ca449789545..289c539bd6b3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2265,12 +2265,14 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, return nfs4_map_errors(status); } +static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); /* * Get locations and (maybe) other attributes of a referral. * Note that we'll actually follow the referral later when * we detect fsid mismatch in inode revalidation */ -static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct nfs_fattr *fattr, struct nfs_fh *fhandle) +static int nfs4_get_referral(struct inode *dir, const struct qstr *name, + struct nfs_fattr *fattr, struct nfs_fh *fhandle) { int status = -ENOMEM; struct page *page = NULL; @@ -2288,15 +2290,16 @@ static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct goto out; /* Make sure server returned a different fsid for the referral */ if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) { - dprintk("%s: server did not return a different fsid for a referral at %s\n", __func__, name->name); + dprintk("%s: server did not return a different fsid for" + " a referral at %s\n", __func__, name->name); status = -EIO; goto out; } + /* Fixup attributes for the nfs_lookup() call to nfs_fhget() */ + nfs_fixup_referral_attributes(&locations->fattr); + /* replace the lookup nfs_fattr with the locations nfs_fattr */ memcpy(fattr, &locations->fattr, sizeof(struct nfs_fattr)); - fattr->valid |= NFS_ATTR_FATTR_V4_REFERRAL; - if (!fattr->mode) - fattr->mode = S_IFDIR; memset(fhandle, 0, sizeof(struct nfs_fh)); out: if (page) @@ -4667,11 +4670,15 @@ static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list, return len; } +/* + * nfs_fhget will use either the mounted_on_fileid or the fileid + */ static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr) { - if (!((fattr->valid & NFS_ATTR_FATTR_FILEID) && - (fattr->valid & NFS_ATTR_FATTR_FSID) && - (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL))) + if (!(((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) || + (fattr->valid & NFS_ATTR_FATTR_FILEID)) && + (fattr->valid & NFS_ATTR_FATTR_FSID) && + (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL))) return; fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE | @@ -4686,7 +4693,6 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, struct nfs_server *server = NFS_SERVER(dir); u32 bitmask[2] = { [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, - [1] = FATTR4_WORD1_MOUNTED_ON_FILEID, }; struct nfs4_fs_locations_arg args = { .dir_fh = NFS_FH(dir), @@ -4705,11 +4711,18 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, int status; dprintk("%s: start\n", __func__); + + /* Ask for the fileid of the absent filesystem if mounted_on_fileid + * is not supported */ + if (NFS_SERVER(dir)->attr_bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) + bitmask[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID; + else + bitmask[0] |= FATTR4_WORD0_FILEID; + nfs_fattr_init(&fs_locations->fattr); fs_locations->server = server; fs_locations->nlocations = 0; status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); - nfs_fixup_referral_attributes(&fs_locations->fattr); dprintk("%s: returned status = %d\n", __func__, status); return status; } From cec765cf5891c7fc3d905832b481bfb6fd55825d Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 13 Jun 2011 18:36:17 -0400 Subject: [PATCH 095/327] NFSv4.1: allow zero fh array in filelayout decode layout Signed-off-by: Andy Adamson cc:stable@kernel.org [2.6.39] Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 426908809c97..5d6f369b15d0 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -552,13 +552,18 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, __func__, nfl_util, fl->num_fh, fl->first_stripe_index, fl->pattern_offset); - if (!fl->num_fh) + /* Note that a zero value for num_fh is legal for STRIPE_SPARSE. + * Futher checking is done in filelayout_check_layout */ + if (fl->num_fh < 0 || fl->num_fh > + max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT)) goto out_err; - fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), - gfp_flags); - if (!fl->fh_array) - goto out_err; + if (fl->num_fh > 0) { + fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), + gfp_flags); + if (!fl->fh_array) + goto out_err; + } for (i = 0; i < fl->num_fh; i++) { /* Do we want to use a mempool here? */ From a2e1d4f2e5ed83850de92a491ef225824cb457bd Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Mon, 13 Jun 2011 18:54:53 -0400 Subject: [PATCH 096/327] nfs4.1: fix several problems with _pnfs_return_layout _pnfs_return_layout had the following problems: - it did not call pnfs_free_lseg_list on all paths - it unintentionally did a forgetful return when there was no outstanding io - it raced with concurrent LAYOUTGETS Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 ++++---- fs/nfs/pnfs.c | 8 +++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 289c539bd6b3..5879b23e0c99 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5706,6 +5706,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) { struct nfs4_layoutreturn *lrp = calldata; struct nfs_server *server; + struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; dprintk("--> %s\n", __func__); @@ -5717,16 +5718,15 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) nfs_restart_rpc(task, lrp->clp); return; } + spin_lock(&lo->plh_inode->i_lock); if (task->tk_status == 0) { - struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; - if (lrp->res.lrs_present) { - spin_lock(&lo->plh_inode->i_lock); pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); - spin_unlock(&lo->plh_inode->i_lock); } else BUG_ON(!list_empty(&lo->plh_segs)); } + lo->plh_block_lgets--; + spin_unlock(&lo->plh_inode->i_lock); dprintk("<-- %s\n", __func__); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 8c1309d852a6..25de6b27bdf4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -634,12 +634,14 @@ _pnfs_return_layout(struct inode *ino) spin_lock(&ino->i_lock); lo = nfsi->layout; - if (!lo || !mark_matching_lsegs_invalid(lo, &tmp_list, NULL)) { + if (!lo) { spin_unlock(&ino->i_lock); - dprintk("%s: no layout segments to return\n", __func__); - goto out; + dprintk("%s: no layout to return\n", __func__); + return status; } stateid = nfsi->layout->plh_stateid; + mark_matching_lsegs_invalid(lo, &tmp_list, NULL); + lo->plh_block_lgets++; /* Reference matched in nfs4_layoutreturn_release */ get_layout_hdr(lo); spin_unlock(&ino->i_lock); From d771e3a43e23a37398b7e05a9d1b1036d698263c Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 14 Jun 2011 16:30:16 -0400 Subject: [PATCH 097/327] NFSv4.1: fix break condition in pnfs_find_lseg The break condition to skip out of the loop got broken when cmp_layout was change. Essentially, we want to stop looking once we know no layout on the remainder of the list can match the first byte of the looked-up range. Reported-by: Peng Tao Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 25de6b27bdf4..d066aad608ad 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -889,7 +889,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, ret = get_lseg(lseg); break; } - if (cmp_layout(range, &lseg->pls_range) > 0) + if (lseg->pls_range.offset > range->offset) break; } From c7fd06228b994190d8369a2a0acf5224e4e13d1a Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 15 Jun 2011 00:55:44 +0100 Subject: [PATCH 098/327] NFS: (d)printks should use %zd for ssize_t arguments (d)printks should use %zd for ssize_t arguments not %ld, otherwise they might get a warning. I see the following with MN10300. fs/nfs/objlayout/objlayout.c: In function 'objlayout_read_done': fs/nfs/objlayout/objlayout.c:294: warning: format '%ld' expects type 'long int', but argument 3 has type 'ssize_t' Signed-off-by: David Howells cc: Trond Myklebust cc: linux-nfs@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objlayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index dc3956c0de80..1d06f8e2adea 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -291,7 +291,7 @@ objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync) struct nfs_read_data *rdata; state->status = status; - dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof); + dprintk("%s: Begin status=%zd eof=%d\n", __func__, status, eof); rdata = state->rpcdata; rdata->task.tk_status = status; if (status >= 0) { From 1ed3a8539af7b36aa5c977f304e80f7fc8d27bfc Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 15 Jun 2011 11:39:57 -0400 Subject: [PATCH 099/327] NFSv4.1: need to put_layout_hdr on _pnfs_return_layout error path We always get a reference on the layout header and we rely on nfs4_layoutreturn_release to put it. If we hit an allocation error before starting the rpc proc we bail out early without dereferncing the layout header properly. Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d066aad608ad..8f9582281252 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -652,6 +652,7 @@ _pnfs_return_layout(struct inode *ino) lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); if (unlikely(lrp == NULL)) { status = -ENOMEM; + put_layout_hdr(lo); goto out; } From ea0ded748bdea78f9e2fefb571f7d6ce9edb4f89 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 15 Jun 2011 12:31:02 -0400 Subject: [PATCH 100/327] nfs4.1: prevent race that allowed use of freed layout in _pnfs_return_layout mark_matching_lsegs_invalid could put the last ref to the layout, so the get_layout_hdr needs to be called first. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 8f9582281252..730d4dbbaf68 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -640,10 +640,10 @@ _pnfs_return_layout(struct inode *ino) return status; } stateid = nfsi->layout->plh_stateid; - mark_matching_lsegs_invalid(lo, &tmp_list, NULL); - lo->plh_block_lgets++; /* Reference matched in nfs4_layoutreturn_release */ get_layout_hdr(lo); + mark_matching_lsegs_invalid(lo, &tmp_list, NULL); + lo->plh_block_lgets++; spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); From 71d7aed014457147e8f71a843d5fbf03235e4a85 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 14 Jun 2011 14:24:32 -0400 Subject: [PATCH 101/327] Btrfs: fix path leakage on subvol deletion The delayed ref patch accidently removed the btrfs_free_path in btrfs_unlink_subvol, this puts it back and means we don't leak a path. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c15636b17874..5813dec5101c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3076,6 +3076,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, ret = btrfs_update_inode(trans, root, dir); BUG_ON(ret); + btrfs_free_path(path); return 0; } From 8351583e3f6e430ce8f71913909a96ad5cc6a2f6 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 14 Jun 2011 15:16:14 -0400 Subject: [PATCH 102/327] Btrfs: protect the pending_snapshots list with trans_lock Currently there is nothing protecting the pending_snapshots list on the transaction. We only hold the directory mutex that we are snapshotting and a read lock on the subvol_sem, so we could race with somebody else creating a snapshot in a different directory and end up with list corruption. So protect this list with the trans_lock. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b793d112d1f6..a3c4751e07db 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -482,8 +482,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, ret = btrfs_snap_reserve_metadata(trans, pending_snapshot); BUG_ON(ret); + spin_lock(&root->fs_info->trans_lock); list_add(&pending_snapshot->list, &trans->transaction->pending_snapshots); + spin_unlock(&root->fs_info->trans_lock); if (async_transid) { *async_transid = trans->transid; ret = btrfs_commit_transaction_async(trans, From ed0ca14021e5ae3147602128641aa7f742ab227c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 14 Jun 2011 16:22:15 -0400 Subject: [PATCH 103/327] Btrfs: set no_trans_join after trying to expand the transaction We can lockup if we try to allow new writers join the transaction and we have flushoncommit set or have a pending snapshot. This is because we set no_trans_join and then loop around and try to wait for ordered extents again. The problem is the ordered endio stuff needs to join the transaction, which it can't do because no_trans_join is set. So instead wait until after this loop to set no_trans_join and then make sure to wait for num_writers == 1 in case anybody got started in between us exiting the loop and setting no_trans_join. This could easily be reproduced by mounting -o flushoncommit and running xfstest 13. It cannot be reproduced with this patch. Thanks, Reported-by: Jim Schutt Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2b3590b9fe98..56695595e036 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1241,12 +1241,20 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, schedule_timeout(1); finish_wait(&cur_trans->writer_wait, &wait); - spin_lock(&root->fs_info->trans_lock); - root->fs_info->trans_no_join = 1; - spin_unlock(&root->fs_info->trans_lock); } while (atomic_read(&cur_trans->num_writers) > 1 || (should_grow && cur_trans->num_joined != joined)); + /* + * Ok now we need to make sure to block out any other joins while we + * commit the transaction. We could have started a join before setting + * no_join so make sure to wait for num_writers to == 1 again. + */ + spin_lock(&root->fs_info->trans_lock); + root->fs_info->trans_no_join = 1; + spin_unlock(&root->fs_info->trans_lock); + wait_event(cur_trans->writer_wait, + atomic_read(&cur_trans->num_writers) == 1); + ret = create_pending_snapshots(trans, root->fs_info); BUG_ON(ret); From 900cba8881b39dfbc7c8062098504ab93f5387a8 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 18 Dec 2009 10:31:31 +0100 Subject: [PATCH 104/327] xen: support CONFIG_MAXSMP The MAXSMP config option requires CPUMASK_OFFSTACK, which in turn requires we init the memory for the maps while we bring up the cpus. MAXSMP also increases NR_CPUS to 4096. This increase in size exposed an issue in the argument construction for multicalls from xen_flush_tlb_others. The args should only need space for the actual number of cpus. Also in 2.6.39 it exposes a bootup problem. BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] set_cpu_sibling_map+0x123/0x30d ... Call Trace: [] ? xen_restore_fl_direct_reloc+0x4/0x4 [] xen_smp_prepare_cpus+0x36/0x135 .. CC: stable@kernel.org Signed-off-by: Andrew Jones [v2: Updated to compile on 3.0] [v3: Updated to compile when CONFIG_SMP is not defined] Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 3 ++- arch/x86/xen/smp.c | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index afe1d54f980c..673e968df3cf 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -1231,7 +1232,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, { struct { struct mmuext_op op; - DECLARE_BITMAP(mask, NR_CPUS); + DECLARE_BITMAP(mask, num_processors); } *args; struct multicall_space mcs; diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 41038c01de40..b4533a86d7e4 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -205,11 +205,18 @@ static void __init xen_smp_prepare_boot_cpu(void) static void __init xen_smp_prepare_cpus(unsigned int max_cpus) { unsigned cpu; + unsigned int i; xen_init_lock_cpu(0); smp_store_cpu_info(0); cpu_data(0).x86_max_cores = 1; + + for_each_possible_cpu(i) { + zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); + zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); + zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); + } set_cpu_sibling_map(0); if (xen_smp_intr_init(0)) From b5328cd14557880e9eb757a8a9c8a88f1b23533a Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 15 Jun 2011 14:24:29 -0400 Subject: [PATCH 105/327] xen: Fix compile warning when CONFIG_SMP is not defined. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .. which is quite benign. drivers/xen/events.c:398: warning: unused variable ‘desc’ Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 553da68bd510..30df85d8fca8 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -395,9 +395,9 @@ static void unmask_evtchn(int port) static void xen_irq_init(unsigned irq) { struct irq_info *info; +#ifdef CONFIG_SMP struct irq_desc *desc = irq_to_desc(irq); -#ifdef CONFIG_SMP /* By default all event channels notify CPU#0. */ cpumask_copy(desc->irq_data.affinity, cpumask_of(0)); #endif From 9e2dfdb3081edfae66a49013517e80dd8a0469fa Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 15 Jun 2011 14:32:02 -0400 Subject: [PATCH 106/327] nfs4.1: mark layout as bad on error path in _pnfs_return_layout Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 730d4dbbaf68..539b94cb6c0f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -652,6 +652,8 @@ _pnfs_return_layout(struct inode *ino) lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); if (unlikely(lrp == NULL)) { status = -ENOMEM; + set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags); + set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags); put_layout_hdr(lo); goto out; } From 37aa9a2eb4d9b1a4aec1fd18bb2bb6bca029de27 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Wed, 15 Jun 2011 14:35:00 +0100 Subject: [PATCH 107/327] perf: clear out make flags when calling kernel make kernelver When generating the perf version from the kernel version using 'make kernelver' it is necessary to clear out any MAKEFLAGS otherwise they may trigger additional output which pollute the contents. Signed-off-by: Andy Whitcroft Signed-off-by: Michal Marek --- tools/perf/util/PERF-VERSION-GEN | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN index 9c5fb4d93824..ad73300f7bac 100755 --- a/tools/perf/util/PERF-VERSION-GEN +++ b/tools/perf/util/PERF-VERSION-GEN @@ -23,7 +23,7 @@ if test -d ../../.git -o -f ../../.git && then VN=$(echo "$VN" | sed -e 's/-/./g'); else - VN=$(make -sC ../.. kernelversion) + VN=$(MAKEFLAGS= make -sC ../.. kernelversion) fi VN=$(expr "$VN" : v*'\(.*\)') From 569658dddf276ceb0780776e7f5d61d9f8d8cb88 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Wed, 15 Jun 2011 22:15:47 +0200 Subject: [PATCH 108/327] kbuild: Call depmod.sh via shell The script has the executable bit in git, but plain old patch(1) can't create executable files. Reported-by: Tetsuo Handa Signed-off-by: Michal Marek --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 43509373f37a..67640639d8f3 100644 --- a/Makefile +++ b/Makefile @@ -1526,7 +1526,8 @@ quiet_cmd_rmfiles = $(if $(wildcard $(rm-files)),CLEAN $(wildcard $(rm-files)) # Run depmod only if we have System.map and depmod is executable quiet_cmd_depmod = DEPMOD $(KERNELRELEASE) - cmd_depmod = $(srctree)/scripts/depmod.sh $(DEPMOD) $(KERNELRELEASE) + cmd_depmod = $(CONFIG_SHELL) $(srctree)/scripts/depmod.sh $(DEPMOD) \ + $(KERNELRELEASE) # Create temporary dir for module support files # clean it up only when building all modules From b2abe50688dcb470e2e46109da7e7e02245ed59b Mon Sep 17 00:00:00 2001 From: Tom Goetz Date: Mon, 16 May 2011 15:06:26 -0400 Subject: [PATCH 109/327] xen: When calling power_off, don't call the halt function. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .. As it won't actually power off the machine. Reported-by: Sven Köhler Tested-by: Sven Köhler Signed-off-by: Tom Goetz Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index dd7b88f2ec7a..5525163a0398 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1033,6 +1033,13 @@ static void xen_machine_halt(void) xen_reboot(SHUTDOWN_poweroff); } +static void xen_machine_power_off(void) +{ + if (pm_power_off) + pm_power_off(); + xen_reboot(SHUTDOWN_poweroff); +} + static void xen_crash_shutdown(struct pt_regs *regs) { xen_reboot(SHUTDOWN_crash); @@ -1058,7 +1065,7 @@ int xen_panic_handler_init(void) static const struct machine_ops xen_machine_ops __initconst = { .restart = xen_restart, .halt = xen_machine_halt, - .power_off = xen_machine_halt, + .power_off = xen_machine_power_off, .shutdown = xen_machine_halt, .crash_shutdown = xen_crash_shutdown, .emergency_restart = xen_emergency_restart, From 793925334f32e9026c22baee5c3c340f47d4ef7e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 15 Jun 2011 12:47:04 -0700 Subject: [PATCH 110/327] proc: Fix Oops on stat of /proc//ns/net Don't call iput with the inode half setup to be a namespace filedescriptor. Instead rearrange the code so that we don't initialize ei->ns_ops until after I ns_ops->get succeeds, preventing us from invoking ns_ops->put when ns_ops->get failed. Reported-by: Ingo Saitz Signed-off-by: Eric W. Biederman --- fs/proc/namespaces.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 781dec5bd682..be177f702acb 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -38,18 +38,21 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, struct inode *inode; struct proc_inode *ei; struct dentry *error = ERR_PTR(-ENOENT); + void *ns; inode = proc_pid_make_inode(dir->i_sb, task); if (!inode) goto out; + ns = ns_ops->get(task); + if (!ns) + goto out_iput; + ei = PROC_I(inode); inode->i_mode = S_IFREG|S_IRUSR; inode->i_fop = &ns_file_operations; ei->ns_ops = ns_ops; - ei->ns = ns_ops->get(task); - if (!ei->ns) - goto out_iput; + ei->ns = ns; dentry->d_op = &pid_dentry_operations; d_add(dentry, inode); From f300ea499721ca208fc4714b9105bfd7e9f75be0 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 15 Jun 2011 15:08:08 -0700 Subject: [PATCH 111/327] mm: remove khugepaged double thp vmstat update with CONFIG_NUMA=n Johannes noticed the vmstat update is already taken care of by khugepaged_alloc_hugepage() internally. The only places that are required to update the vmstat are the callers of alloc_hugepage (callers of khugepaged_alloc_hugepage aren't). Signed-off-by: Andrea Arcangeli Reported-by: Johannes Weiner Acked-by: Rik van Riel Reviewed-by: Minchan Kim Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 615d9743a3cb..81532f297fd2 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2234,11 +2234,8 @@ static void khugepaged_loop(void) while (likely(khugepaged_enabled())) { #ifndef CONFIG_NUMA hpage = khugepaged_alloc_hugepage(); - if (unlikely(!hpage)) { - count_vm_event(THP_COLLAPSE_ALLOC_FAILED); + if (unlikely(!hpage)) break; - } - count_vm_event(THP_COLLAPSE_ALLOC); #else if (IS_ERR(hpage)) { khugepaged_alloc_sleep(); From 0164f69d0cf1a6abbc936851f5b72ece92187cda Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jun 2011 15:08:09 -0700 Subject: [PATCH 112/327] mm/memory.c: fix kernel-doc notation Fix new kernel-doc warnings in mm/memory.c: Warning(mm/memory.c:1327): No description found for parameter 'tlb' Warning(mm/memory.c:1327): Excess function parameter 'tlbp' description in 'unmap_vmas' Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 6953d3926e01..b13e7dbc399b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1296,7 +1296,7 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb, /** * unmap_vmas - unmap a range of memory covered by a list of vma's - * @tlbp: address of the caller's struct mmu_gather + * @tlb: address of the caller's struct mmu_gather * @vma: the starting vma * @start_addr: virtual address at which to start unmapping * @end_addr: virtual address at which to end unmapping From b0825ee3a8c570df4873ee397fa453e67fdad5d7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jun 2011 15:08:10 -0700 Subject: [PATCH 113/327] lib/bitmap.c: fix kernel-doc notation Fix new kernel-doc warnings in lib/bitmap.c: Warning(lib/bitmap.c:596): No description found for parameter 'buf' Warning(lib/bitmap.c:596): Excess function parameter 'bp' description in '__bitmap_parselist' Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bitmap.c b/lib/bitmap.c index 41baf02924e6..3f3b68199d74 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -572,7 +572,7 @@ EXPORT_SYMBOL(bitmap_scnlistprintf); /** * __bitmap_parselist - convert list format ASCII string to bitmap - * @bp: read nul-terminated user string from this buffer + * @buf: read nul-terminated user string from this buffer * @buflen: buffer size in bytes. If string is smaller than this * then it must be terminated with a \0. * @is_user: location of buffer, 0 indicates kernel space From 7f81c8890c15a10f5220bebae3b6dfae4961962a Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 15 Jun 2011 15:08:11 -0700 Subject: [PATCH 114/327] fs/exec.c: use BUILD_BUG_ON for VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP Commit a8bef8ff6ea1 ("mm: migration: avoid race between shift_arg_pages() and rmap_walk() during migration by not migrating temporary stacks") introduced a BUG_ON() to ensure that VM_STACK_FLAGS and VM_STACK_INCOMPLETE_SETUP do not overlap. The check is a compile time one, so BUILD_BUG_ON is more appropriate. Signed-off-by: Michal Hocko Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/exec.c b/fs/exec.c index 97e0d52d72fd..b54f74f3cd80 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -277,7 +277,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) * use STACK_TOP because that can depend on attributes which aren't * configured yet. */ - BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); + BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); vma->vm_end = STACK_TOP_MAX; vma->vm_start = vma->vm_end - PAGE_SIZE; vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; From a59ec1e7ff98cc4365d5b1bff4e7102e86b5716b Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Wed, 15 Jun 2011 15:08:11 -0700 Subject: [PATCH 115/327] backlight: new driver for the ADP8870 backlight devices Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .../sysfs-class-backlight-driver-adp8870 | 56 + drivers/video/backlight/Kconfig | 12 + drivers/video/backlight/Makefile | 1 + drivers/video/backlight/adp8870_bl.c | 1011 +++++++++++++++++ include/linux/i2c/adp8870.h | 153 +++ 5 files changed, 1233 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 create mode 100644 drivers/video/backlight/adp8870_bl.c create mode 100644 include/linux/i2c/adp8870.h diff --git a/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 b/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 new file mode 100644 index 000000000000..aa11dbdd794b --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 @@ -0,0 +1,56 @@ +What: /sys/class/backlight//_max +What: /sys/class/backlight//l1_daylight_max +What: /sys/class/backlight//l2_bright_max +What: /sys/class/backlight//l3_office_max +What: /sys/class/backlight//l4_indoor_max +What: /sys/class/backlight//l5_dark_max +Date: Mai 2011 +KernelVersion: 2.6.40 +Contact: device-drivers-devel@blackfin.uclinux.org +Description: + Control the maximum brightness for + on this . Values are between 0 and 127. This file + will also show the brightness level stored for this + . + +What: /sys/class/backlight//_dim +What: /sys/class/backlight//l2_bright_dim +What: /sys/class/backlight//l3_office_dim +What: /sys/class/backlight//l4_indoor_dim +What: /sys/class/backlight//l5_dark_dim +Date: Mai 2011 +KernelVersion: 2.6.40 +Contact: device-drivers-devel@blackfin.uclinux.org +Description: + Control the dim brightness for + on this . Values are between 0 and 127, typically + set to 0. Full off when the backlight is disabled. + This file will also show the dim brightness level stored for + this . + +What: /sys/class/backlight//ambient_light_level +Date: Mai 2011 +KernelVersion: 2.6.40 +Contact: device-drivers-devel@blackfin.uclinux.org +Description: + Get conversion value of the light sensor. + This value is updated every 80 ms (when the light sensor + is enabled). Returns integer between 0 (dark) and + 8000 (max ambient brightness) + +What: /sys/class/backlight//ambient_light_zone +Date: Mai 2011 +KernelVersion: 2.6.40 +Contact: device-drivers-devel@blackfin.uclinux.org +Description: + Get/Set current ambient light zone. Reading returns + integer between 1..5 (1 = daylight, 2 = bright, ..., 5 = dark). + Writing a value between 1..5 forces the backlight controller + to enter the corresponding ambient light zone. + Writing 0 returns to normal/automatic ambient light level + operation. The ambient light sensing feature on these devices + is an extension to the API documented in + Documentation/ABI/stable/sysfs-class-backlight. + It can be enabled by writing the value stored in + /sys/class/backlight//max_brightness to + /sys/class/backlight//brightness. \ No newline at end of file diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index 0c9373bedd1f..2d93c8d61ad5 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -302,6 +302,18 @@ config BACKLIGHT_ADP8860 To compile this driver as a module, choose M here: the module will be called adp8860_bl. +config BACKLIGHT_ADP8870 + tristate "Backlight Driver for ADP8870 using WLED" + depends on BACKLIGHT_CLASS_DEVICE && I2C + select NEW_LEDS + select LEDS_CLASS + help + If you have a LCD backlight connected to the ADP8870, + say Y here to enable this driver. + + To compile this driver as a module, choose M here: the module will + be called adp8870_bl. + config BACKLIGHT_88PM860X tristate "Backlight Driver for 88PM8606 using WLED" depends on MFD_88PM860X diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile index b9ca8490df87..ee72adb8786e 100644 --- a/drivers/video/backlight/Makefile +++ b/drivers/video/backlight/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_BACKLIGHT_WM831X) += wm831x_bl.o obj-$(CONFIG_BACKLIGHT_ADX) += adx_bl.o obj-$(CONFIG_BACKLIGHT_ADP5520) += adp5520_bl.o obj-$(CONFIG_BACKLIGHT_ADP8860) += adp8860_bl.o +obj-$(CONFIG_BACKLIGHT_ADP8870) += adp8870_bl.o obj-$(CONFIG_BACKLIGHT_88PM860X) += 88pm860x_bl.o obj-$(CONFIG_BACKLIGHT_PCF50633) += pcf50633-backlight.o diff --git a/drivers/video/backlight/adp8870_bl.c b/drivers/video/backlight/adp8870_bl.c new file mode 100644 index 000000000000..e320b62576e2 --- /dev/null +++ b/drivers/video/backlight/adp8870_bl.c @@ -0,0 +1,1011 @@ +/* + * Backlight driver for Analog Devices ADP8870 Backlight Devices + * + * Copyright 2009-2011 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#define ADP8870_EXT_FEATURES +#define ADP8870_USE_LEDS + + +#define ADP8870_MFDVID 0x00 /* Manufacturer and device ID */ +#define ADP8870_MDCR 0x01 /* Device mode and status */ +#define ADP8870_INT_STAT 0x02 /* Interrupts status */ +#define ADP8870_INT_EN 0x03 /* Interrupts enable */ +#define ADP8870_CFGR 0x04 /* Configuration register */ +#define ADP8870_BLSEL 0x05 /* Sink enable backlight or independent */ +#define ADP8870_PWMLED 0x06 /* PWM Enable Selection Register */ +#define ADP8870_BLOFF 0x07 /* Backlight off timeout */ +#define ADP8870_BLDIM 0x08 /* Backlight dim timeout */ +#define ADP8870_BLFR 0x09 /* Backlight fade in and out rates */ +#define ADP8870_BLMX1 0x0A /* Backlight (Brightness Level 1-daylight) maximum current */ +#define ADP8870_BLDM1 0x0B /* Backlight (Brightness Level 1-daylight) dim current */ +#define ADP8870_BLMX2 0x0C /* Backlight (Brightness Level 2-bright) maximum current */ +#define ADP8870_BLDM2 0x0D /* Backlight (Brightness Level 2-bright) dim current */ +#define ADP8870_BLMX3 0x0E /* Backlight (Brightness Level 3-office) maximum current */ +#define ADP8870_BLDM3 0x0F /* Backlight (Brightness Level 3-office) dim current */ +#define ADP8870_BLMX4 0x10 /* Backlight (Brightness Level 4-indoor) maximum current */ +#define ADP8870_BLDM4 0x11 /* Backlight (Brightness Level 4-indoor) dim current */ +#define ADP8870_BLMX5 0x12 /* Backlight (Brightness Level 5-dark) maximum current */ +#define ADP8870_BLDM5 0x13 /* Backlight (Brightness Level 5-dark) dim current */ +#define ADP8870_ISCLAW 0x1A /* Independent sink current fade law register */ +#define ADP8870_ISCC 0x1B /* Independent sink current control register */ +#define ADP8870_ISCT1 0x1C /* Independent Sink Current Timer Register LED[7:5] */ +#define ADP8870_ISCT2 0x1D /* Independent Sink Current Timer Register LED[4:1] */ +#define ADP8870_ISCF 0x1E /* Independent sink current fade register */ +#define ADP8870_ISC1 0x1F /* Independent Sink Current LED1 */ +#define ADP8870_ISC2 0x20 /* Independent Sink Current LED2 */ +#define ADP8870_ISC3 0x21 /* Independent Sink Current LED3 */ +#define ADP8870_ISC4 0x22 /* Independent Sink Current LED4 */ +#define ADP8870_ISC5 0x23 /* Independent Sink Current LED5 */ +#define ADP8870_ISC6 0x24 /* Independent Sink Current LED6 */ +#define ADP8870_ISC7 0x25 /* Independent Sink Current LED7 (Brightness Level 1-daylight) */ +#define ADP8870_ISC7_L2 0x26 /* Independent Sink Current LED7 (Brightness Level 2-bright) */ +#define ADP8870_ISC7_L3 0x27 /* Independent Sink Current LED7 (Brightness Level 3-office) */ +#define ADP8870_ISC7_L4 0x28 /* Independent Sink Current LED7 (Brightness Level 4-indoor) */ +#define ADP8870_ISC7_L5 0x29 /* Independent Sink Current LED7 (Brightness Level 5-dark) */ +#define ADP8870_CMP_CTL 0x2D /* ALS Comparator Control Register */ +#define ADP8870_ALS1_EN 0x2E /* Main ALS comparator level enable */ +#define ADP8870_ALS2_EN 0x2F /* Second ALS comparator level enable */ +#define ADP8870_ALS1_STAT 0x30 /* Main ALS Comparator Status Register */ +#define ADP8870_ALS2_STAT 0x31 /* Second ALS Comparator Status Register */ +#define ADP8870_L2TRP 0x32 /* L2 comparator reference */ +#define ADP8870_L2HYS 0x33 /* L2 hysteresis */ +#define ADP8870_L3TRP 0x34 /* L3 comparator reference */ +#define ADP8870_L3HYS 0x35 /* L3 hysteresis */ +#define ADP8870_L4TRP 0x36 /* L4 comparator reference */ +#define ADP8870_L4HYS 0x37 /* L4 hysteresis */ +#define ADP8870_L5TRP 0x38 /* L5 comparator reference */ +#define ADP8870_L5HYS 0x39 /* L5 hysteresis */ +#define ADP8870_PH1LEVL 0x40 /* First phototransistor ambient light level-low byte register */ +#define ADP8870_PH1LEVH 0x41 /* First phototransistor ambient light level-high byte register */ +#define ADP8870_PH2LEVL 0x42 /* Second phototransistor ambient light level-low byte register */ +#define ADP8870_PH2LEVH 0x43 /* Second phototransistor ambient light level-high byte register */ + +#define ADP8870_MANUFID 0x3 /* Analog Devices AD8870 Manufacturer and device ID */ +#define ADP8870_DEVID(x) ((x) & 0xF) +#define ADP8870_MANID(x) ((x) >> 4) + +/* MDCR Device mode and status */ +#define D7ALSEN (1 << 7) +#define INT_CFG (1 << 6) +#define NSTBY (1 << 5) +#define DIM_EN (1 << 4) +#define GDWN_DIS (1 << 3) +#define SIS_EN (1 << 2) +#define CMP_AUTOEN (1 << 1) +#define BLEN (1 << 0) + +/* ADP8870_ALS1_EN Main ALS comparator level enable */ +#define L5_EN (1 << 3) +#define L4_EN (1 << 2) +#define L3_EN (1 << 1) +#define L2_EN (1 << 0) + +#define CFGR_BLV_SHIFT 3 +#define CFGR_BLV_MASK 0x7 +#define ADP8870_FLAG_LED_MASK 0xFF + +#define FADE_VAL(in, out) ((0xF & (in)) | ((0xF & (out)) << 4)) +#define BL_CFGR_VAL(law, blv) ((((blv) & CFGR_BLV_MASK) << CFGR_BLV_SHIFT) | ((0x3 & (law)) << 1)) +#define ALS_CMPR_CFG_VAL(filt) ((0x7 & (filt)) << 1) + +struct adp8870_bl { + struct i2c_client *client; + struct backlight_device *bl; + struct adp8870_led *led; + struct adp8870_backlight_platform_data *pdata; + struct mutex lock; + unsigned long cached_daylight_max; + int id; + int revid; + int current_brightness; +}; + +struct adp8870_led { + struct led_classdev cdev; + struct work_struct work; + struct i2c_client *client; + enum led_brightness new_brightness; + int id; + int flags; +}; + +static int adp8870_read(struct i2c_client *client, int reg, uint8_t *val) +{ + int ret; + + ret = i2c_smbus_read_byte_data(client, reg); + if (ret < 0) { + dev_err(&client->dev, "failed reading at 0x%02x\n", reg); + return ret; + } + + *val = ret; + return 0; +} + + +static int adp8870_write(struct i2c_client *client, u8 reg, u8 val) +{ + int ret = i2c_smbus_write_byte_data(client, reg, val); + if (ret) + dev_err(&client->dev, "failed to write\n"); + + return ret; +} + +static int adp8870_set_bits(struct i2c_client *client, int reg, uint8_t bit_mask) +{ + struct adp8870_bl *data = i2c_get_clientdata(client); + uint8_t reg_val; + int ret; + + mutex_lock(&data->lock); + + ret = adp8870_read(client, reg, ®_val); + + if (!ret && ((reg_val & bit_mask) == 0)) { + reg_val |= bit_mask; + ret = adp8870_write(client, reg, reg_val); + } + + mutex_unlock(&data->lock); + return ret; +} + +static int adp8870_clr_bits(struct i2c_client *client, int reg, uint8_t bit_mask) +{ + struct adp8870_bl *data = i2c_get_clientdata(client); + uint8_t reg_val; + int ret; + + mutex_lock(&data->lock); + + ret = adp8870_read(client, reg, ®_val); + + if (!ret && (reg_val & bit_mask)) { + reg_val &= ~bit_mask; + ret = adp8870_write(client, reg, reg_val); + } + + mutex_unlock(&data->lock); + return ret; +} + +/* + * Independent sink / LED + */ +#if defined(ADP8870_USE_LEDS) +static void adp8870_led_work(struct work_struct *work) +{ + struct adp8870_led *led = container_of(work, struct adp8870_led, work); + adp8870_write(led->client, ADP8870_ISC1 + led->id - 1, + led->new_brightness >> 1); +} + +static void adp8870_led_set(struct led_classdev *led_cdev, + enum led_brightness value) +{ + struct adp8870_led *led; + + led = container_of(led_cdev, struct adp8870_led, cdev); + led->new_brightness = value; + /* + * Use workqueue for IO since I2C operations can sleep. + */ + schedule_work(&led->work); +} + +static int adp8870_led_setup(struct adp8870_led *led) +{ + struct i2c_client *client = led->client; + int ret = 0; + + ret = adp8870_write(client, ADP8870_ISC1 + led->id - 1, 0); + if (ret) + return ret; + + ret = adp8870_set_bits(client, ADP8870_ISCC, 1 << (led->id - 1)); + if (ret) + return ret; + + if (led->id > 4) + ret = adp8870_set_bits(client, ADP8870_ISCT1, + (led->flags & 0x3) << ((led->id - 5) * 2)); + else + ret = adp8870_set_bits(client, ADP8870_ISCT2, + (led->flags & 0x3) << ((led->id - 1) * 2)); + + return ret; +} + +static int __devinit adp8870_led_probe(struct i2c_client *client) +{ + struct adp8870_backlight_platform_data *pdata = + client->dev.platform_data; + struct adp8870_bl *data = i2c_get_clientdata(client); + struct adp8870_led *led, *led_dat; + struct led_info *cur_led; + int ret, i; + + + led = kcalloc(pdata->num_leds, sizeof(*led), GFP_KERNEL); + if (led == NULL) { + dev_err(&client->dev, "failed to alloc memory\n"); + return -ENOMEM; + } + + ret = adp8870_write(client, ADP8870_ISCLAW, pdata->led_fade_law); + if (ret) + goto err_free; + + ret = adp8870_write(client, ADP8870_ISCT1, + (pdata->led_on_time & 0x3) << 6); + if (ret) + goto err_free; + + ret = adp8870_write(client, ADP8870_ISCF, + FADE_VAL(pdata->led_fade_in, pdata->led_fade_out)); + if (ret) + goto err_free; + + for (i = 0; i < pdata->num_leds; ++i) { + cur_led = &pdata->leds[i]; + led_dat = &led[i]; + + led_dat->id = cur_led->flags & ADP8870_FLAG_LED_MASK; + + if (led_dat->id > 7 || led_dat->id < 1) { + dev_err(&client->dev, "Invalid LED ID %d\n", + led_dat->id); + goto err; + } + + if (pdata->bl_led_assign & (1 << (led_dat->id - 1))) { + dev_err(&client->dev, "LED %d used by Backlight\n", + led_dat->id); + goto err; + } + + led_dat->cdev.name = cur_led->name; + led_dat->cdev.default_trigger = cur_led->default_trigger; + led_dat->cdev.brightness_set = adp8870_led_set; + led_dat->cdev.brightness = LED_OFF; + led_dat->flags = cur_led->flags >> FLAG_OFFT_SHIFT; + led_dat->client = client; + led_dat->new_brightness = LED_OFF; + INIT_WORK(&led_dat->work, adp8870_led_work); + + ret = led_classdev_register(&client->dev, &led_dat->cdev); + if (ret) { + dev_err(&client->dev, "failed to register LED %d\n", + led_dat->id); + goto err; + } + + ret = adp8870_led_setup(led_dat); + if (ret) { + dev_err(&client->dev, "failed to write\n"); + i++; + goto err; + } + } + + data->led = led; + + return 0; + + err: + for (i = i - 1; i >= 0; --i) { + led_classdev_unregister(&led[i].cdev); + cancel_work_sync(&led[i].work); + } + + err_free: + kfree(led); + + return ret; +} + +static int __devexit adp8870_led_remove(struct i2c_client *client) +{ + struct adp8870_backlight_platform_data *pdata = + client->dev.platform_data; + struct adp8870_bl *data = i2c_get_clientdata(client); + int i; + + for (i = 0; i < pdata->num_leds; i++) { + led_classdev_unregister(&data->led[i].cdev); + cancel_work_sync(&data->led[i].work); + } + + kfree(data->led); + return 0; +} +#else +static int __devinit adp8870_led_probe(struct i2c_client *client) +{ + return 0; +} + +static int __devexit adp8870_led_remove(struct i2c_client *client) +{ + return 0; +} +#endif + +static int adp8870_bl_set(struct backlight_device *bl, int brightness) +{ + struct adp8870_bl *data = bl_get_data(bl); + struct i2c_client *client = data->client; + int ret = 0; + + if (data->pdata->en_ambl_sens) { + if ((brightness > 0) && (brightness < ADP8870_MAX_BRIGHTNESS)) { + /* Disable Ambient Light auto adjust */ + ret = adp8870_clr_bits(client, ADP8870_MDCR, + CMP_AUTOEN); + if (ret) + return ret; + ret = adp8870_write(client, ADP8870_BLMX1, brightness); + if (ret) + return ret; + } else { + /* + * MAX_BRIGHTNESS -> Enable Ambient Light auto adjust + * restore daylight l1 sysfs brightness + */ + ret = adp8870_write(client, ADP8870_BLMX1, + data->cached_daylight_max); + if (ret) + return ret; + + ret = adp8870_set_bits(client, ADP8870_MDCR, + CMP_AUTOEN); + if (ret) + return ret; + } + } else { + ret = adp8870_write(client, ADP8870_BLMX1, brightness); + if (ret) + return ret; + } + + if (data->current_brightness && brightness == 0) + ret = adp8870_set_bits(client, + ADP8870_MDCR, DIM_EN); + else if (data->current_brightness == 0 && brightness) + ret = adp8870_clr_bits(client, + ADP8870_MDCR, DIM_EN); + + if (!ret) + data->current_brightness = brightness; + + return ret; +} + +static int adp8870_bl_update_status(struct backlight_device *bl) +{ + int brightness = bl->props.brightness; + if (bl->props.power != FB_BLANK_UNBLANK) + brightness = 0; + + if (bl->props.fb_blank != FB_BLANK_UNBLANK) + brightness = 0; + + return adp8870_bl_set(bl, brightness); +} + +static int adp8870_bl_get_brightness(struct backlight_device *bl) +{ + struct adp8870_bl *data = bl_get_data(bl); + + return data->current_brightness; +} + +static const struct backlight_ops adp8870_bl_ops = { + .update_status = adp8870_bl_update_status, + .get_brightness = adp8870_bl_get_brightness, +}; + +static int adp8870_bl_setup(struct backlight_device *bl) +{ + struct adp8870_bl *data = bl_get_data(bl); + struct i2c_client *client = data->client; + struct adp8870_backlight_platform_data *pdata = data->pdata; + int ret = 0; + + ret = adp8870_write(client, ADP8870_BLSEL, ~pdata->bl_led_assign); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_PWMLED, pdata->pwm_assign); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLMX1, pdata->l1_daylight_max); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLDM1, pdata->l1_daylight_dim); + if (ret) + return ret; + + if (pdata->en_ambl_sens) { + data->cached_daylight_max = pdata->l1_daylight_max; + ret = adp8870_write(client, ADP8870_BLMX2, + pdata->l2_bright_max); + if (ret) + return ret; + ret = adp8870_write(client, ADP8870_BLDM2, + pdata->l2_bright_dim); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLMX3, + pdata->l3_office_max); + if (ret) + return ret; + ret = adp8870_write(client, ADP8870_BLDM3, + pdata->l3_office_dim); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLMX4, + pdata->l4_indoor_max); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLDM4, + pdata->l4_indor_dim); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLMX5, + pdata->l5_dark_max); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLDM5, + pdata->l5_dark_dim); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L2TRP, pdata->l2_trip); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L2HYS, pdata->l2_hyst); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L3TRP, pdata->l3_trip); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L3HYS, pdata->l3_hyst); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L4TRP, pdata->l4_trip); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L4HYS, pdata->l4_hyst); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L5TRP, pdata->l5_trip); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L5HYS, pdata->l5_hyst); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_ALS1_EN, L5_EN | L4_EN | + L3_EN | L2_EN); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_CMP_CTL, + ALS_CMPR_CFG_VAL(pdata->abml_filt)); + if (ret) + return ret; + } + + ret = adp8870_write(client, ADP8870_CFGR, + BL_CFGR_VAL(pdata->bl_fade_law, 0)); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLFR, FADE_VAL(pdata->bl_fade_in, + pdata->bl_fade_out)); + if (ret) + return ret; + /* + * ADP8870 Rev0 requires GDWN_DIS bit set + */ + + ret = adp8870_set_bits(client, ADP8870_MDCR, BLEN | DIM_EN | NSTBY | + (data->revid == 0 ? GDWN_DIS : 0)); + + return ret; +} + +static ssize_t adp8870_show(struct device *dev, char *buf, int reg) +{ + struct adp8870_bl *data = dev_get_drvdata(dev); + int error; + uint8_t reg_val; + + mutex_lock(&data->lock); + error = adp8870_read(data->client, reg, ®_val); + mutex_unlock(&data->lock); + + if (error < 0) + return error; + + return sprintf(buf, "%u\n", reg_val); +} + +static ssize_t adp8870_store(struct device *dev, const char *buf, + size_t count, int reg) +{ + struct adp8870_bl *data = dev_get_drvdata(dev); + unsigned long val; + int ret; + + ret = strict_strtoul(buf, 10, &val); + if (ret) + return ret; + + mutex_lock(&data->lock); + adp8870_write(data->client, reg, val); + mutex_unlock(&data->lock); + + return count; +} + +static ssize_t adp8870_bl_l5_dark_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLMX5); +} + +static ssize_t adp8870_bl_l5_dark_max_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLMX5); +} +static DEVICE_ATTR(l5_dark_max, 0664, adp8870_bl_l5_dark_max_show, + adp8870_bl_l5_dark_max_store); + + +static ssize_t adp8870_bl_l4_indoor_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLMX4); +} + +static ssize_t adp8870_bl_l4_indoor_max_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLMX4); +} +static DEVICE_ATTR(l4_indoor_max, 0664, adp8870_bl_l4_indoor_max_show, + adp8870_bl_l4_indoor_max_store); + + +static ssize_t adp8870_bl_l3_office_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLMX3); +} + +static ssize_t adp8870_bl_l3_office_max_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLMX3); +} + +static DEVICE_ATTR(l3_office_max, 0664, adp8870_bl_l3_office_max_show, + adp8870_bl_l3_office_max_store); + +static ssize_t adp8870_bl_l2_bright_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLMX2); +} + +static ssize_t adp8870_bl_l2_bright_max_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLMX2); +} +static DEVICE_ATTR(l2_bright_max, 0664, adp8870_bl_l2_bright_max_show, + adp8870_bl_l2_bright_max_store); + +static ssize_t adp8870_bl_l1_daylight_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLMX1); +} + +static ssize_t adp8870_bl_l1_daylight_max_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct adp8870_bl *data = dev_get_drvdata(dev); + int ret = strict_strtoul(buf, 10, &data->cached_daylight_max); + if (ret) + return ret; + + return adp8870_store(dev, buf, count, ADP8870_BLMX1); +} +static DEVICE_ATTR(l1_daylight_max, 0664, adp8870_bl_l1_daylight_max_show, + adp8870_bl_l1_daylight_max_store); + +static ssize_t adp8870_bl_l5_dark_dim_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLDM5); +} + +static ssize_t adp8870_bl_l5_dark_dim_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLDM5); +} +static DEVICE_ATTR(l5_dark_dim, 0664, adp8870_bl_l5_dark_dim_show, + adp8870_bl_l5_dark_dim_store); + +static ssize_t adp8870_bl_l4_indoor_dim_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLDM4); +} + +static ssize_t adp8870_bl_l4_indoor_dim_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLDM4); +} +static DEVICE_ATTR(l4_indoor_dim, 0664, adp8870_bl_l4_indoor_dim_show, + adp8870_bl_l4_indoor_dim_store); + + +static ssize_t adp8870_bl_l3_office_dim_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLDM3); +} + +static ssize_t adp8870_bl_l3_office_dim_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLDM3); +} +static DEVICE_ATTR(l3_office_dim, 0664, adp8870_bl_l3_office_dim_show, + adp8870_bl_l3_office_dim_store); + +static ssize_t adp8870_bl_l2_bright_dim_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLDM2); +} + +static ssize_t adp8870_bl_l2_bright_dim_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLDM2); +} +static DEVICE_ATTR(l2_bright_dim, 0664, adp8870_bl_l2_bright_dim_show, + adp8870_bl_l2_bright_dim_store); + +static ssize_t adp8870_bl_l1_daylight_dim_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLDM1); +} + +static ssize_t adp8870_bl_l1_daylight_dim_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLDM1); +} +static DEVICE_ATTR(l1_daylight_dim, 0664, adp8870_bl_l1_daylight_dim_show, + adp8870_bl_l1_daylight_dim_store); + +#ifdef ADP8870_EXT_FEATURES +static ssize_t adp8870_bl_ambient_light_level_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct adp8870_bl *data = dev_get_drvdata(dev); + int error; + uint8_t reg_val; + uint16_t ret_val; + + mutex_lock(&data->lock); + error = adp8870_read(data->client, ADP8870_PH1LEVL, ®_val); + if (error < 0) { + mutex_unlock(&data->lock); + return error; + } + ret_val = reg_val; + error = adp8870_read(data->client, ADP8870_PH1LEVH, ®_val); + mutex_unlock(&data->lock); + + if (error < 0) + return error; + + /* Return 13-bit conversion value for the first light sensor */ + ret_val += (reg_val & 0x1F) << 8; + + return sprintf(buf, "%u\n", ret_val); +} +static DEVICE_ATTR(ambient_light_level, 0444, + adp8870_bl_ambient_light_level_show, NULL); + +static ssize_t adp8870_bl_ambient_light_zone_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct adp8870_bl *data = dev_get_drvdata(dev); + int error; + uint8_t reg_val; + + mutex_lock(&data->lock); + error = adp8870_read(data->client, ADP8870_CFGR, ®_val); + mutex_unlock(&data->lock); + + if (error < 0) + return error; + + return sprintf(buf, "%u\n", + ((reg_val >> CFGR_BLV_SHIFT) & CFGR_BLV_MASK) + 1); +} + +static ssize_t adp8870_bl_ambient_light_zone_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct adp8870_bl *data = dev_get_drvdata(dev); + unsigned long val; + uint8_t reg_val; + int ret; + + ret = strict_strtoul(buf, 10, &val); + if (ret) + return ret; + + if (val == 0) { + /* Enable automatic ambient light sensing */ + adp8870_set_bits(data->client, ADP8870_MDCR, CMP_AUTOEN); + } else if ((val > 0) && (val < 6)) { + /* Disable automatic ambient light sensing */ + adp8870_clr_bits(data->client, ADP8870_MDCR, CMP_AUTOEN); + + /* Set user supplied ambient light zone */ + mutex_lock(&data->lock); + adp8870_read(data->client, ADP8870_CFGR, ®_val); + reg_val &= ~(CFGR_BLV_MASK << CFGR_BLV_SHIFT); + reg_val |= (val - 1) << CFGR_BLV_SHIFT; + adp8870_write(data->client, ADP8870_CFGR, reg_val); + mutex_unlock(&data->lock); + } + + return count; +} +static DEVICE_ATTR(ambient_light_zone, 0664, + adp8870_bl_ambient_light_zone_show, + adp8870_bl_ambient_light_zone_store); +#endif + +static struct attribute *adp8870_bl_attributes[] = { + &dev_attr_l5_dark_max.attr, + &dev_attr_l5_dark_dim.attr, + &dev_attr_l4_indoor_max.attr, + &dev_attr_l4_indoor_dim.attr, + &dev_attr_l3_office_max.attr, + &dev_attr_l3_office_dim.attr, + &dev_attr_l2_bright_max.attr, + &dev_attr_l2_bright_dim.attr, + &dev_attr_l1_daylight_max.attr, + &dev_attr_l1_daylight_dim.attr, +#ifdef ADP8870_EXT_FEATURES + &dev_attr_ambient_light_level.attr, + &dev_attr_ambient_light_zone.attr, +#endif + NULL +}; + +static const struct attribute_group adp8870_bl_attr_group = { + .attrs = adp8870_bl_attributes, +}; + +static int __devinit adp8870_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct backlight_properties props; + struct backlight_device *bl; + struct adp8870_bl *data; + struct adp8870_backlight_platform_data *pdata = + client->dev.platform_data; + uint8_t reg_val; + int ret; + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_BYTE_DATA)) { + dev_err(&client->dev, "SMBUS Byte Data not Supported\n"); + return -EIO; + } + + if (!pdata) { + dev_err(&client->dev, "no platform data?\n"); + return -EINVAL; + } + + ret = adp8870_read(client, ADP8870_MFDVID, ®_val); + if (ret < 0) + return -EIO; + + if (ADP8870_MANID(reg_val) != ADP8870_MANUFID) { + dev_err(&client->dev, "failed to probe\n"); + return -ENODEV; + } + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + + data->revid = ADP8870_DEVID(reg_val); + data->client = client; + data->pdata = pdata; + data->id = id->driver_data; + data->current_brightness = 0; + i2c_set_clientdata(client, data); + + mutex_init(&data->lock); + + memset(&props, 0, sizeof(props)); + props.max_brightness = props.brightness = ADP8870_MAX_BRIGHTNESS; + bl = backlight_device_register(dev_driver_string(&client->dev), + &client->dev, data, &adp8870_bl_ops, &props); + if (IS_ERR(bl)) { + dev_err(&client->dev, "failed to register backlight\n"); + ret = PTR_ERR(bl); + goto out2; + } + + data->bl = bl; + + if (pdata->en_ambl_sens) + ret = sysfs_create_group(&bl->dev.kobj, + &adp8870_bl_attr_group); + + if (ret) { + dev_err(&client->dev, "failed to register sysfs\n"); + goto out1; + } + + ret = adp8870_bl_setup(bl); + if (ret) { + ret = -EIO; + goto out; + } + + backlight_update_status(bl); + + dev_info(&client->dev, "Rev.%d Backlight\n", data->revid); + + if (pdata->num_leds) + adp8870_led_probe(client); + + return 0; + +out: + if (data->pdata->en_ambl_sens) + sysfs_remove_group(&data->bl->dev.kobj, + &adp8870_bl_attr_group); +out1: + backlight_device_unregister(bl); +out2: + i2c_set_clientdata(client, NULL); + kfree(data); + + return ret; +} + +static int __devexit adp8870_remove(struct i2c_client *client) +{ + struct adp8870_bl *data = i2c_get_clientdata(client); + + adp8870_clr_bits(client, ADP8870_MDCR, NSTBY); + + if (data->led) + adp8870_led_remove(client); + + if (data->pdata->en_ambl_sens) + sysfs_remove_group(&data->bl->dev.kobj, + &adp8870_bl_attr_group); + + backlight_device_unregister(data->bl); + i2c_set_clientdata(client, NULL); + kfree(data); + + return 0; +} + +#ifdef CONFIG_PM +static int adp8870_i2c_suspend(struct i2c_client *client, pm_message_t message) +{ + adp8870_clr_bits(client, ADP8870_MDCR, NSTBY); + + return 0; +} + +static int adp8870_i2c_resume(struct i2c_client *client) +{ + adp8870_set_bits(client, ADP8870_MDCR, NSTBY); + + return 0; +} +#else +#define adp8870_i2c_suspend NULL +#define adp8870_i2c_resume NULL +#endif + +static const struct i2c_device_id adp8870_id[] = { + { "adp8870", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, adp8870_id); + +static struct i2c_driver adp8870_driver = { + .driver = { + .name = KBUILD_MODNAME, + }, + .probe = adp8870_probe, + .remove = __devexit_p(adp8870_remove), + .suspend = adp8870_i2c_suspend, + .resume = adp8870_i2c_resume, + .id_table = adp8870_id, +}; + +static int __init adp8870_init(void) +{ + return i2c_add_driver(&adp8870_driver); +} +module_init(adp8870_init); + +static void __exit adp8870_exit(void) +{ + i2c_del_driver(&adp8870_driver); +} +module_exit(adp8870_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Michael Hennerich "); +MODULE_DESCRIPTION("ADP8870 Backlight driver"); +MODULE_ALIAS("platform:adp8870-backlight"); diff --git a/include/linux/i2c/adp8870.h b/include/linux/i2c/adp8870.h new file mode 100644 index 000000000000..624dceccbd5b --- /dev/null +++ b/include/linux/i2c/adp8870.h @@ -0,0 +1,153 @@ +/* + * Definitions and platform data for Analog Devices + * Backlight drivers ADP8870 + * + * Copyright 2009-2010 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. + */ + +#ifndef __LINUX_I2C_ADP8870_H +#define __LINUX_I2C_ADP8870_H + +#define ID_ADP8870 8870 + +#define ADP8870_MAX_BRIGHTNESS 0x7F +#define FLAG_OFFT_SHIFT 8 + +/* + * LEDs subdevice platform data + */ + +#define ADP8870_LED_DIS_BLINK (0 << FLAG_OFFT_SHIFT) +#define ADP8870_LED_OFFT_600ms (1 << FLAG_OFFT_SHIFT) +#define ADP8870_LED_OFFT_1200ms (2 << FLAG_OFFT_SHIFT) +#define ADP8870_LED_OFFT_1800ms (3 << FLAG_OFFT_SHIFT) + +#define ADP8870_LED_ONT_200ms 0 +#define ADP8870_LED_ONT_600ms 1 +#define ADP8870_LED_ONT_800ms 2 +#define ADP8870_LED_ONT_1200ms 3 + +#define ADP8870_LED_D7 (7) +#define ADP8870_LED_D6 (6) +#define ADP8870_LED_D5 (5) +#define ADP8870_LED_D4 (4) +#define ADP8870_LED_D3 (3) +#define ADP8870_LED_D2 (2) +#define ADP8870_LED_D1 (1) + +/* + * Backlight subdevice platform data + */ + +#define ADP8870_BL_D7 (1 << 6) +#define ADP8870_BL_D6 (1 << 5) +#define ADP8870_BL_D5 (1 << 4) +#define ADP8870_BL_D4 (1 << 3) +#define ADP8870_BL_D3 (1 << 2) +#define ADP8870_BL_D2 (1 << 1) +#define ADP8870_BL_D1 (1 << 0) + +#define ADP8870_FADE_T_DIS 0 /* Fade Timer Disabled */ +#define ADP8870_FADE_T_300ms 1 /* 0.3 Sec */ +#define ADP8870_FADE_T_600ms 2 +#define ADP8870_FADE_T_900ms 3 +#define ADP8870_FADE_T_1200ms 4 +#define ADP8870_FADE_T_1500ms 5 +#define ADP8870_FADE_T_1800ms 6 +#define ADP8870_FADE_T_2100ms 7 +#define ADP8870_FADE_T_2400ms 8 +#define ADP8870_FADE_T_2700ms 9 +#define ADP8870_FADE_T_3000ms 10 +#define ADP8870_FADE_T_3500ms 11 +#define ADP8870_FADE_T_4000ms 12 +#define ADP8870_FADE_T_4500ms 13 +#define ADP8870_FADE_T_5000ms 14 +#define ADP8870_FADE_T_5500ms 15 /* 5.5 Sec */ + +#define ADP8870_FADE_LAW_LINEAR 0 +#define ADP8870_FADE_LAW_SQUARE 1 +#define ADP8870_FADE_LAW_CUBIC1 2 +#define ADP8870_FADE_LAW_CUBIC2 3 + +#define ADP8870_BL_AMBL_FILT_80ms 0 /* Light sensor filter time */ +#define ADP8870_BL_AMBL_FILT_160ms 1 +#define ADP8870_BL_AMBL_FILT_320ms 2 +#define ADP8870_BL_AMBL_FILT_640ms 3 +#define ADP8870_BL_AMBL_FILT_1280ms 4 +#define ADP8870_BL_AMBL_FILT_2560ms 5 +#define ADP8870_BL_AMBL_FILT_5120ms 6 +#define ADP8870_BL_AMBL_FILT_10240ms 7 /* 10.24 sec */ + +/* + * Blacklight current 0..30mA + */ +#define ADP8870_BL_CUR_mA(I) ((I * 127) / 30) + +/* + * L2 comparator current 0..1106uA + */ +#define ADP8870_L2_COMP_CURR_uA(I) ((I * 255) / 1106) + +/* + * L3 comparator current 0..551uA + */ +#define ADP8870_L3_COMP_CURR_uA(I) ((I * 255) / 551) + +/* + * L4 comparator current 0..275uA + */ +#define ADP8870_L4_COMP_CURR_uA(I) ((I * 255) / 275) + +/* + * L5 comparator current 0..138uA + */ +#define ADP8870_L5_COMP_CURR_uA(I) ((I * 255) / 138) + +struct adp8870_backlight_platform_data { + u8 bl_led_assign; /* 1 = Backlight 0 = Individual LED */ + u8 pwm_assign; /* 1 = Enables PWM mode */ + + u8 bl_fade_in; /* Backlight Fade-In Timer */ + u8 bl_fade_out; /* Backlight Fade-Out Timer */ + u8 bl_fade_law; /* fade-on/fade-off transfer characteristic */ + + u8 en_ambl_sens; /* 1 = enable ambient light sensor */ + u8 abml_filt; /* Light sensor filter time */ + + u8 l1_daylight_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l1_daylight_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l2_bright_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l2_bright_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l3_office_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l3_office_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l4_indoor_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l4_indor_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l5_dark_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l5_dark_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + + u8 l2_trip; /* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */ + u8 l2_hyst; /* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */ + u8 l3_trip; /* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */ + u8 l3_hyst; /* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */ + u8 l4_trip; /* use L4_COMP_CURR_uA(I) 0 <= I <= 275 uA */ + u8 l4_hyst; /* use L4_COMP_CURR_uA(I) 0 <= I <= 275 uA */ + u8 l5_trip; /* use L5_COMP_CURR_uA(I) 0 <= I <= 138 uA */ + u8 l5_hyst; /* use L6_COMP_CURR_uA(I) 0 <= I <= 138 uA */ + + /** + * Independent Current Sinks / LEDS + * Sinks not assigned to the Backlight can be exposed to + * user space using the LEDS CLASS interface + */ + + int num_leds; + struct led_info *leds; + u8 led_fade_in; /* LED Fade-In Timer */ + u8 led_fade_out; /* LED Fade-Out Timer */ + u8 led_fade_law; /* fade-on/fade-off transfer characteristic */ + u8 led_on_time; +}; + +#endif /* __LINUX_I2C_ADP8870_H */ From e1bbd19bc4afef7adb80cca163800391c4f5773d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 15 Jun 2011 15:08:12 -0700 Subject: [PATCH 116/327] drivers/video/backlight/adp8870_bl.c: add missed props.type conversion Cc: Michael Hennerich Cc: Mike Frysinger Cc: Matthew Garrett Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/backlight/adp8870_bl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/video/backlight/adp8870_bl.c b/drivers/video/backlight/adp8870_bl.c index e320b62576e2..05a8832bb3eb 100644 --- a/drivers/video/backlight/adp8870_bl.c +++ b/drivers/video/backlight/adp8870_bl.c @@ -889,6 +889,7 @@ static int __devinit adp8870_probe(struct i2c_client *client, mutex_init(&data->lock); memset(&props, 0, sizeof(props)); + props.type = BACKLIGHT_RAW; props.max_brightness = props.brightness = ADP8870_MAX_BRIGHTNESS; bl = backlight_device_register(dev_driver_string(&client->dev), &client->dev, data, &adp8870_bl_ops, &props); From a433658c30974fc87ba3ff52d7e4e6299762aa3d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 15 Jun 2011 15:08:13 -0700 Subject: [PATCH 117/327] vmscan,memcg: memcg aware swap token Currently, memcg reclaim can disable swap token even if the swap token mm doesn't belong in its memory cgroup. It's slightly risky. If an admin creates very small mem-cgroup and silly guy runs contentious heavy memory pressure workload, every tasks are going to lose swap token and then system may become unresponsive. That's bad. This patch adds 'memcg' parameter into disable_swap_token(). and if the parameter doesn't match swap token, VM doesn't disable it. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: KOSAKI Motohiro Reviewed-by: KAMEZAWA Hiroyuki Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 +++ include/linux/swap.h | 8 +--- mm/memcontrol.c | 16 +++---- mm/thrash.c | 87 +++++++++++++++++++++++++++++--------- mm/vmscan.c | 4 +- 5 files changed, 85 insertions(+), 36 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9724a38ee69d..50940da6adf3 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -84,6 +84,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); +extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm); static inline int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) @@ -246,6 +247,11 @@ static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) return NULL; } +static inline struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) +{ + return NULL; +} + static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem) { return 1; diff --git a/include/linux/swap.h b/include/linux/swap.h index 384eb5fe530b..e70564647039 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -358,6 +358,7 @@ struct backing_dev_info; extern struct mm_struct *swap_token_mm; extern void grab_swap_token(struct mm_struct *); extern void __put_swap_token(struct mm_struct *); +extern void disable_swap_token(struct mem_cgroup *memcg); static inline int has_swap_token(struct mm_struct *mm) { @@ -370,11 +371,6 @@ static inline void put_swap_token(struct mm_struct *mm) __put_swap_token(mm); } -static inline void disable_swap_token(void) -{ - put_swap_token(swap_token_mm); -} - #ifdef CONFIG_CGROUP_MEM_RES_CTLR extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout); @@ -500,7 +496,7 @@ static inline int has_swap_token(struct mm_struct *mm) return 0; } -static inline void disable_swap_token(void) +static inline void disable_swap_token(struct mem_cgroup *memcg) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index bd9052a5d3ad..e37c44d88d46 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -735,7 +735,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) struct mem_cgroup, css); } -static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) +struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) { struct mem_cgroup *mem = NULL; @@ -5414,18 +5414,16 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, struct cgroup *old_cont, struct task_struct *p) { - struct mm_struct *mm; + struct mm_struct *mm = get_task_mm(p); - if (!mc.to) - /* no need to move charge */ - return; - - mm = get_task_mm(p); if (mm) { - mem_cgroup_move_charge(mm); + if (mc.to) + mem_cgroup_move_charge(mm); + put_swap_token(mm); mmput(mm); } - mem_cgroup_clear_mc(); + if (mc.to) + mem_cgroup_clear_mc(); } #else /* !CONFIG_MMU */ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, diff --git a/mm/thrash.c b/mm/thrash.c index 2372d4ed5dd8..6cdf86511385 100644 --- a/mm/thrash.c +++ b/mm/thrash.c @@ -21,11 +21,31 @@ #include #include #include +#include static DEFINE_SPINLOCK(swap_token_lock); struct mm_struct *swap_token_mm; +struct mem_cgroup *swap_token_memcg; static unsigned int global_faults; +#ifdef CONFIG_CGROUP_MEM_RES_CTLR +static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) +{ + struct mem_cgroup *memcg; + + memcg = try_get_mem_cgroup_from_mm(mm); + if (memcg) + css_put(mem_cgroup_css(memcg)); + + return memcg; +} +#else +static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) +{ + return NULL; +} +#endif + void grab_swap_token(struct mm_struct *mm) { int current_interval; @@ -38,40 +58,69 @@ void grab_swap_token(struct mm_struct *mm) return; /* First come first served */ - if (swap_token_mm == NULL) { - mm->token_priority = mm->token_priority + 2; - swap_token_mm = mm; + if (!swap_token_mm) + goto replace_token; + + if (mm == swap_token_mm) { + mm->token_priority += 2; goto out; } - if (mm != swap_token_mm) { - if (current_interval < mm->last_interval) - mm->token_priority++; - else { - if (likely(mm->token_priority > 0)) - mm->token_priority--; - } - /* Check if we deserve the token */ - if (mm->token_priority > swap_token_mm->token_priority) { - mm->token_priority += 2; - swap_token_mm = mm; - } - } else { - /* Token holder came in again! */ - mm->token_priority += 2; + if (current_interval < mm->last_interval) + mm->token_priority++; + else { + if (likely(mm->token_priority > 0)) + mm->token_priority--; } + /* Check if we deserve the token */ + if (mm->token_priority > swap_token_mm->token_priority) + goto replace_token; + out: mm->faultstamp = global_faults; mm->last_interval = current_interval; spin_unlock(&swap_token_lock); + return; + +replace_token: + mm->token_priority += 2; + swap_token_mm = mm; + swap_token_memcg = swap_token_memcg_from_mm(mm); + goto out; } /* Called on process exit. */ void __put_swap_token(struct mm_struct *mm) { spin_lock(&swap_token_lock); - if (likely(mm == swap_token_mm)) + if (likely(mm == swap_token_mm)) { swap_token_mm = NULL; + swap_token_memcg = NULL; + } spin_unlock(&swap_token_lock); } + +static bool match_memcg(struct mem_cgroup *a, struct mem_cgroup *b) +{ + if (!a) + return true; + if (!b) + return true; + if (a == b) + return true; + return false; +} + +void disable_swap_token(struct mem_cgroup *memcg) +{ + /* memcg reclaim don't disable unrelated mm token. */ + if (match_memcg(memcg, swap_token_memcg)) { + spin_lock(&swap_token_lock); + if (match_memcg(memcg, swap_token_memcg)) { + swap_token_mm = NULL; + swap_token_memcg = NULL; + } + spin_unlock(&swap_token_lock); + } +} diff --git a/mm/vmscan.c b/mm/vmscan.c index faa0a088f9cc..dbe6ea321df4 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2081,7 +2081,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, for (priority = DEF_PRIORITY; priority >= 0; priority--) { sc->nr_scanned = 0; if (!priority) - disable_swap_token(); + disable_swap_token(sc->mem_cgroup); total_scanned += shrink_zones(priority, zonelist, sc); /* * Don't shrink slabs when reclaiming memory from @@ -2407,7 +2407,7 @@ loop_again: /* The swap token gets in the way of swapout... */ if (!priority) - disable_swap_token(); + disable_swap_token(NULL); all_zones_ok = 1; balanced = 0; From 83cd81a34357a632509f7491eec81e62e71d65f7 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 15 Jun 2011 15:08:14 -0700 Subject: [PATCH 118/327] vmscan: implement swap token trace This is useful for observing swap token activity. example output: zsh-1845 [000] 598.962716: update_swap_token_priority: mm=ffff88015eaf7700 old_prio=1 new_prio=0 memtoy-1830 [001] 602.033900: update_swap_token_priority: mm=ffff880037a45880 old_prio=947 new_prio=949 memtoy-1830 [000] 602.041509: update_swap_token_priority: mm=ffff880037a45880 old_prio=949 new_prio=951 memtoy-1830 [000] 602.051959: update_swap_token_priority: mm=ffff880037a45880 old_prio=951 new_prio=953 memtoy-1830 [000] 602.052188: update_swap_token_priority: mm=ffff880037a45880 old_prio=953 new_prio=955 memtoy-1830 [001] 602.427184: put_swap_token: token_mm=ffff880037a45880 zsh-1789 [000] 602.427281: replace_swap_token: old_token_mm= (null) old_prio=0 new_token_mm=ffff88015eaf7018 new_prio=2 zsh-1789 [001] 602.433456: update_swap_token_priority: mm=ffff88015eaf7018 old_prio=2 new_prio=4 zsh-1789 [000] 602.437613: update_swap_token_priority: mm=ffff88015eaf7018 old_prio=4 new_prio=6 zsh-1789 [000] 602.443924: update_swap_token_priority: mm=ffff88015eaf7018 old_prio=6 new_prio=8 zsh-1789 [000] 602.451873: update_swap_token_priority: mm=ffff88015eaf7018 old_prio=8 new_prio=10 zsh-1789 [001] 602.462639: update_swap_token_priority: mm=ffff88015eaf7018 old_prio=10 new_prio=12 Signed-off-by: KOSAKI Motohiro Acked-by: Rik van Riel Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/vmscan.h | 77 +++++++++++++++++++++++++++++++++++ mm/thrash.c | 11 ++++- 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index ea422aaa23e1..1798e0cee2a9 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -6,6 +6,8 @@ #include #include +#include +#include #include "gfpflags.h" #define RECLAIM_WB_ANON 0x0001u @@ -310,6 +312,81 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive, show_reclaim_flags(__entry->reclaim_flags)) ); +TRACE_EVENT(replace_swap_token, + TP_PROTO(struct mm_struct *old_mm, + struct mm_struct *new_mm), + + TP_ARGS(old_mm, new_mm), + + TP_STRUCT__entry( + __field(struct mm_struct*, old_mm) + __field(unsigned int, old_prio) + __field(struct mm_struct*, new_mm) + __field(unsigned int, new_prio) + ), + + TP_fast_assign( + __entry->old_mm = old_mm; + __entry->old_prio = old_mm ? old_mm->token_priority : 0; + __entry->new_mm = new_mm; + __entry->new_prio = new_mm->token_priority; + ), + + TP_printk("old_token_mm=%p old_prio=%u new_token_mm=%p new_prio=%u", + __entry->old_mm, __entry->old_prio, + __entry->new_mm, __entry->new_prio) +); + +DECLARE_EVENT_CLASS(put_swap_token_template, + TP_PROTO(struct mm_struct *swap_token_mm), + + TP_ARGS(swap_token_mm), + + TP_STRUCT__entry( + __field(struct mm_struct*, swap_token_mm) + ), + + TP_fast_assign( + __entry->swap_token_mm = swap_token_mm; + ), + + TP_printk("token_mm=%p", __entry->swap_token_mm) +); + +DEFINE_EVENT(put_swap_token_template, put_swap_token, + TP_PROTO(struct mm_struct *swap_token_mm), + TP_ARGS(swap_token_mm) +); + +DEFINE_EVENT_CONDITION(put_swap_token_template, disable_swap_token, + TP_PROTO(struct mm_struct *swap_token_mm), + TP_ARGS(swap_token_mm), + TP_CONDITION(swap_token_mm != NULL) +); + +TRACE_EVENT_CONDITION(update_swap_token_priority, + TP_PROTO(struct mm_struct *mm, + unsigned int old_prio), + + TP_ARGS(mm, old_prio), + + TP_CONDITION(mm->token_priority != old_prio), + + TP_STRUCT__entry( + __field(struct mm_struct*, mm) + __field(unsigned int, old_prio) + __field(unsigned int, new_prio) + ), + + TP_fast_assign( + __entry->mm = mm; + __entry->old_prio = old_prio; + __entry->new_prio = mm->token_priority; + ), + + TP_printk("mm=%p old_prio=%u new_prio=%u", + __entry->mm, __entry->old_prio, __entry->new_prio) +); #endif /* _TRACE_VMSCAN_H */ diff --git a/mm/thrash.c b/mm/thrash.c index 6cdf86511385..17d9e29e4c9c 100644 --- a/mm/thrash.c +++ b/mm/thrash.c @@ -23,6 +23,8 @@ #include #include +#include + static DEFINE_SPINLOCK(swap_token_lock); struct mm_struct *swap_token_mm; struct mem_cgroup *swap_token_memcg; @@ -49,6 +51,7 @@ static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) void grab_swap_token(struct mm_struct *mm) { int current_interval; + unsigned int old_prio = mm->token_priority; global_faults++; @@ -63,7 +66,7 @@ void grab_swap_token(struct mm_struct *mm) if (mm == swap_token_mm) { mm->token_priority += 2; - goto out; + goto update_priority; } if (current_interval < mm->last_interval) @@ -77,6 +80,9 @@ void grab_swap_token(struct mm_struct *mm) if (mm->token_priority > swap_token_mm->token_priority) goto replace_token; +update_priority: + trace_update_swap_token_priority(mm, old_prio); + out: mm->faultstamp = global_faults; mm->last_interval = current_interval; @@ -85,6 +91,7 @@ out: replace_token: mm->token_priority += 2; + trace_replace_swap_token(swap_token_mm, mm); swap_token_mm = mm; swap_token_memcg = swap_token_memcg_from_mm(mm); goto out; @@ -95,6 +102,7 @@ void __put_swap_token(struct mm_struct *mm) { spin_lock(&swap_token_lock); if (likely(mm == swap_token_mm)) { + trace_put_swap_token(swap_token_mm); swap_token_mm = NULL; swap_token_memcg = NULL; } @@ -118,6 +126,7 @@ void disable_swap_token(struct mem_cgroup *memcg) if (match_memcg(memcg, swap_token_memcg)) { spin_lock(&swap_token_lock); if (match_memcg(memcg, swap_token_memcg)) { + trace_disable_swap_token(swap_token_mm); swap_token_mm = NULL; swap_token_memcg = NULL; } From d7911ef30cb7bec52234c2b7a5c275ac8f07905a Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 15 Jun 2011 15:08:15 -0700 Subject: [PATCH 119/327] vmscan: implement swap token priority aging While testing for memcg aware swap token, I observed a swap token was often grabbed an intermittent running process (eg init, auditd) and they never release a token. Why? Some processes (eg init, auditd, audispd) wake up when a process exiting. And swap token can be get first page-in process when a process exiting makes no swap token owner. Thus such above intermittent running process often get a token. And currently, swap token priority is only decreased at page fault path. Then, if the process sleep immediately after to grab swap token, the swap token priority never be decreased. That's obviously undesirable. This patch implement very poor (and lightweight) priority aging. It only be affect to the above corner case and doesn't change swap tendency workload performance (eg multi process qsbench load) Signed-off-by: KOSAKI Motohiro Reviewed-by: Rik van Riel Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/vmscan.h | 20 +++++++++++++------- mm/thrash.c | 11 ++++++++++- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 1798e0cee2a9..b2c33bd955fa 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -366,9 +366,10 @@ DEFINE_EVENT_CONDITION(put_swap_token_template, disable_swap_token, TRACE_EVENT_CONDITION(update_swap_token_priority, TP_PROTO(struct mm_struct *mm, - unsigned int old_prio), + unsigned int old_prio, + struct mm_struct *swap_token_mm), - TP_ARGS(mm, old_prio), + TP_ARGS(mm, old_prio, swap_token_mm), TP_CONDITION(mm->token_priority != old_prio), @@ -376,16 +377,21 @@ TRACE_EVENT_CONDITION(update_swap_token_priority, __field(struct mm_struct*, mm) __field(unsigned int, old_prio) __field(unsigned int, new_prio) + __field(struct mm_struct*, swap_token_mm) + __field(unsigned int, swap_token_prio) ), TP_fast_assign( - __entry->mm = mm; - __entry->old_prio = old_prio; - __entry->new_prio = mm->token_priority; + __entry->mm = mm; + __entry->old_prio = old_prio; + __entry->new_prio = mm->token_priority; + __entry->swap_token_mm = swap_token_mm; + __entry->swap_token_prio = swap_token_mm ? swap_token_mm->token_priority : 0; ), - TP_printk("mm=%p old_prio=%u new_prio=%u", - __entry->mm, __entry->old_prio, __entry->new_prio) + TP_printk("mm=%p old_prio=%u new_prio=%u swap_token_mm=%p token_prio=%u", + __entry->mm, __entry->old_prio, __entry->new_prio, + __entry->swap_token_mm, __entry->swap_token_prio) ); #endif /* _TRACE_VMSCAN_H */ diff --git a/mm/thrash.c b/mm/thrash.c index 17d9e29e4c9c..fabf2d0f5169 100644 --- a/mm/thrash.c +++ b/mm/thrash.c @@ -25,10 +25,13 @@ #include +#define TOKEN_AGING_INTERVAL (0xFF) + static DEFINE_SPINLOCK(swap_token_lock); struct mm_struct *swap_token_mm; struct mem_cgroup *swap_token_memcg; static unsigned int global_faults; +static unsigned int last_aging; #ifdef CONFIG_CGROUP_MEM_RES_CTLR static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) @@ -64,6 +67,11 @@ void grab_swap_token(struct mm_struct *mm) if (!swap_token_mm) goto replace_token; + if ((global_faults - last_aging) > TOKEN_AGING_INTERVAL) { + swap_token_mm->token_priority /= 2; + last_aging = global_faults; + } + if (mm == swap_token_mm) { mm->token_priority += 2; goto update_priority; @@ -81,7 +89,7 @@ void grab_swap_token(struct mm_struct *mm) goto replace_token; update_priority: - trace_update_swap_token_priority(mm, old_prio); + trace_update_swap_token_priority(mm, old_prio, swap_token_mm); out: mm->faultstamp = global_faults; @@ -94,6 +102,7 @@ replace_token: trace_replace_swap_token(swap_token_mm, mm); swap_token_mm = mm; swap_token_memcg = swap_token_memcg_from_mm(mm); + last_aging = global_faults; goto out; } From 50c35e5ba255fd8428cef8ff076da8d23bfd4909 Mon Sep 17 00:00:00 2001 From: Ying Han Date: Wed, 15 Jun 2011 15:08:16 -0700 Subject: [PATCH 120/327] memcg: add documentation for the memory.numastat API [akpm@linux-foundation.org: rework text, fit it into 80-cols] Signed-off-by: Ying Han Reviewed-by: KOSAKI Motohiro Acked-by: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/memory.txt | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 7c163477fcd8..510d64570d60 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -70,6 +70,7 @@ Brief summary of control files. (See sysctl's vm.swappiness) memory.move_charge_at_immigrate # set/show controls of moving charges memory.oom_control # set/show oom controls. + memory.numa_stat # show the number of memory usage per numa node 1. History @@ -464,6 +465,24 @@ value for efficient access. (Of course, when necessary, it's synchronized.) If you want to know more exact memory usage, you should use RSS+CACHE(+SWAP) value in memory.stat(see 5.2). +5.6 numa_stat + +This is similar to numa_maps but operates on a per-memcg basis. This is +useful for providing visibility into the numa locality information within +an memcg since the pages are allowed to be allocated from any physical +node. One of the usecases is evaluating application performance by +combining this information with the application's cpu allocation. + +We export "total", "file", "anon" and "unevictable" pages per-node for +each memcg. The ouput format of memory.numa_stat is: + +total= N0= N1= ... +file= N0= N1= ... +anon= N0= N1= ... +unevictable= N0= N1= ... + +And we have total = file + anon + unevictable. + 6. Hierarchy support The memory controller supports a deep hierarchy and hierarchical accounting. From ac5622418bbff9cd3dc607aa57dfb4f62a7f2043 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jun 2011 15:08:17 -0700 Subject: [PATCH 121/327] kmsg_dump.h: fix build when CONFIG_PRINTK is disabled Fix when CONFIG_PRINTK is not enabled: include/linux/kmsg_dump.h:56: error: 'EINVAL' undeclared (first use in this function) include/linux/kmsg_dump.h:61: error: 'EINVAL' undeclared (first use in this function) Looks like commit 595dd3d8bf95 ("kmsg_dump: fix build for CONFIG_PRINTK=n") uses EINVAL without having the needed header file(s), but I'm sure that I build tested that patch also. oh well. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmsg_dump.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 2a0d7d651dc3..ee0c952188de 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -12,6 +12,7 @@ #ifndef _LINUX_KMSG_DUMP_H #define _LINUX_KMSG_DUMP_H +#include #include enum kmsg_dump_reason { From 17441227f6258fc379c6ebfe21c3eec43b6f0de3 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 15 Jun 2011 15:08:17 -0700 Subject: [PATCH 122/327] checkpatch: add warning for uses of printk_ratelimit Warn about uses of printk_ratelimit() because it uses a global state and can hide subsequent useful messages. Signed-off-by: Joe Perches Cc: Andy Whitcroft Cc: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 8657f99bfb2b..b0aa2c680593 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -1943,6 +1943,11 @@ sub process { WARN("LINUX_VERSION_CODE should be avoided, code should be for the version to which it is merged\n" . $herecurr); } +# check for uses of printk_ratelimit + if ($line =~ /\bprintk_ratelimit\s*\(/) { + WARN("Prefer printk_ratelimited or pr__ratelimited to printk_ratelimit\n" . $herecurr); + } + # printk should use KERN_* levels. Note that follow on printk's on the # same line do not need a level, so we use the current block context # to try and find and validate the current printk. In summary the current From 32e45ff43eaf5c17f5a82c9ad358d515622c2562 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 15 Jun 2011 15:08:20 -0700 Subject: [PATCH 123/327] mm: increase RECLAIM_DISTANCE to 30 Recently, Robert Mueller reported (http://lkml.org/lkml/2010/9/12/236) that zone_reclaim_mode doesn't work properly on his new NUMA server (Dual Xeon E5520 + Intel S5520UR MB). He is using Cyrus IMAPd and it's built on a very traditional single-process model. * a master process which reads config files and manages the other process * multiple imapd processes, one per connection * multiple pop3d processes, one per connection * multiple lmtpd processes, one per connection * periodical "cleanup" processes. There are thousands of independent processes. The problem is, recent Intel motherboard turn on zone_reclaim_mode by default and traditional prefork model software don't work well on it. Unfortunatelly, such models are still typical even in the 21st century. We can't ignore them. This patch raises the zone_reclaim_mode threshold to 30. 30 doesn't have any specific meaning. but 20 means that one-hop QPI/Hypertransport and such relatively cheap 2-4 socket machine are often used for traditional servers as above. The intention is that these machines don't use zone_reclaim_mode. Note: ia64 and Power have arch specific RECLAIM_DISTANCE definitions. This patch doesn't change such high-end NUMA machine behavior. Dave Hansen said: : I know specifically of pieces of x86 hardware that set the information : in the BIOS to '21' *specifically* so they'll get the zone_reclaim_mode : behavior which that implies. : : They've done performance testing and run very large and scary benchmarks : to make sure that they _want_ this turned on. What this means for them : is that they'll probably be de-optimized, at least on newer versions of : the kernel. : : If you want to do this for particular systems, maybe _that_'s what we : should do. Have a list of specific configurations that need the : defaults overridden either because they're buggy, or they have an : unusual hardware configuration not really reflected in the distance : table. And later said: : The original change in the hardware tables was for the benefit of a : benchmark. Said benchmark isn't going to get run on mainline until the : next batch of enterprise distros drops, at which point the hardware where : this was done will be irrelevant for the benchmark. I'm sure any new : hardware will just set this distance to another yet arbitrary value to : make the kernel do what it wants. :) : : Also, when the hardware got _set_ to this initially, I complained. So, I : guess I'm getting my way now, with this patch. I'm cool with it. Reported-by: Robert Mueller Signed-off-by: KOSAKI Motohiro Acked-by: Christoph Lameter Acked-by: David Rientjes Reviewed-by: KAMEZAWA Hiroyuki Cc: Benjamin Herrenschmidt Cc: "Luck, Tony" Acked-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/topology.h b/include/linux/topology.h index b91a40e847d2..fc839bfa7935 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -60,7 +60,7 @@ int arch_update_cpu_topology(void); * (in whatever arch specific measurement units returned by node_distance()) * then switch on zone reclaim on boot. */ -#define RECLAIM_DISTANCE 20 +#define RECLAIM_DISTANCE 30 #endif #ifndef PENALTY_FOR_NODE_WITH_CPUS #define PENALTY_FOR_NODE_WITH_CPUS (1) From 5a1e6f75831bf1f8e596d642cd8a2512f11548fc Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 15 Jun 2011 15:08:21 -0700 Subject: [PATCH 124/327] drivers/misc/spear13xx_pcie_gadget.c: fix a memory leak in spear_pcie_gadget_probe error path In the case of goto err_kzalloc, we should kfree target. Signed-off-by: Axel Lin Acked-by: Pratyush Anand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/misc/spear13xx_pcie_gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/spear13xx_pcie_gadget.c b/drivers/misc/spear13xx_pcie_gadget.c index 7aded90f9daa..cfbddbef11de 100644 --- a/drivers/misc/spear13xx_pcie_gadget.c +++ b/drivers/misc/spear13xx_pcie_gadget.c @@ -845,7 +845,7 @@ err_iounmap: err_iounmap_app: iounmap(config->va_app_base); err_kzalloc: - kfree(config); + kfree(target); err_rel_res: release_mem_region(res1->start, resource_size(res1)); err_rel_res0: From 4bbd61fb9726808e72ab2aa440401f6e5e1aa8f7 Mon Sep 17 00:00:00 2001 From: Christian Gmeiner Date: Wed, 15 Jun 2011 15:08:22 -0700 Subject: [PATCH 125/327] drivers/misc/cs5535-mfgpt.c: fix wrong if condition Fix the wrong `if' condition for the check if the requested timer is available. The bitmap avail is used to store if a timer is used already. test_bit() is used to check if the requested timer is available. If a bit in the avail bitmap is set it means that the timer is available. The runtime effect would be that allocating a specific timer always fails (versus telling cs5535_mfgpt_alloc_timer to allocate the first available timer, which works). Signed-off-by: Christian Gmeiner Acked-by: Andres Salomon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/misc/cs5535-mfgpt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/cs5535-mfgpt.c b/drivers/misc/cs5535-mfgpt.c index e01e08c8c88b..bc685bfc4c33 100644 --- a/drivers/misc/cs5535-mfgpt.c +++ b/drivers/misc/cs5535-mfgpt.c @@ -174,7 +174,7 @@ struct cs5535_mfgpt_timer *cs5535_mfgpt_alloc_timer(int timer_nr, int domain) timer_nr = t < max ? (int) t : -1; } else { /* check if the requested timer's available */ - if (test_bit(timer_nr, mfgpt->avail)) + if (!test_bit(timer_nr, mfgpt->avail)) timer_nr = -1; } From 5f1a19070b16c20cdc71ed0e981bfa19f8f6a4ee Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 15 Jun 2011 15:08:23 -0700 Subject: [PATCH 126/327] mm: fix wrong kunmap_atomic() pointer Running a ktest.pl test, I hit the following bug on x86_32: ------------[ cut here ]------------ WARNING: at arch/x86/mm/highmem_32.c:81 __kunmap_atomic+0x64/0xc1() Hardware name: Modules linked in: Pid: 93, comm: sh Not tainted 2.6.39-test+ #1 Call Trace: [] warn_slowpath_common+0x7c/0x91 [] ? __kunmap_atomic+0x64/0xc1 [] ? __kunmap_atomic+0x64/0xc1^M [] warn_slowpath_null+0x22/0x24 [] __kunmap_atomic+0x64/0xc1 [] unmap_vmas+0x43a/0x4e0 [] exit_mmap+0x91/0xd2 [] mmput+0x43/0xad [] exit_mm+0x111/0x119 [] do_exit+0x1ff/0x5fa [] ? set_current_blocked+0x3c/0x40 [] ? sigprocmask+0x7e/0x8e [] do_group_exit+0x65/0x88 [] sys_exit_group+0x18/0x1c [] sysenter_do_call+0x12/0x38 ---[ end trace 8055f74ea3c0eb62 ]--- Running a ktest.pl git bisect, found the culprit: commit e303297e6c3a ("mm: extended batches for generic mmu_gather") But although this was the commit triggering the bug, it was not the one originally responsible for the bug. That was commit d16dfc550f53 ("mm: mmu_gather rework"). The code in zap_pte_range() has something that looks like the following: pte = pte_offset_map_lock(mm, pmd, addr, &ptl); do { [...] } while (pte++, addr += PAGE_SIZE, addr != end); pte_unmap_unlock(pte - 1, ptl); The pte starts off pointing at the first element in the page table directory that was returned by the pte_offset_map_lock(). When it's done with the page, pte will be pointing to anything between the next entry and the first entry of the next page inclusive. By doing a pte - 1, this puts the pte back onto the original page, which is all that pte_unmap_unlock() needs. In most archs (64 bit), this is not an issue as the pte is ignored in the pte_unmap_unlock(). But on 32 bit archs, where things may be kmapped, it is essential that the pte passed to pte_unmap_unlock() resides on the same page that was given by pte_offest_map_lock(). The problem came in d16dfc55 ("mm: mmu_gather rework") where it introduced a "break;" from the while loop. This alone did not seem to easily trigger the bug. But the modifications made by e303297e6 caused that "break;" to be hit on the first iteration, before the pte++. The pte not being incremented will now cause pte_unmap_unlock(pte - 1) to be pointing to the previous page. This will cause the wrong page to be unmapped, and also trigger the warning above. The simple solution is to just save the pointer given by pte_offset_map_lock() and use it in the unlock. Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: KAMEZAWA Hiroyuki Acked-by: Hugh Dickins Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index b13e7dbc399b..87d935333f0d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1112,11 +1112,13 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, int force_flush = 0; int rss[NR_MM_COUNTERS]; spinlock_t *ptl; + pte_t *start_pte; pte_t *pte; again: init_rss_vec(rss); - pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + pte = start_pte; arch_enter_lazy_mmu_mode(); do { pte_t ptent = *pte; @@ -1196,7 +1198,7 @@ again: add_mm_rss_vec(mm, rss); arch_leave_lazy_mmu_mode(); - pte_unmap_unlock(pte - 1, ptl); + pte_unmap_unlock(start_pte, ptl); /* * mmu_gather ran out of room to batch pages, we break out of From 3957c7768e5ea02fd3345176ddd340f820e5d285 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 15 Jun 2011 15:08:25 -0700 Subject: [PATCH 127/327] mm: compaction: fix special case -1 order checks Commit 56de7263fcf3 ("mm: compaction: direct compact when a high-order allocation fails") introduced a check for cc->order == -1 in compact_finished. We should continue compacting in that case because the request came from userspace and there is no particular order to compact for. Similar check has been added by 82478fb7 (mm: compaction: prevent division-by-zero during user-requested compaction) for compaction_suitable. The check is, however, done after zone_watermark_ok which uses order as a right hand argument for shifts. Not only watermark check is pointless if we can break out without it but it also uses 1 << -1 which is not well defined (at least from C standard). Let's move the -1 check above zone_watermark_ok. [minchan.kim@gmail.com> - caught compaction_suitable] Signed-off-by: Michal Hocko Cc: Mel Gorman Reviewed-by: Minchan Kim Reviewed-by: KAMEZAWA Hiroyuki Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/compaction.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index 021a2960ef9e..94bdbe1f7caf 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -420,13 +420,6 @@ static int compact_finished(struct zone *zone, if (cc->free_pfn <= cc->migrate_pfn) return COMPACT_COMPLETE; - /* Compaction run is not finished if the watermark is not met */ - watermark = low_wmark_pages(zone); - watermark += (1 << cc->order); - - if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0)) - return COMPACT_CONTINUE; - /* * order == -1 is expected when compacting via * /proc/sys/vm/compact_memory @@ -434,6 +427,13 @@ static int compact_finished(struct zone *zone, if (cc->order == -1) return COMPACT_CONTINUE; + /* Compaction run is not finished if the watermark is not met */ + watermark = low_wmark_pages(zone); + watermark += (1 << cc->order); + + if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0)) + return COMPACT_CONTINUE; + /* Direct compactor: Is a suitable page free? */ for (order = cc->order; order < MAX_ORDER; order++) { /* Job done if page is free of the right migratetype */ @@ -460,6 +460,13 @@ unsigned long compaction_suitable(struct zone *zone, int order) int fragindex; unsigned long watermark; + /* + * order == -1 is expected when compacting via + * /proc/sys/vm/compact_memory + */ + if (order == -1) + return COMPACT_CONTINUE; + /* * Watermarks for order-0 must be met for compaction. Note the 2UL. * This is because during migration, copies of pages need to be @@ -469,13 +476,6 @@ unsigned long compaction_suitable(struct zone *zone, int order) if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) return COMPACT_SKIPPED; - /* - * order == -1 is expected when compacting via - * /proc/sys/vm/compact_memory - */ - if (order == -1) - return COMPACT_CONTINUE; - /* * fragmentation index determines if allocation failures are due to * low memory or external fragmentation From ca39599c633fb02aceac31a7e67563612e4fe347 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 15 Jun 2011 15:08:27 -0700 Subject: [PATCH 128/327] BUILD_BUG_ON_ZERO: fix sparse breakage BUILD_BUG_ON_ZERO and BUILD_BUG_ON_NULL must return values, even in the CHECKER case otherwise various users of it become syntactically invalid. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index fb0e7329fee1..953352a88336 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -671,8 +671,8 @@ struct sysinfo { #ifdef __CHECKER__ #define BUILD_BUG_ON_NOT_POWER_OF_2(n) -#define BUILD_BUG_ON_ZERO(e) -#define BUILD_BUG_ON_NULL(e) +#define BUILD_BUG_ON_ZERO(e) (0) +#define BUILD_BUG_ON_NULL(e) ((void*)0) #define BUILD_BUG_ON(condition) #else /* __CHECKER__ */ From bd5dc17be87b3a3073d50b23802647db3ae3fa8e Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Wed, 15 Jun 2011 15:08:28 -0700 Subject: [PATCH 129/327] uts: make default hostname configurable, rather than always using "(none)" The "hostname" tool falls back to setting the hostname to "localhost" if /etc/hostname does not exist. Distribution init scripts have the same fallback. However, if userspace never calls sethostname, such as when booting with init=/bin/sh, or otherwise booting a minimal system without the usual init scripts, the default hostname of "(none)" remains, unhelpfully appearing in various places such as prompts ("root@(none):~#") and logs. Furthermore, "(none)" doesn't typically resolve to anything useful. Make the default hostname configurable. This removes the need for the standard fallback, provides a useful default for systems that never call sethostname, and makes minimal systems that much more useful with less configuration. Distributions could choose to use "localhost" here to avoid the fallback, while embedded systems may wish to use a specific target hostname. Signed-off-by: Josh Triplett Acked-by: Linus Torvalds Acked-by: David Miller Cc: Serge Hallyn Cc: Kel Modderman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/uts.h | 2 +- init/Kconfig | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/linux/uts.h b/include/linux/uts.h index 73eb1ed36ec4..6ddbd86377de 100644 --- a/include/linux/uts.h +++ b/include/linux/uts.h @@ -9,7 +9,7 @@ #endif #ifndef UTS_NODENAME -#define UTS_NODENAME "(none)" /* set by sethostname() */ +#define UTS_NODENAME CONFIG_DEFAULT_HOSTNAME /* set by sethostname() */ #endif #ifndef UTS_DOMAINNAME diff --git a/init/Kconfig b/init/Kconfig index ebafac4231ee..e0a22e42d39a 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -204,6 +204,15 @@ config KERNEL_LZO endchoice +config DEFAULT_HOSTNAME + string "Default hostname" + default "(none)" + help + This option determines the default system hostname before userspace + calls sethostname(2). The kernel traditionally uses "(none)" here, + but you may wish to use a different default here to make a minimal + system more usable with less configuration. + config SWAP bool "Support for paging of anonymous memory (swap)" depends on MMU && BLOCK From 185e595f770219f2329e590a6be69e6e89e152af Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 15 Jun 2011 15:08:30 -0700 Subject: [PATCH 130/327] MAINTAINERS: Balbir has moved Update my email address. Email will start to the old address bouncing soon Signed-off-by: Balbir Singh Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2cc3a9430503..88c016d209ff 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4144,7 +4144,7 @@ F: include/linux/mm.h F: mm/ MEMORY RESOURCE CONTROLLER -M: Balbir Singh +M: Balbir Singh M: Daisuke Nishimura M: KAMEZAWA Hiroyuki L: linux-mm@kvack.org @@ -4889,7 +4889,7 @@ F: mm/percpu*.c F: arch/*/include/asm/percpu.h PER-TASK DELAY ACCOUNTING -M: Balbir Singh +M: Balbir Singh S: Maintained F: include/linux/delayacct.h F: kernel/delayacct.c @@ -6097,7 +6097,7 @@ F: include/target/ F: Documentation/target/ TASKSTATS STATISTICS INTERFACE -M: Balbir Singh +M: Balbir Singh S: Maintained F: Documentation/accounting/taskstats* F: include/linux/taskstats* From 9d8f776bfb812dd23fcdbb698e9ac4298fc3c624 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 15 Jun 2011 15:08:31 -0700 Subject: [PATCH 131/327] drivers/leds/leds-asic3: make LEDS_ASIC3 depend on LEDS_CLASS We call led_classdev_unregister/led_classdev_register in asic3_led_remove/asic3_led_probe, thus make LEDS_ASIC3 depend on LEDS_CLASS. This patch fixes below build error if LEDS_CLASS is not configured. LD .tmp_vmlinux1 drivers/built-in.o: In function `asic3_led_remove': clkdev.c:(.devexit.text+0x1860): undefined reference to `led_classdev_unregister' drivers/built-in.o: In function `asic3_led_probe': clkdev.c:(.devinit.text+0xcee8): undefined reference to `led_classdev_register' make: *** [.tmp_vmlinux1] Error 1 Signed-off-by: Axel Lin Cc: Paul Parsons Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/leds/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 23f0d5e99f35..883813d7843c 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -391,6 +391,7 @@ config LEDS_NETXBIG config LEDS_ASIC3 bool "LED support for the HTC ASIC3" + depends on LEDS_CLASS depends on MFD_ASIC3 default y help From be5ce2f1c93295711be4ae5565f9194ed9776ea7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 15 Jun 2011 15:08:31 -0700 Subject: [PATCH 132/327] leds: move LEDS_GPIO_REGISTER out of menuconfig NEW_LEDS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 4440673a95e6 ("leds: provide helper to register "leds-gpio" devices") broke the display of the NEW_LEDS menu as it didn't depend on NEW_LEDS and so made "LED drivers" and "LED Triggers" appear at the same level as "LED Support" instead of below it as it was before 4440673a. Moving LEDS_GPIO_REGISTER out of the menuconfig NEW_LEDS fixes this unintended side effect. Reported-by: Axel Lin Signed-off-by: Uwe Kleine-König Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/leds/Kconfig | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 883813d7843c..1032730cd17b 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -1,3 +1,10 @@ +config LEDS_GPIO_REGISTER + bool + help + This option provides the function gpio_led_register_device. + As this function is used by arch code it must not be compiled as a + module. + menuconfig NEW_LEDS bool "LED Support" help @@ -14,13 +21,6 @@ config LEDS_CLASS This option enables the led sysfs class in /sys/class/leds. You'll need this to do anything useful with LEDs. If unsure, say N. -config LEDS_GPIO_REGISTER - bool - help - This option provides the function gpio_led_register_device. - As this function is used by arch code it must not be compiled as a - module. - if NEW_LEDS comment "LED drivers" From 9e6f343852cb16ea961ba5be2ca8dde609aa6f23 Mon Sep 17 00:00:00 2001 From: Pawel Osciak Date: Wed, 15 Jun 2011 15:08:32 -0700 Subject: [PATCH 133/327] MAINTAINERS: add videobuf2 maintainers Add maintainers for the videobuf2 V4L2 driver framework. Signed-off-by: Pawel Osciak Acked-by: Marek Szyprowski Cc: Mauro Carvalho Chehab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 88c016d209ff..518442ab25af 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6716,6 +6716,14 @@ S: Maintained F: Documentation/filesystems/vfat.txt F: fs/fat/ +VIDEOBUF2 FRAMEWORK +M: Pawel Osciak +M: Marek Szyprowski +L: linux-media@vger.kernel.org +S: Maintained +F: drivers/media/video/videobuf2-* +F: include/media/videobuf2-* + VIRTIO CONSOLE DRIVER M: Amit Shah L: virtualization@lists.linux-foundation.org From 49b24d6b41c576ba43153fc94695f871cce139a5 Mon Sep 17 00:00:00 2001 From: Nicolas Kaiser Date: Wed, 15 Jun 2011 15:08:34 -0700 Subject: [PATCH 134/327] include/asm-generic/pgtable.h: fix unbalanced parenthesis Signed-off-by: Nicolas Kaiser Reviewed-by: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index e9b8e5926bef..76bff2bff15e 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -88,7 +88,7 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, pmd_t pmd = *pmdp; pmd_clear(mm, address, pmdp); return pmd; -}) +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif From 26575f9544530127757297d4de8fb2f2c75f1f69 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 15 Jun 2011 15:08:35 -0700 Subject: [PATCH 135/327] w1: W1_MASTER_DS1WM should depend on GENERIC_HARDIRQS On m68k (which doesn't support generic hardirqs yet): drivers/w1/masters/ds1wm.c: In function `ds1wm_probe': drivers/w1/masters/ds1wm.c: error: implicit declaration of function `irq_set_irq_type' Signed-off-by: Geert Uytterhoeven Cc: Evgeniy Polyakov Cc: Jean-Franois Dagenais Cc: Matt Reimer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/w1/masters/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/w1/masters/Kconfig b/drivers/w1/masters/Kconfig index 00d615d7aa21..979d6eed9a0f 100644 --- a/drivers/w1/masters/Kconfig +++ b/drivers/w1/masters/Kconfig @@ -42,7 +42,7 @@ config W1_MASTER_MXC config W1_MASTER_DS1WM tristate "Maxim DS1WM 1-wire busmaster" - depends on W1 + depends on W1 && GENERIC_HARDIRQS help Say Y here to enable the DS1WM 1-wire driver, such as that in HP iPAQ devices like h5xxx, h2200, and ASIC3-based like From de695e159e3fd679594eb45449d2638d54434c32 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 15 Jun 2011 15:08:37 -0700 Subject: [PATCH 136/327] init/calibrate.c: remove annoying printk Remove calibrate_delay_direct()'s KERN_DEBUG printk related to bogomips calculation as it appears when booting every core on setups with 'ignore_loglevel' which dmesg people scan for possible issues. As the message doesn't show very useful information to the widest audience of kernel boot message gazers, it should be removed. Introduced by commit d2b463135f84 ("init/calibrate.c: fix for critical bogoMIPS intermittent calculation failure"). Signed-off-by: Borislav Petkov Cc: Andrew Worsley Cc: Phil Carmody Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/calibrate.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/init/calibrate.c b/init/calibrate.c index cfd7000c9d71..2568d22a304e 100644 --- a/init/calibrate.c +++ b/init/calibrate.c @@ -93,9 +93,6 @@ static unsigned long __cpuinit calibrate_delay_direct(void) * If the upper limit and lower limit of the timer_rate is * >= 12.5% apart, redo calibration. */ - printk(KERN_DEBUG "calibrate_delay_direct() timer_rate_max=%lu " - "timer_rate_min=%lu pre_start=%lu pre_end=%lu\n", - timer_rate_max, timer_rate_min, pre_start, pre_end); if (start >= post_end) printk(KERN_NOTICE "calibrate_delay_direct() ignoring " "timer_rate as we had a TSC wrap around" From 959ecc48fc7506b9d7825ea70e40d92d9b308033 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 15 Jun 2011 15:08:38 -0700 Subject: [PATCH 137/327] mm/memory_hotplug.c: fix building of node hotplug zonelist During memory hotplug we refresh zonelists when we online a page in a new zone. It means that the node's zonelist is not initialized until pages are onlined. So for example, "nid" passed by MEM_GOING_ONLINE notifier will point to NODE_DATA(nid) which has no zone fallback list. Moreover, if we hot-add cpu-only nodes, alloc_pages() will do no fallback. This patch makes a zonelist when a new pgdata is available. Note: in production, at fujitsu, memory should be onlined before cpu and our server didn't have any memory-less nodes and had no problems. But recent changes in MEM_GOING_ONLINE+page_cgroup will access not initialized zonelist of node. Anyway, there are memory-less node and we need some care. Signed-off-by: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9f646374e32f..02159c755136 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -494,6 +494,12 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) /* init node's zones as empty zones, we don't have any present pages.*/ free_area_init_node(nid, zones_size, start_pfn, zholes_size); + /* + * The node we allocated has no zone fallback lists. For avoiding + * to access not-initialized zonelist, build here. + */ + build_all_zonelists(NULL); + return pgdat; } From b0320c7b7d1ac1bd5c2d9dff3258524ab39bad32 Mon Sep 17 00:00:00 2001 From: Rafael Aquini Date: Wed, 15 Jun 2011 15:08:39 -0700 Subject: [PATCH 138/327] mm: fix negative commitlimit when gigantic hugepages are allocated When 1GB hugepages are allocated on a system, free(1) reports less available memory than what really is installed in the box. Also, if the total size of hugepages allocated on a system is over half of the total memory size, CommitLimit becomes a negative number. The problem is that gigantic hugepages (order > MAX_ORDER) can only be allocated at boot with bootmem, thus its frames are not accounted to 'totalram_pages'. However, they are accounted to hugetlb_total_pages() What happens to turn CommitLimit into a negative number is this calculation, in fs/proc/meminfo.c: allowed = ((totalram_pages - hugetlb_total_pages()) * sysctl_overcommit_ratio / 100) + total_swap_pages; A similar calculation occurs in __vm_enough_memory() in mm/mmap.c. Also, every vm statistic which depends on 'totalram_pages' will render confusing values, as if system were 'missing' some part of its memory. Impact of this bug: When gigantic hugepages are allocated and sysctl_overcommit_memory == OVERCOMMIT_NEVER. In a such situation, __vm_enough_memory() goes through the mentioned 'allowed' calculation and might end up mistakenly returning -ENOMEM, thus forcing the system to start reclaiming pages earlier than it would be ususal, and this could cause detrimental impact to overall system's performance, depending on the workload. Besides the aforementioned scenario, I can only think of this causing annoyances with memory reports from /proc/meminfo and free(1). [akpm@linux-foundation.org: standardize comment layout] Reported-by: Russ Anderson Signed-off-by: Rafael Aquini Acked-by: Russ Anderson Cc: Andrea Arcangeli Cc: Christoph Lameter Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 6402458fee38..bfcf153bc829 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1111,6 +1111,14 @@ static void __init gather_bootmem_prealloc(void) WARN_ON(page_count(page) != 1); prep_compound_huge_page(page, h->order); prep_new_huge_page(h, page, page_to_nid(page)); + /* + * If we had gigantic hugepages allocated at boot time, we need + * to restore the 'stolen' pages to totalram_pages in order to + * fix confusing memory reports from free(1) and another + * side-effects, like CommitLimit going negative. + */ + if (h->order > (MAX_ORDER - 1)) + totalram_pages += 1 << h->order; } } From 45d16f09ddd66597e561876f5652c05bf986360a Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Wed, 15 Jun 2011 15:08:40 -0700 Subject: [PATCH 139/327] leds: fix the incorrect display in menuconfig Seems when a config option does not have a dependency of the menuconfig, it messes the display of the rest configs, even if it's a hidden one. Signed-off-by: Eric Miao Cc: Richard Purdie Cc: Valdis Kletnieks Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/leds/Kconfig | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 1032730cd17b..713d43b4e563 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -14,15 +14,14 @@ menuconfig NEW_LEDS This is not related to standard keyboard LEDs which are controlled via the input system. +if NEW_LEDS + config LEDS_CLASS bool "LED Class Support" - depends on NEW_LEDS help This option enables the led sysfs class in /sys/class/leds. You'll need this to do anything useful with LEDs. If unsure, say N. -if NEW_LEDS - comment "LED drivers" config LEDS_88PM860X From 8957712710e045044e3c44375c6a87d7ffa17d51 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 15 Jun 2011 15:08:41 -0700 Subject: [PATCH 140/327] mm: memory.numa_stat: fix file permission Commit 406eb0c9ba76 ("memcg: add memory.numastat api for numa statistics") adds memory.numa_stat file for memory cgroup. But the file permissions are wrong. [kamezawa@bluextal linux-2.6]$ ls -l /cgroup/memory/A/memory.numa_stat ---------- 1 root root 0 Jun 9 18:36 /cgroup/memory/A/memory.numa_stat This patch fixes the permission as [root@bluextal kamezawa]# ls -l /cgroup/memory/A/memory.numa_stat -r--r--r-- 1 root root 0 Jun 10 16:49 /cgroup/memory/A/memory.numa_stat Signed-off-by: KAMEZAWA Hiroyuki Acked-by: Ying Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e37c44d88d46..02a7947608ad 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4640,6 +4640,7 @@ static struct cftype mem_cgroup_files[] = { { .name = "numa_stat", .open = mem_control_numa_stat_open, + .mode = S_IRUGO, }, #endif }; From 37573e8c718277103f61f03741bdc5606d31b07e Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 15 Jun 2011 15:08:42 -0700 Subject: [PATCH 141/327] memcg: fix init_page_cgroup nid with sparsemem Commit 21a3c9646873 ("memcg: allocate memory cgroup structures in local nodes") makes page_cgroup allocation as NUMA aware. But that caused a problem https://bugzilla.kernel.org/show_bug.cgi?id=36192. The problem was getting a NID from invalid struct pages, which was not initialized because it was out-of-node, out of [node_start_pfn, node_end_pfn) Now, with sparsemem, page_cgroup_init scans pfn from 0 to max_pfn. But this may scan a pfn which is not on any node and can access memmap which is not initialized. This makes page_cgroup_init() for SPARSEMEM node aware and remove a code to get nid from page->flags. (Then, we'll use valid NID always.) [akpm@linux-foundation.org: try to fix up comments] Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_cgroup.c | 71 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 74ccff61d1be..53bffc6c293e 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -162,13 +162,13 @@ static void free_page_cgroup(void *addr) } #endif -static int __meminit init_section_page_cgroup(unsigned long pfn) +static int __meminit init_section_page_cgroup(unsigned long pfn, int nid) { struct page_cgroup *base, *pc; struct mem_section *section; unsigned long table_size; unsigned long nr; - int nid, index; + int index; nr = pfn_to_section_nr(pfn); section = __nr_to_section(nr); @@ -176,7 +176,6 @@ static int __meminit init_section_page_cgroup(unsigned long pfn) if (section->page_cgroup) return 0; - nid = page_to_nid(pfn_to_page(pfn)); table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; base = alloc_page_cgroup(table_size, nid); @@ -196,7 +195,11 @@ static int __meminit init_section_page_cgroup(unsigned long pfn) pc = base + index; init_page_cgroup(pc, nr); } - + /* + * The passed "pfn" may not be aligned to SECTION. For the calculation + * we need to apply a mask. + */ + pfn &= PAGE_SECTION_MASK; section->page_cgroup = base - pfn; total_usage += table_size; return 0; @@ -225,10 +228,20 @@ int __meminit online_page_cgroup(unsigned long start_pfn, start = start_pfn & ~(PAGES_PER_SECTION - 1); end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); + if (nid == -1) { + /* + * In this case, "nid" already exists and contains valid memory. + * "start_pfn" passed to us is a pfn which is an arg for + * online__pages(), and start_pfn should exist. + */ + nid = pfn_to_nid(start_pfn); + VM_BUG_ON(!node_state(nid, N_ONLINE)); + } + for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) { if (!pfn_present(pfn)) continue; - fail = init_section_page_cgroup(pfn); + fail = init_section_page_cgroup(pfn, nid); } if (!fail) return 0; @@ -284,25 +297,47 @@ static int __meminit page_cgroup_callback(struct notifier_block *self, void __init page_cgroup_init(void) { unsigned long pfn; - int fail = 0; + int nid; if (mem_cgroup_disabled()) return; - for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) { - if (!pfn_present(pfn)) - continue; - fail = init_section_page_cgroup(pfn); - } - if (fail) { - printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n"); - panic("Out of memory"); - } else { - hotplug_memory_notifier(page_cgroup_callback, 0); + for_each_node_state(nid, N_HIGH_MEMORY) { + unsigned long start_pfn, end_pfn; + + start_pfn = node_start_pfn(nid); + end_pfn = node_end_pfn(nid); + /* + * start_pfn and end_pfn may not be aligned to SECTION and the + * page->flags of out of node pages are not initialized. So we + * scan [start_pfn, the biggest section's pfn < end_pfn) here. + */ + for (pfn = start_pfn; + pfn < end_pfn; + pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) { + + if (!pfn_valid(pfn)) + continue; + /* + * Nodes's pfns can be overlapping. + * We know some arch can have a nodes layout such as + * -------------pfn--------------> + * N0 | N1 | N2 | N0 | N1 | N2|.... + */ + if (pfn_to_nid(pfn) != nid) + continue; + if (init_section_page_cgroup(pfn, nid)) + goto oom; + } } + hotplug_memory_notifier(page_cgroup_callback, 0); printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage); - printk(KERN_INFO "please try 'cgroup_disable=memory' option if you don't" - " want memory cgroups\n"); + printk(KERN_INFO "please try 'cgroup_disable=memory' option if you " + "don't want memory cgroups\n"); + return; +oom: + printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n"); + panic("Out of memory"); } void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) From 733eda7ac316cd4e550fa096e4ed42356dc546e7 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 15 Jun 2011 15:08:43 -0700 Subject: [PATCH 142/327] memcg: clear mm->owner when last possible owner leaves The following crash was reported: > Call Trace: > [] mem_cgroup_from_task+0x15/0x17 > [] __mem_cgroup_try_charge+0x148/0x4b4 > [] ? need_resched+0x23/0x2d > [] ? preempt_schedule+0x46/0x4f > [] mem_cgroup_charge_common+0x9a/0xce > [] mem_cgroup_newpage_charge+0x5d/0x5f > [] khugepaged+0x5da/0xfaf > [] ? __init_waitqueue_head+0x4b/0x4b > [] ? add_mm_counter.constprop.5+0x13/0x13 > [] kthread+0xa8/0xb0 > [] ? sub_preempt_count+0xa1/0xb4 > [] kernel_thread_helper+0x4/0x10 > [] ? retint_restore_args+0x13/0x13 > [] ? __init_kthread_worker+0x5a/0x5a What happens is that khugepaged tries to charge a huge page against an mm whose last possible owner has already exited, and the memory controller crashes when the stale mm->owner is used to look up the cgroup to charge. mm->owner has never been set to NULL with the last owner going away, but nobody cared until khugepaged came along. Even then it wasn't a problem because the final mmput() on an mm was forced to acquire and release mmap_sem in write-mode, preventing an exiting owner to go away while the mmap_sem was held, and until "692e0b3 mm: thp: optimize memcg charge in khugepaged", the memory cgroup charge was protected by mmap_sem in read-mode. Instead of going back to relying on the mmap_sem to enforce lifetime of a task, this patch ensures that mm->owner is properly set to NULL when the last possible owner is exiting, which the memory controller can handle just fine. [akpm@linux-foundation.org: tweak comments] Signed-off-by: Hugh Dickins Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Johannes Weiner Reported-by: Hugh Dickins Reported-by: Dave Jones Reviewed-by: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index 20a406471525..f2b321bae440 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -561,29 +561,28 @@ void exit_files(struct task_struct *tsk) #ifdef CONFIG_MM_OWNER /* - * Task p is exiting and it owned mm, lets find a new owner for it + * A task is exiting. If it owned this mm, find a new owner for the mm. */ -static inline int -mm_need_new_owner(struct mm_struct *mm, struct task_struct *p) -{ - /* - * If there are other users of the mm and the owner (us) is exiting - * we need to find a new owner to take on the responsibility. - */ - if (atomic_read(&mm->mm_users) <= 1) - return 0; - if (mm->owner != p) - return 0; - return 1; -} - void mm_update_next_owner(struct mm_struct *mm) { struct task_struct *c, *g, *p = current; retry: - if (!mm_need_new_owner(mm, p)) + /* + * If the exiting or execing task is not the owner, it's + * someone else's problem. + */ + if (mm->owner != p) return; + /* + * The current owner is exiting/execing and there are no other + * candidates. Do not leave the mm pointing to a possibly + * freed task structure. + */ + if (atomic_read(&mm->mm_users) <= 1) { + mm->owner = NULL; + return; + } read_lock(&tasklist_lock); /* From 7ae534d074e01e54d5cfbc9734b73fdfc855501f Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 15 Jun 2011 15:08:44 -0700 Subject: [PATCH 143/327] memcg: fix wrong check of noswap with softlimit Hierarchical reclaim doesn't swap out if memsw and resource limits are thye same (memsw_is_minimum == true) because we would hit mem+swap limit anyway (during hard limit reclaim). If it comes to the soft limit we shouldn't consider memsw_is_minimum at all because it doesn't make much sense. Either the soft limit is bellow the hard limit and then we cannot hit mem+swap limit or the direct reclaim takes a precedence. Signed-off-by: KAMEZAWA Hiroyuki Reviewed-by: Michal Hocko Acked-by: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 02a7947608ad..0b1a32cbd74d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1663,7 +1663,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; /* If memsw_is_minimum==1, swap-out is of-no-use. */ - if (root_mem->memsw_is_minimum) + if (!check_soft && root_mem->memsw_is_minimum) noswap = true; while (1) { From 26fe616844491a41a1abc02e29f7a9d1ec2f8ddb Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 15 Jun 2011 15:08:45 -0700 Subject: [PATCH 144/327] memcg: fix percpu cached charge draining frequency For performance, memory cgroup caches some "charge" from res_counter into per cpu cache. This works well but because it's cache, it needs to be flushed in some cases. Typical cases are 1. when someone hit limit. 2. when rmdir() is called and need to charges to be 0. But "1" has problem. Recently, with large SMP machines, we see many kworker runs because of flushing memcg's cache. Bad things in implementation are that even if a cpu contains a cache for memcg not related to a memcg which hits limit, drain code is called. This patch does A) check percpu cache contains a useful data or not. B) check other asynchronous percpu draining doesn't run. C) don't call local cpu callback. (*)This patch avoid changing the calling condition with hard-limit. When I run "cat 1Gfile > /dev/null" under 300M limit memcg, [Before] 13767 kamezawa 20 0 98.6m 424 416 D 10.0 0.0 0:00.61 cat 58 root 20 0 0 0 0 S 0.6 0.0 0:00.09 kworker/2:1 60 root 20 0 0 0 0 S 0.6 0.0 0:00.08 kworker/4:1 4 root 20 0 0 0 0 S 0.3 0.0 0:00.02 kworker/0:0 57 root 20 0 0 0 0 S 0.3 0.0 0:00.05 kworker/1:1 61 root 20 0 0 0 0 S 0.3 0.0 0:00.05 kworker/5:1 62 root 20 0 0 0 0 S 0.3 0.0 0:00.05 kworker/6:1 63 root 20 0 0 0 0 S 0.3 0.0 0:00.05 kworker/7:1 [After] 2676 root 20 0 98.6m 416 416 D 9.3 0.0 0:00.87 cat 2626 kamezawa 20 0 15192 1312 920 R 0.3 0.0 0:00.28 top 1 root 20 0 19384 1496 1204 S 0.0 0.0 0:00.66 init 2 root 20 0 0 0 0 S 0.0 0.0 0:00.00 kthreadd 3 root 20 0 0 0 0 S 0.0 0.0 0:00.00 ksoftirqd/0 4 root 20 0 0 0 0 S 0.0 0.0 0:00.00 kworker/0:0 [akpm@linux-foundation.org: make percpu_charge_mutex static, tweak comments] Signed-off-by: KAMEZAWA Hiroyuki Acked-by: Daisuke Nishimura Reviewed-by: Michal Hocko Tested-by: Ying Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 54 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 0b1a32cbd74d..c39a177bb641 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -359,7 +359,7 @@ enum charge_type { static void mem_cgroup_get(struct mem_cgroup *mem); static void mem_cgroup_put(struct mem_cgroup *mem); static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem); -static void drain_all_stock_async(void); +static void drain_all_stock_async(struct mem_cgroup *mem); static struct mem_cgroup_per_zone * mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) @@ -1671,7 +1671,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, if (victim == root_mem) { loop++; if (loop >= 1) - drain_all_stock_async(); + drain_all_stock_async(root_mem); if (loop >= 2) { /* * If we have not been able to reclaim @@ -1934,9 +1934,11 @@ struct memcg_stock_pcp { struct mem_cgroup *cached; /* this never be root cgroup */ unsigned int nr_pages; struct work_struct work; + unsigned long flags; +#define FLUSHING_CACHED_CHARGE (0) }; static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock); -static atomic_t memcg_drain_count; +static DEFINE_MUTEX(percpu_charge_mutex); /* * Try to consume stocked charge on this cpu. If success, one page is consumed @@ -1984,6 +1986,7 @@ static void drain_local_stock(struct work_struct *dummy) { struct memcg_stock_pcp *stock = &__get_cpu_var(memcg_stock); drain_stock(stock); + clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); } /* @@ -2008,26 +2011,45 @@ static void refill_stock(struct mem_cgroup *mem, unsigned int nr_pages) * expects some charges will be back to res_counter later but cannot wait for * it. */ -static void drain_all_stock_async(void) +static void drain_all_stock_async(struct mem_cgroup *root_mem) { - int cpu; - /* This function is for scheduling "drain" in asynchronous way. - * The result of "drain" is not directly handled by callers. Then, - * if someone is calling drain, we don't have to call drain more. - * Anyway, WORK_STRUCT_PENDING check in queue_work_on() will catch if - * there is a race. We just do loose check here. + int cpu, curcpu; + /* + * If someone calls draining, avoid adding more kworker runs. */ - if (atomic_read(&memcg_drain_count)) + if (!mutex_trylock(&percpu_charge_mutex)) return; /* Notify other cpus that system-wide "drain" is running */ - atomic_inc(&memcg_drain_count); get_online_cpus(); + /* + * Get a hint for avoiding draining charges on the current cpu, + * which must be exhausted by our charging. It is not required that + * this be a precise check, so we use raw_smp_processor_id() instead of + * getcpu()/putcpu(). + */ + curcpu = raw_smp_processor_id(); for_each_online_cpu(cpu) { struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); - schedule_work_on(cpu, &stock->work); + struct mem_cgroup *mem; + + if (cpu == curcpu) + continue; + + mem = stock->cached; + if (!mem) + continue; + if (mem != root_mem) { + if (!root_mem->use_hierarchy) + continue; + /* check whether "mem" is under tree of "root_mem" */ + if (!css_is_ancestor(&mem->css, &root_mem->css)) + continue; + } + if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) + schedule_work_on(cpu, &stock->work); } put_online_cpus(); - atomic_dec(&memcg_drain_count); + mutex_unlock(&percpu_charge_mutex); /* We don't wait for flush_work */ } @@ -2035,9 +2057,9 @@ static void drain_all_stock_async(void) static void drain_all_stock_sync(void) { /* called when force_empty is called */ - atomic_inc(&memcg_drain_count); + mutex_lock(&percpu_charge_mutex); schedule_on_each_cpu(drain_local_stock); - atomic_dec(&memcg_drain_count); + mutex_unlock(&percpu_charge_mutex); } /* From fbc29a25e484be073e7d762c9f7f1d4bf8aecc48 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 15 Jun 2011 15:08:46 -0700 Subject: [PATCH 145/327] memcg: avoid percpu cached charge draining at softlimit Based on Michal Hocko's comment. We are not draining per cpu cached charges during soft limit reclaim because background reclaim doesn't care about charges. It tries to free some memory and charges will not give any. Cached charges might influence only selection of the biggest soft limit offender but as the call is done only after the selection has been already done it makes no change. Signed-off-by: KAMEZAWA Hiroyuki Cc: Daisuke Nishimura Reviewed-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c39a177bb641..cf7d027a8844 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1670,7 +1670,13 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, victim = mem_cgroup_select_victim(root_mem); if (victim == root_mem) { loop++; - if (loop >= 1) + /* + * We are not draining per cpu cached charges during + * soft limit reclaim because global reclaim doesn't + * care about charges. It tries to free some memory and + * charges will not give any. + */ + if (!check_soft && loop >= 1) drain_all_stock_async(root_mem); if (loop >= 2) { /* From b0461a44a2f1fc052fc949ae19c3a5d684627b09 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 15 Jun 2011 15:08:46 -0700 Subject: [PATCH 146/327] MAINTAINERS: add entry for legacy eeprom driver I shall maintain the legacy eeprom driver, until we finally get rid of it. Signed-off-by: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 518442ab25af..6c59eb90fdf4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3819,6 +3819,12 @@ S: Maintained F: drivers/leds/ F: include/linux/leds.h +LEGACY EEPROM DRIVER +M: Jean Delvare +S: Maintained +F: Documentation/misc-devices/eeprom +F: drivers/misc/eeprom/eeprom.c + LEGO USB Tower driver M: Juergen Stuber L: legousb-devel@lists.sourceforge.net From d2c32258798f813dc2be6cbc32f78aa5ac5cb205 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Wed, 15 Jun 2011 15:08:47 -0700 Subject: [PATCH 147/327] gcov: disable CONFIG_CONSTRUCTORS when not needed by CONFIG_GCOV_KERNEL CONFIG_CONSTRUCTORS controls support for running constructor functions at kernel init time. According to commit b99b87f70c7785ab ("kernel: constructor support"), gcov (CONFIG_GCOV_KERNEL) needs this. However, CONFIG_CONSTRUCTORS currently defaults to y, with no option to disable it, and CONFIG_GCOV_KERNEL depends on it. Instead, default it to n and have CONFIG_GCOV_KERNEL select it, so that the normal case of CONFIG_GCOV_KERNEL=n will result in CONFIG_CONSTRUCTORS=n. Observed in the short list of =y values in a minimal kernel configuration. Signed-off-by: Josh Triplett Acked-by: WANG Cong Acked-by: Peter Oberparleiter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/Kconfig | 1 - kernel/gcov/Kconfig | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index e0a22e42d39a..412c21b00d51 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -19,7 +19,6 @@ config DEFCONFIG_LIST config CONSTRUCTORS bool depends on !UML - default y config HAVE_IRQ_WORK bool diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index b8cadf70b1fb..5bf924d80b5c 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -2,7 +2,8 @@ menu "GCOV-based kernel profiling" config GCOV_KERNEL bool "Enable gcov-based kernel profiling" - depends on DEBUG_FS && CONSTRUCTORS + depends on DEBUG_FS + select CONSTRUCTORS default n ---help--- This option enables gcov-based code profiling (e.g. for code coverage From 5db8a73a8d7cc6a66afbf25ed7fda338caa8f5f9 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 15 Jun 2011 15:08:48 -0700 Subject: [PATCH 148/327] mm/memory-failure.c: fix page isolated count mismatch Pages isolated for migration are accounted with the vmstat counters NR_ISOLATE_[ANON|FILE]. Callers of migrate_pages() are expected to increment these counters when pages are isolated from the LRU. Once the pages have been migrated, they are put back on the LRU or freed and the isolated count is decremented. Memory failure is not properly accounting for pages it isolates causing the NR_ISOLATED counters to be negative. On SMP builds, this goes unnoticed as negative counters are treated as 0 due to expected per-cpu drift. On UP builds, the counter is treated by too_many_isolated() as a large value causing processes to enter D state during page reclaim or compaction. This patch accounts for pages isolated by memory failure correctly. [mel@csn.ul.ie: rewrote changelog] Reviewed-by: Andrea Arcangeli Signed-off-by: Minchan Kim Cc: Andi Kleen Acked-by: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 5c8f7e08928d..eac0ba561491 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -52,6 +52,7 @@ #include #include #include +#include #include "internal.h" int sysctl_memory_failure_early_kill __read_mostly = 0; @@ -1468,7 +1469,8 @@ int soft_offline_page(struct page *page, int flags) put_page(page); if (!ret) { LIST_HEAD(pagelist); - + inc_zone_page_state(page, NR_ISOLATED_ANON + + page_is_file_cache(page)); list_add(&page->lru, &pagelist); ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0, true); From a582a738c763e106f47eab24b8146c698a9c700b Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 15 Jun 2011 15:08:49 -0700 Subject: [PATCH 149/327] compaction: checks correct fragmentation index fragmentation_index() returns -1000 when the allocation might succeed This doesn't match the comment and code in compaction_suitable(). I thought compaction_suitable should return COMPACT_PARTIAL in -1000 case, because in this case allocation could succeed depending on watermarks. The impact of this is that compaction starts and compact_finished() is called which rechecks the watermarks and the free lists. It should have the same result in that compaction should not start but is more expensive. Acked-by: Mel Gorman Signed-off-by: Shaohua Li Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/compaction.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index 94bdbe1f7caf..cf7086c6dc07 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -480,7 +480,8 @@ unsigned long compaction_suitable(struct zone *zone, int order) * fragmentation index determines if allocation failures are due to * low memory or external fragmentation * - * index of -1 implies allocations might succeed dependingon watermarks + * index of -1000 implies allocations might succeed depending on + * watermarks * index towards 0 implies failure is due to lack of memory * index towards 1000 implies failure is due to fragmentation * @@ -490,7 +491,8 @@ unsigned long compaction_suitable(struct zone *zone, int order) if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold) return COMPACT_SKIPPED; - if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) + if (fragindex == -1000 && zone_watermark_ok(zone, order, watermark, + 0, 0)) return COMPACT_PARTIAL; return COMPACT_CONTINUE; From 7454f4ba40b419eb999a3c61a99da662bf1a2bb8 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 15 Jun 2011 15:08:50 -0700 Subject: [PATCH 150/327] mm: compaction: ensure that the compaction free scanner does not move to the next zone Compaction works with two scanners, a migration and a free scanner. When the scanners crossover, migration within the zone is complete. The location of the scanner is recorded on each cycle to avoid excesive scanning. When a zone is small and mostly reserved, it's very easy for the migration scanner to be close to the end of the zone. Then the following situation can occurs o migration scanner isolates some pages near the end of the zone o free scanner starts at the end of the zone but finds that the migration scanner is already there o free scanner gets reinitialised for the next cycle as cc->migrate_pfn + pageblock_nr_pages moving the free scanner into the next zone o migration scanner moves into the next zone When this happens, NR_ISOLATED accounting goes haywire because some of the accounting happens against the wrong zone. One zones counter remains positive while the other goes negative even though the overall global count is accurate. This was reported on X86-32 with !SMP because !SMP allows the negative counters to be visible. The fact that it is the bug should theoritically be possible there. Signed-off-by: Mel Gorman Reviewed-by: Minchan Kim Reviewed-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/compaction.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/mm/compaction.c b/mm/compaction.c index cf7086c6dc07..9b94eaf537e4 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -144,9 +144,20 @@ static void isolate_freepages(struct zone *zone, int nr_freepages = cc->nr_freepages; struct list_head *freelist = &cc->freepages; + /* + * Initialise the free scanner. The starting point is where we last + * scanned from (or the end of the zone if starting). The low point + * is the end of the pageblock the migration scanner is using. + */ pfn = cc->free_pfn; low_pfn = cc->migrate_pfn + pageblock_nr_pages; - high_pfn = low_pfn; + + /* + * Take care that if the migration scanner is at the end of the zone + * that the free scanner does not accidentally move to the next zone + * in the next isolation cycle. + */ + high_pfn = min(low_pfn, pfn); /* * Isolate free pages until enough are available to migrate the From d179e84ba5da1d0024087d1759a2938817a00f3f Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 15 Jun 2011 15:08:51 -0700 Subject: [PATCH 151/327] mm: vmscan: do not use page_count without a page pin It is unsafe to run page_count during the physical pfn scan because compound_head could trip on a dangling pointer when reading page->first_page if the compound page is being freed by another CPU. [mgorman@suse.de: split out patch] Signed-off-by: Andrea Arcangeli Signed-off-by: Mel Gorman Reviewed-by: Michal Hocko Reviewed-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index dbe6ea321df4..8ff834e19c24 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1124,8 +1124,20 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, nr_lumpy_dirty++; scan++; } else { - /* the page is freed already. */ - if (!page_count(cursor_page)) + /* + * Check if the page is freed already. + * + * We can't use page_count() as that + * requires compound_head and we don't + * have a pin on the page here. If a + * page is tail, we may or may not + * have isolated the head, so assume + * it's not free, it'd be tricky to + * track the head status without a + * page pin. + */ + if (!PageTail(cursor_page) && + !atomic_read(&cursor_page->_count)) continue; break; } From f9e35b3b41f47c4e17d8132edbcab305a6aaa4b0 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 15 Jun 2011 15:08:52 -0700 Subject: [PATCH 152/327] mm: compaction: abort compaction if too many pages are isolated and caller is asynchronous V2 Asynchronous compaction is used when promoting to huge pages. This is all very nice but if there are a number of processes in compacting memory, a large number of pages can be isolated. An "asynchronous" process can stall for long periods of time as a result with a user reporting that firefox can stall for 10s of seconds. This patch aborts asynchronous compaction if too many pages are isolated as it's better to fail a hugepage promotion than stall a process. [minchan.kim@gmail.com: return COMPACT_PARTIAL for abort] Reported-and-tested-by: Ury Stankevich Signed-off-by: Mel Gorman Reviewed-by: Minchan Kim Reviewed-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/compaction.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index 9b94eaf537e4..6cc604bd5649 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -251,11 +251,18 @@ static bool too_many_isolated(struct zone *zone) return isolated > (inactive + active) / 2; } +/* possible outcome of isolate_migratepages */ +typedef enum { + ISOLATE_ABORT, /* Abort compaction now */ + ISOLATE_NONE, /* No pages isolated, continue scanning */ + ISOLATE_SUCCESS, /* Pages isolated, migrate */ +} isolate_migrate_t; + /* * Isolate all pages that can be migrated from the block pointed to by * the migrate scanner within compact_control. */ -static unsigned long isolate_migratepages(struct zone *zone, +static isolate_migrate_t isolate_migratepages(struct zone *zone, struct compact_control *cc) { unsigned long low_pfn, end_pfn; @@ -272,7 +279,7 @@ static unsigned long isolate_migratepages(struct zone *zone, /* Do not cross the free scanner or scan within a memory hole */ if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) { cc->migrate_pfn = end_pfn; - return 0; + return ISOLATE_NONE; } /* @@ -281,10 +288,14 @@ static unsigned long isolate_migratepages(struct zone *zone, * delay for some time until fewer pages are isolated */ while (unlikely(too_many_isolated(zone))) { + /* async migration should just abort */ + if (!cc->sync) + return ISOLATE_ABORT; + congestion_wait(BLK_RW_ASYNC, HZ/10); if (fatal_signal_pending(current)) - return 0; + return ISOLATE_ABORT; } /* Time to isolate some pages for migration */ @@ -369,7 +380,7 @@ static unsigned long isolate_migratepages(struct zone *zone, trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); - return cc->nr_migratepages; + return ISOLATE_SUCCESS; } /* @@ -535,8 +546,15 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) unsigned long nr_migrate, nr_remaining; int err; - if (!isolate_migratepages(zone, cc)) + switch (isolate_migratepages(zone, cc)) { + case ISOLATE_ABORT: + ret = COMPACT_PARTIAL; + goto out; + case ISOLATE_NONE: continue; + case ISOLATE_SUCCESS: + ; + } nr_migrate = cc->nr_migratepages; err = migrate_pages(&cc->migratepages, compaction_alloc, @@ -560,6 +578,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) } +out: /* Release free pages and check accounting */ cc->nr_freepages -= release_freepages(&cc->freepages); VM_BUG_ON(cc->nr_freepages != 0); From 31b5f8eeece4c0d70b649bfac7759cf7e3f915dd Mon Sep 17 00:00:00 2001 From: "akpm@linux-foundation.org" Date: Wed, 15 Jun 2011 15:08:52 -0700 Subject: [PATCH 153/327] Documentation/feature-removal-schedule.txt: remove ns_cgroup from feature-removal-schedule.txt Commit a77aea92010acf ("cgroup: remove the ns_cgroup") removed the ns_cgroup but it forgot to remove the related doc in feature-removal-schedule.txt. Signed-off-by: WANG Cong Cc: Daniel Lezcano Cc: Serge E. Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/feature-removal-schedule.txt | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 1a9446b59153..72e238465b0b 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -481,23 +481,6 @@ Who: FUJITA Tomonori ---------------------------- -What: namespace cgroup (ns_cgroup) -When: 2.6.38 -Why: The ns_cgroup leads to some problems: - * cgroup creation is out-of-control - * cgroup name can conflict when pids are looping - * it is not possible to have a single process handling - a lot of namespaces without falling in a exponential creation time - * we may want to create a namespace without creating a cgroup - - The ns_cgroup is replaced by a compatibility flag 'clone_children', - where a newly created cgroup will copy the parent cgroup values. - The userspace has to manually create a cgroup and add a task to - the 'tasks' file. -Who: Daniel Lezcano - ----------------------------- - What: iwlwifi disable_hw_scan module parameters When: 2.6.40 Why: Hareware scan is the prefer method for iwlwifi devices for From 273ef9509b7903e50f36aaf9f1d5dc9087fca506 Mon Sep 17 00:00:00 2001 From: Nils Carlson Date: Wed, 15 Jun 2011 15:08:54 -0700 Subject: [PATCH 154/327] drivers/char/hpet.c: fix periodic-emulation for delayed interrupts When interrupts are delayed due to interrupt masking or due to other interrupts being serviced the HPET periodic-emuation would fail. This happened because given an interval t and a time for the current interrupt m we would compute the next time as t + m. This works until we are delayed for > t, in which case we would be writing a new value which is in fact in the past. This can be solved by computing the next time instead as (k * t) + m where k is large enough to be in the future. The exact computation of k is described in a comment to the code. More detail: Assuming an interval of 5 between each expected interrupt we have a normal case of t0: interrupt, read t0 from comparator, set next interrupt t0 + 5 t5: interrupt, read t5 from comparator, set next interrupt t5 + 5 t10: interrupt, read t10 from comparator, set next interrupt t10 + 5 ... So, what happens when the interrupt is serviced too late? t0: interrupt, read t0 from comparator, set next interrupt t0 + 5 t11: delayed interrupt serviced, read t5 from comparator, set next interrupt t5 + 5, which is in the past! ... counter loops ... t10: Much much later, get the next interrupt. This can happen either because we have interrupts masked for too long (some stupid driver goes on a printk rampage) or just because we are pushing the limits of the interval (too small a period), or both most probably. My solution is to read the main counter as well and set the next interrupt to occur at the right interval, for example: t0: interrupt, read t0 from comparator, set next interrupt t0 + 5 t11: delayed interrupt serviced, read t5 from comparator, set next interrupt t15 as t10 has been missed. t15: back on track. Signed-off-by: Nils Carlson Cc: John Stultz Cc: Thomas Gleixner Cc: Clemens Ladisch Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/hpet.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 051474c65b78..34d6a1cab8de 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -163,11 +163,32 @@ static irqreturn_t hpet_interrupt(int irq, void *data) * This has the effect of treating non-periodic like periodic. */ if ((devp->hd_flags & (HPET_IE | HPET_PERIODIC)) == HPET_IE) { - unsigned long m, t; + unsigned long m, t, mc, base, k; + struct hpet __iomem *hpet = devp->hd_hpet; + struct hpets *hpetp = devp->hd_hpets; t = devp->hd_ireqfreq; m = read_counter(&devp->hd_timer->hpet_compare); - write_counter(t + m, &devp->hd_timer->hpet_compare); + mc = read_counter(&hpet->hpet_mc); + /* The time for the next interrupt would logically be t + m, + * however, if we are very unlucky and the interrupt is delayed + * for longer than t then we will completely miss the next + * interrupt if we set t + m and an application will hang. + * Therefore we need to make a more complex computation assuming + * that there exists a k for which the following is true: + * k * t + base < mc + delta + * (k + 1) * t + base > mc + delta + * where t is the interval in hpet ticks for the given freq, + * base is the theoretical start value 0 < base < t, + * mc is the main counter value at the time of the interrupt, + * delta is the time it takes to write the a value to the + * comparator. + * k may then be computed as (mc - base + delta) / t . + */ + base = mc % t; + k = (mc - base + hpetp->hp_delta) / t; + write_counter(t * (k + 1) + base, + &devp->hd_timer->hpet_compare); } if (devp->hd_flags & HPET_SHARED_IRQ) From fb139dfeef9558a12ffdbf9e26951fd1a9304f3b Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 15 Jun 2011 15:08:55 -0700 Subject: [PATCH 155/327] drivers/tty/serial/pch_uart.c: don't oops if dmi_get_system_info returns NULL If dmi_get_system_info() returns NULL, pch_uart_init_port() will dereferencea a zero pointer. This oops was observed on an Atom based board which has no BIOS, but a bootloder which doesn't provide DMI data. Signed-off-by: Alexander Stein Cc: Greg KH Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/tty/serial/pch_uart.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c index f2cb7503fcb2..465210930890 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -1397,6 +1397,7 @@ static struct eg20t_port *pch_uart_init_port(struct pci_dev *pdev, int fifosize, base_baud; int port_type; struct pch_uart_driver_data *board; + const char *board_name; board = &drv_dat[id->driver_data]; port_type = board->port_type; @@ -1412,7 +1413,8 @@ static struct eg20t_port *pch_uart_init_port(struct pci_dev *pdev, base_baud = 1843200; /* 1.8432MHz */ /* quirk for CM-iTC board */ - if (strstr(dmi_get_system_info(DMI_BOARD_NAME), "CM-iTC")) + board_name = dmi_get_system_info(DMI_BOARD_NAME); + if (board_name && strstr(board_name, "CM-iTC")) base_baud = 192000000; /* 192.0MHz */ switch (port_type) { From c7cbb02222eccb82bfd42696b01abceddae663f2 Mon Sep 17 00:00:00 2001 From: Wanlong Gao Date: Wed, 15 Jun 2011 15:08:56 -0700 Subject: [PATCH 156/327] rtc: fix build warnings in defconfigs RTC_CLASS is changed to bool, so 'm' is invalid. Signed-off-by: Wanlong Gao Acked-by: Mike Frysinger Acked-by: Wolfram Sang Acked-by: Hans-Christian Egtvedt Acked-by: Benjamin Herrenschmidt Cc: Guan Xuetao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/configs/davinci_all_defconfig | 2 +- arch/arm/configs/netx_defconfig | 2 +- arch/arm/configs/viper_defconfig | 2 +- arch/arm/configs/xcep_defconfig | 2 +- arch/arm/configs/zeus_defconfig | 2 +- arch/avr32/configs/atngw100_mrmt_defconfig | 2 +- arch/blackfin/configs/CM-BF548_defconfig | 2 +- arch/mips/configs/mtx1_defconfig | 2 +- arch/powerpc/configs/52xx/pcm030_defconfig | 2 +- arch/powerpc/configs/ps3_defconfig | 2 +- arch/sh/configs/titan_defconfig | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig index 889922ad229c..67b5abb6f857 100644 --- a/arch/arm/configs/davinci_all_defconfig +++ b/arch/arm/configs/davinci_all_defconfig @@ -157,7 +157,7 @@ CONFIG_LEDS_GPIO=m CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=m CONFIG_LEDS_TRIGGER_HEARTBEAT=m -CONFIG_RTC_CLASS=m +CONFIG_RTC_CLASS=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_XFS_FS=m diff --git a/arch/arm/configs/netx_defconfig b/arch/arm/configs/netx_defconfig index 316af5479d90..9c0ad7993986 100644 --- a/arch/arm/configs/netx_defconfig +++ b/arch/arm/configs/netx_defconfig @@ -60,7 +60,7 @@ CONFIG_FB_ARMCLCD=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y -CONFIG_RTC_CLASS=m +CONFIG_RTC_CLASS=y CONFIG_INOTIFY=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y diff --git a/arch/arm/configs/viper_defconfig b/arch/arm/configs/viper_defconfig index 8b0c717378fa..1d01ddd33122 100644 --- a/arch/arm/configs/viper_defconfig +++ b/arch/arm/configs/viper_defconfig @@ -142,7 +142,7 @@ CONFIG_USB_GADGETFS=m CONFIG_USB_FILE_STORAGE=m CONFIG_USB_G_SERIAL=m CONFIG_USB_G_PRINTER=m -CONFIG_RTC_CLASS=m +CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=m CONFIG_RTC_DRV_SA1100=m CONFIG_EXT2_FS=m diff --git a/arch/arm/configs/xcep_defconfig b/arch/arm/configs/xcep_defconfig index 5b5504143647..721832ffe2d7 100644 --- a/arch/arm/configs/xcep_defconfig +++ b/arch/arm/configs/xcep_defconfig @@ -73,7 +73,7 @@ CONFIG_SENSORS_MAX6650=m # CONFIG_VGA_CONSOLE is not set # CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set -CONFIG_RTC_CLASS=m +CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SA1100=m CONFIG_DMADEVICES=y # CONFIG_DNOTIFY is not set diff --git a/arch/arm/configs/zeus_defconfig b/arch/arm/configs/zeus_defconfig index 960f65514d88..59577ad3f4ef 100644 --- a/arch/arm/configs/zeus_defconfig +++ b/arch/arm/configs/zeus_defconfig @@ -158,7 +158,7 @@ CONFIG_LEDS_TRIGGER_HEARTBEAT=m CONFIG_LEDS_TRIGGER_BACKLIGHT=m CONFIG_LEDS_TRIGGER_GPIO=m CONFIG_LEDS_TRIGGER_DEFAULT_ON=m -CONFIG_RTC_CLASS=m +CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_ISL1208=m CONFIG_RTC_DRV_PXA=m CONFIG_EXT2_FS=y diff --git a/arch/avr32/configs/atngw100_mrmt_defconfig b/arch/avr32/configs/atngw100_mrmt_defconfig index 6cb786c0baf6..77ca4f905d2c 100644 --- a/arch/avr32/configs/atngw100_mrmt_defconfig +++ b/arch/avr32/configs/atngw100_mrmt_defconfig @@ -110,7 +110,7 @@ CONFIG_LEDS_GPIO=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y -CONFIG_RTC_CLASS=m +CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_S35390A=m CONFIG_RTC_DRV_AT32AP700X=m CONFIG_DMADEVICES=y diff --git a/arch/blackfin/configs/CM-BF548_defconfig b/arch/blackfin/configs/CM-BF548_defconfig index 31d954216c05..9f1d08401fca 100644 --- a/arch/blackfin/configs/CM-BF548_defconfig +++ b/arch/blackfin/configs/CM-BF548_defconfig @@ -112,7 +112,7 @@ CONFIG_USB_G_SERIAL=m CONFIG_USB_G_PRINTER=m CONFIG_MMC=m CONFIG_SDH_BFIN=m -CONFIG_RTC_CLASS=m +CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_BFIN=m CONFIG_EXT2_FS=m # CONFIG_DNOTIFY is not set diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index 37862b2ce363..807c97eed8a8 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -678,7 +678,7 @@ CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_LEDS_TRIGGER_DEFAULT_ON=y -CONFIG_RTC_CLASS=m +CONFIG_RTC_CLASS=y CONFIG_RTC_INTF_DEV_UIE_EMUL=y CONFIG_RTC_DRV_TEST=m CONFIG_RTC_DRV_DS1307=m diff --git a/arch/powerpc/configs/52xx/pcm030_defconfig b/arch/powerpc/configs/52xx/pcm030_defconfig index 7f7e4a878602..22e719575c60 100644 --- a/arch/powerpc/configs/52xx/pcm030_defconfig +++ b/arch/powerpc/configs/52xx/pcm030_defconfig @@ -85,7 +85,7 @@ CONFIG_USB_OHCI_HCD=m CONFIG_USB_OHCI_HCD_PPC_OF_BE=y # CONFIG_USB_OHCI_HCD_PCI is not set CONFIG_USB_STORAGE=m -CONFIG_RTC_CLASS=m +CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_PCF8563=m CONFIG_EXT2_FS=m CONFIG_EXT3_FS=m diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 6472322bf13b..185c292b0f1c 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -141,7 +141,7 @@ CONFIG_USB_EHCI_TT_NEWSCHED=y # CONFIG_USB_EHCI_HCD_PPC_OF is not set CONFIG_USB_OHCI_HCD=m CONFIG_USB_STORAGE=m -CONFIG_RTC_CLASS=m +CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_PS3=m CONFIG_EXT2_FS=m CONFIG_EXT3_FS=m diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index 0f558914e760..e2cbd92d520b 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -227,7 +227,7 @@ CONFIG_USB_SERIAL=m CONFIG_USB_SERIAL_GENERIC=y CONFIG_USB_SERIAL_ARK3116=m CONFIG_USB_SERIAL_PL2303=m -CONFIG_RTC_CLASS=m +CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y From 2b472611a32a72f4a118c069c2d62a1a3f087afd Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 15 Jun 2011 15:08:58 -0700 Subject: [PATCH 157/327] ksm: fix NULL pointer dereference in scan_get_next_rmap_item() Andrea Righi reported a case where an exiting task can race against ksmd::scan_get_next_rmap_item (http://lkml.org/lkml/2011/6/1/742) easily triggering a NULL pointer dereference in ksmd. ksm_scan.mm_slot == &ksm_mm_head with only one registered mm CPU 1 (__ksm_exit) CPU 2 (scan_get_next_rmap_item) list_empty() is false lock slot == &ksm_mm_head list_del(slot->mm_list) (list now empty) unlock lock slot = list_entry(slot->mm_list.next) (list is empty, so slot is still ksm_mm_head) unlock slot->mm == NULL ... Oops Close this race by revalidating that the new slot is not simply the list head again. Andrea's test case: #include #include #include #include #define BUFSIZE getpagesize() int main(int argc, char **argv) { void *ptr; if (posix_memalign(&ptr, getpagesize(), BUFSIZE) < 0) { perror("posix_memalign"); exit(1); } if (madvise(ptr, BUFSIZE, MADV_MERGEABLE) < 0) { perror("madvise"); exit(1); } *(char *)NULL = 0; return 0; } Reported-by: Andrea Righi Tested-by: Andrea Righi Cc: Andrea Arcangeli Signed-off-by: Hugh Dickins Signed-off-by: Chris Wright Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/ksm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/ksm.c b/mm/ksm.c index d708b3ef2260..9a68b0cf0a1c 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1302,6 +1302,12 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page) slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list); ksm_scan.mm_slot = slot; spin_unlock(&ksm_mmlist_lock); + /* + * Although we tested list_empty() above, a racing __ksm_exit + * of the last mm on the list may have removed it since then. + */ + if (slot == &ksm_mm_head) + return NULL; next_mm: ksm_scan.address = 0; ksm_scan.rmap_list = &slot->rmap_list; From ec8f9ceacef719a844ca269d654502af6a00a273 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 15 Jun 2011 15:08:59 -0700 Subject: [PATCH 158/327] drivers/misc/apds990x.c: apds990x_chip_on() should depend on CONFIG_PM || CONFIG_PM_RUNTIME Fixes this warning: drivers/misc/apds990x.c: At top level: drivers/misc/apds990x.c:613: warning: `apds990x_chip_on' defined but not used Signed-off-by: Geert Uytterhoeven Cc: Samu Onkalo Cc: Jonathan Cameron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/misc/apds990x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/apds990x.c b/drivers/misc/apds990x.c index 200311fea369..e2a52e5cf449 100644 --- a/drivers/misc/apds990x.c +++ b/drivers/misc/apds990x.c @@ -609,6 +609,7 @@ static int apds990x_detect(struct apds990x_chip *chip) return ret; } +#if defined(CONFIG_PM) || defined(CONFIG_PM_RUNTIME) static int apds990x_chip_on(struct apds990x_chip *chip) { int err = regulator_bulk_enable(ARRAY_SIZE(chip->regs), @@ -624,6 +625,7 @@ static int apds990x_chip_on(struct apds990x_chip *chip) apds990x_mode_on(chip); return 0; } +#endif static int apds990x_chip_off(struct apds990x_chip *chip) { From 21c5977a836e399fc710ff2c5367845ed5c2527f Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Wed, 15 Jun 2011 15:09:01 -0700 Subject: [PATCH 159/327] alpha: fix several security issues Fix several security issues in Alpha-specific syscalls. Untested, but mostly trivial. 1. Signedness issue in osf_getdomainname allows copying out-of-bounds kernel memory to userland. 2. Signedness issue in osf_sysinfo allows copying large amounts of kernel memory to userland. 3. Typo (?) in osf_getsysinfo bounds minimum instead of maximum copy size, allowing copying large amounts of kernel memory to userland. 4. Usage of user pointer in osf_wait4 while under KERNEL_DS allows privilege escalation via writing return value of sys_wait4 to kernel memory. Signed-off-by: Dan Rosenberg Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/osf_sys.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 376f22130791..326f0a2d56e5 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -409,7 +409,7 @@ SYSCALL_DEFINE2(osf_getdomainname, char __user *, name, int, namelen) return -EFAULT; len = namelen; - if (namelen > 32) + if (len > 32) len = 32; down_read(&uts_sem); @@ -594,7 +594,7 @@ SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count) down_read(&uts_sem); res = sysinfo_table[offset]; len = strlen(res)+1; - if (len > count) + if ((unsigned long)len > (unsigned long)count) len = count; if (copy_to_user(buf, res, len)) err = -EFAULT; @@ -649,7 +649,7 @@ SYSCALL_DEFINE5(osf_getsysinfo, unsigned long, op, void __user *, buffer, return 1; case GSI_GET_HWRPB: - if (nbytes < sizeof(*hwrpb)) + if (nbytes > sizeof(*hwrpb)) return -EINVAL; if (copy_to_user(buffer, hwrpb, nbytes) != 0) return -EFAULT; @@ -1008,6 +1008,7 @@ SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options, { struct rusage r; long ret, err; + unsigned int status = 0; mm_segment_t old_fs; if (!ur) @@ -1016,13 +1017,15 @@ SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options, old_fs = get_fs(); set_fs (KERNEL_DS); - ret = sys_wait4(pid, ustatus, options, (struct rusage __user *) &r); + ret = sys_wait4(pid, (unsigned int __user *) &status, options, + (struct rusage __user *) &r); set_fs (old_fs); if (!access_ok(VERIFY_WRITE, ur, sizeof(*ur))) return -EFAULT; err = 0; + err |= put_user(status, ustatus); err |= __put_user(r.ru_utime.tv_sec, &ur->ru_utime.tv_sec); err |= __put_user(r.ru_utime.tv_usec, &ur->ru_utime.tv_usec); err |= __put_user(r.ru_stime.tv_sec, &ur->ru_stime.tv_sec); From 04c55715cbd5de526046745bca3d3b6ffa6641c6 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Wed, 15 Jun 2011 12:57:09 -0700 Subject: [PATCH 160/327] Documentation: update printk-formats.txt This patch updates the incomplete documentation concerning the printk extended format specifiers. Signed-off-by: Andrew Murray Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- Documentation/printk-formats.txt | 119 ++++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 2 deletions(-) diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index 1b5a5ddbc3ef..5df176ed59b8 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt @@ -9,7 +9,121 @@ If variable is of Type, use printk format specifier: size_t %zu or %zx ssize_t %zd or %zx -Raw pointer value SHOULD be printed with %p. +Raw pointer value SHOULD be printed with %p. The kernel supports +the following extended format specifiers for pointer types: + +Symbols/Function Pointers: + + %pF versatile_init+0x0/0x110 + %pf versatile_init + %pS versatile_init+0x0/0x110 + %ps versatile_init + %pB prev_fn_of_versatile_init+0x88/0x88 + + For printing symbols and function pointers. The 'S' and 's' specifiers + result in the symbol name with ('S') or without ('s') offsets. Where + this is used on a kernel without KALLSYMS - the symbol address is + printed instead. + + The 'B' specifier results in the symbol name with offsets and should be + used when printing stack backtraces. The specifier takes into + consideration the effect of compiler optimisations which may occur + when tail-call's are used and marked with the noreturn GCC attribute. + + On ia64, ppc64 and parisc64 architectures function pointers are + actually function descriptors which must first be resolved. The 'F' and + 'f' specifiers perform this resolution and then provide the same + functionality as the 'S' and 's' specifiers. + +Kernel Pointers: + + %pK 0x01234567 or 0x0123456789abcdef + + For printing kernel pointers which should be hidden from unprivileged + users. The behaviour of %pK depends on the kptr_restrict sysctl - see + Documentation/sysctl/kernel.txt for more details. + +Struct Resources: + + %pr [mem 0x60000000-0x6fffffff flags 0x2200] or + [mem 0x0000000060000000-0x000000006fffffff flags 0x2200] + %pR [mem 0x60000000-0x6fffffff pref] or + [mem 0x0000000060000000-0x000000006fffffff pref] + + For printing struct resources. The 'R' and 'r' specifiers result in a + printed resource with ('R') or without ('r') a decoded flags member. + +MAC/FDDI addresses: + + %pM 00:01:02:03:04:05 + %pMF 00-01-02-03-04-05 + %pm 000102030405 + + For printing 6-byte MAC/FDDI addresses in hex notation. The 'M' and 'm' + specifiers result in a printed address with ('M') or without ('m') byte + separators. The default byte separator is the colon (':'). + + Where FDDI addresses are concerned the 'F' specifier can be used after + the 'M' specifier to use dash ('-') separators instead of the default + separator. + +IPv4 addresses: + + %pI4 1.2.3.4 + %pi4 001.002.003.004 + %p[Ii][hnbl] + + For printing IPv4 dot-separated decimal addresses. The 'I4' and 'i4' + specifiers result in a printed address with ('i4') or without ('I4') + leading zeros. + + The additional 'h', 'n', 'b', and 'l' specifiers are used to specify + host, network, big or little endian order addresses respectively. Where + no specifier is provided the default network/big endian order is used. + +IPv6 addresses: + + %pI6 0001:0002:0003:0004:0005:0006:0007:0008 + %pi6 00010002000300040005000600070008 + %pI6c 1:2:3:4:5:6:7:8 + + For printing IPv6 network-order 16-bit hex addresses. The 'I6' and 'i6' + specifiers result in a printed address with ('I6') or without ('i6') + colon-separators. Leading zeros are always used. + + The additional 'c' specifier can be used with the 'I' specifier to + print a compressed IPv6 address as described by + http://tools.ietf.org/html/rfc5952 + +UUID/GUID addresses: + + %pUb 00010203-0405-0607-0809-0a0b0c0d0e0f + %pUB 00010203-0405-0607-0809-0A0B0C0D0E0F + %pUl 03020100-0504-0706-0809-0a0b0c0e0e0f + %pUL 03020100-0504-0706-0809-0A0B0C0E0E0F + + For printing 16-byte UUID/GUIDs addresses. The additional 'l', 'L', + 'b' and 'B' specifiers are used to specify a little endian order in + lower ('l') or upper case ('L') hex characters - and big endian order + in lower ('b') or upper case ('B') hex characters. + + Where no additional specifiers are used the default little endian + order with lower case hex characters will be printed. + +struct va_format: + + %pV + + For printing struct va_format structures. These contain a format string + and va_list as follows: + + struct va_format { + const char *fmt; + va_list *va; + }; + + Do not use this feature without some mechanism to verify the + correctness of the format string and va_list arguments. u64 SHOULD be printed with %llu/%llx, (unsigned long long): @@ -32,4 +146,5 @@ Reminder: sizeof() result is of type size_t. Thank you for your cooperation and attention. -By Randy Dunlap +By Randy Dunlap and +Andrew Murray From 06a2c45d6b4a7586eba7cd20dd656b08d8b63c2f Mon Sep 17 00:00:00 2001 From: "Maxin B. John" Date: Wed, 15 Jun 2011 12:58:29 -0700 Subject: [PATCH 161/327] Documentation: update kmemleak supported archs Instead of listing the architectures that are supported by kmemleak in Documentation/kmemleak.txt, just refer people to the list of supported architecutures in lib/Kconfig.debug so that Documentation/kmemleak.txt does not need more updates for this. Signed-off-by: Maxin B. John Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- Documentation/kmemleak.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt index 090e6ee04536..51063e681ca4 100644 --- a/Documentation/kmemleak.txt +++ b/Documentation/kmemleak.txt @@ -11,7 +11,9 @@ with the difference that the orphan objects are not freed but only reported via /sys/kernel/debug/kmemleak. A similar method is used by the Valgrind tool (memcheck --leak-check) to detect the memory leaks in user-space applications. -Kmemleak is supported on x86, arm, powerpc, sparc, sh, microblaze and tile. + +Please check DEBUG_KMEMLEAK dependencies in lib/Kconfig.debug for supported +architectures. Usage ----- From f6e07d38078e82a6aeaae00bb134591ef5ac1167 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Sommer?= Date: Wed, 15 Jun 2011 12:59:45 -0700 Subject: [PATCH 162/327] Documentation: update cgroupfs mount point MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to commit 676db4af0430 ("cgroupfs: create /sys/fs/cgroup to mount cgroupfs on") the canonical mountpoint for the cgroup filesystem is /sys/fs/cgroup. Hence, this should be used in the documentation. Signed-off-by: Jörg Sommer Acked-by: Paul Menage Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- Documentation/accounting/cgroupstats.txt | 4 +- Documentation/cgroups/blkio-controller.txt | 29 +++++----- Documentation/cgroups/cgroups.txt | 58 ++++++++++++-------- Documentation/cgroups/cpuacct.txt | 19 +++---- Documentation/cgroups/cpusets.txt | 28 +++++----- Documentation/cgroups/devices.txt | 6 +- Documentation/cgroups/freezer-subsystem.txt | 20 +++---- Documentation/cgroups/memory.txt | 17 +++--- Documentation/scheduler/sched-design-CFS.txt | 7 ++- Documentation/scheduler/sched-rt-group.txt | 7 +-- Documentation/vm/hwpoison.txt | 6 +- 11 files changed, 108 insertions(+), 93 deletions(-) diff --git a/Documentation/accounting/cgroupstats.txt b/Documentation/accounting/cgroupstats.txt index eda40fd39cad..d16a9849e60e 100644 --- a/Documentation/accounting/cgroupstats.txt +++ b/Documentation/accounting/cgroupstats.txt @@ -21,7 +21,7 @@ information will not be available. To extract cgroup statistics a utility very similar to getdelays.c has been developed, the sample output of the utility is shown below -~/balbir/cgroupstats # ./getdelays -C "/cgroup/a" +~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup/a" sleeping 1, blocked 0, running 1, stopped 0, uninterruptible 0 -~/balbir/cgroupstats # ./getdelays -C "/cgroup" +~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup" sleeping 155, blocked 0, running 1, stopped 0, uninterruptible 2 diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt index 465351d4cf85..b1b1bfadc9c0 100644 --- a/Documentation/cgroups/blkio-controller.txt +++ b/Documentation/cgroups/blkio-controller.txt @@ -28,16 +28,19 @@ cgroups. Here is what you can do. - Enable group scheduling in CFQ CONFIG_CFQ_GROUP_IOSCHED=y -- Compile and boot into kernel and mount IO controller (blkio). +- Compile and boot into kernel and mount IO controller (blkio); see + cgroups.txt, Why are cgroups needed?. - mount -t cgroup -o blkio none /cgroup + mount -t tmpfs cgroup_root /sys/fs/cgroup + mkdir /sys/fs/cgroup/blkio + mount -t cgroup -o blkio none /sys/fs/cgroup/blkio - Create two cgroups - mkdir -p /cgroup/test1/ /cgroup/test2 + mkdir -p /sys/fs/cgroup/blkio/test1/ /sys/fs/cgroup/blkio/test2 - Set weights of group test1 and test2 - echo 1000 > /cgroup/test1/blkio.weight - echo 500 > /cgroup/test2/blkio.weight + echo 1000 > /sys/fs/cgroup/blkio/test1/blkio.weight + echo 500 > /sys/fs/cgroup/blkio/test2/blkio.weight - Create two same size files (say 512MB each) on same disk (file1, file2) and launch two dd threads in different cgroup to read those files. @@ -46,12 +49,12 @@ cgroups. Here is what you can do. echo 3 > /proc/sys/vm/drop_caches dd if=/mnt/sdb/zerofile1 of=/dev/null & - echo $! > /cgroup/test1/tasks - cat /cgroup/test1/tasks + echo $! > /sys/fs/cgroup/blkio/test1/tasks + cat /sys/fs/cgroup/blkio/test1/tasks dd if=/mnt/sdb/zerofile2 of=/dev/null & - echo $! > /cgroup/test2/tasks - cat /cgroup/test2/tasks + echo $! > /sys/fs/cgroup/blkio/test2/tasks + cat /sys/fs/cgroup/blkio/test2/tasks - At macro level, first dd should finish first. To get more precise data, keep on looking at (with the help of script), at blkio.disk_time and @@ -68,13 +71,13 @@ Throttling/Upper Limit policy - Enable throttling in block layer CONFIG_BLK_DEV_THROTTLING=y -- Mount blkio controller - mount -t cgroup -o blkio none /cgroup/blkio +- Mount blkio controller (see cgroups.txt, Why are cgroups needed?) + mount -t cgroup -o blkio none /sys/fs/cgroup/blkio - Specify a bandwidth rate on particular device for root group. The format for policy is ": ". - echo "8:16 1048576" > /cgroup/blkio/blkio.read_bps_device + echo "8:16 1048576" > /sys/fs/cgroup/blkio/blkio.read_bps_device Above will put a limit of 1MB/second on reads happening for root group on device having major/minor number 8:16. @@ -149,7 +152,7 @@ Proportional weight policy files Following is the format. - #echo dev_maj:dev_minor weight > /path/to/cgroup/blkio.weight_device + # echo dev_maj:dev_minor weight > blkio.weight_device Configure weight=300 on /dev/sdb (8:16) in this cgroup # echo 8:16 300 > blkio.weight_device # cat blkio.weight_device diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 0ed99f08f1f3..15bca101ff62 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -138,7 +138,7 @@ With the ability to classify tasks differently for different resources the admin can easily set up a script which receives exec notifications and depending on who is launching the browser he can - # echo browser_pid > /mnt///tasks + # echo browser_pid > /sys/fs/cgroup///tasks With only a single hierarchy, he now would potentially have to create a separate cgroup for every browser launched and associate it with @@ -153,9 +153,9 @@ apps enhanced CPU power, With ability to write pids directly to resource classes, it's just a matter of : - # echo pid > /mnt/network//tasks + # echo pid > /sys/fs/cgroup/network//tasks (after some time) - # echo pid > /mnt/network//tasks + # echo pid > /sys/fs/cgroup/network//tasks Without this ability, he would have to split the cgroup into multiple separate ones and then associate the new cgroups with the @@ -310,21 +310,24 @@ subsystem, this is the case for the cpuset. To start a new job that is to be contained within a cgroup, using the "cpuset" cgroup subsystem, the steps are something like: - 1) mkdir /dev/cgroup - 2) mount -t cgroup -ocpuset cpuset /dev/cgroup - 3) Create the new cgroup by doing mkdir's and write's (or echo's) in - the /dev/cgroup virtual file system. - 4) Start a task that will be the "founding father" of the new job. - 5) Attach that task to the new cgroup by writing its pid to the - /dev/cgroup tasks file for that cgroup. - 6) fork, exec or clone the job tasks from this founding father task. + 1) mount -t tmpfs cgroup_root /sys/fs/cgroup + 2) mkdir /sys/fs/cgroup/cpuset + 3) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset + 4) Create the new cgroup by doing mkdir's and write's (or echo's) in + the /sys/fs/cgroup virtual file system. + 5) Start a task that will be the "founding father" of the new job. + 6) Attach that task to the new cgroup by writing its pid to the + /sys/fs/cgroup/cpuset/tasks file for that cgroup. + 7) fork, exec or clone the job tasks from this founding father task. For example, the following sequence of commands will setup a cgroup named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, and then start a subshell 'sh' in that cgroup: - mount -t cgroup cpuset -ocpuset /dev/cgroup - cd /dev/cgroup + mount -t tmpfs cgroup_root /sys/fs/cgroup + mkdir /sys/fs/cgroup/cpuset + mount -t cgroup cpuset -ocpuset /sys/fs/cgroup/cpuset + cd /sys/fs/cgroup/cpuset mkdir Charlie cd Charlie /bin/echo 2-3 > cpuset.cpus @@ -345,7 +348,7 @@ Creating, modifying, using the cgroups can be done through the cgroup virtual filesystem. To mount a cgroup hierarchy with all available subsystems, type: -# mount -t cgroup xxx /dev/cgroup +# mount -t cgroup xxx /sys/fs/cgroup The "xxx" is not interpreted by the cgroup code, but will appear in /proc/mounts so may be any useful identifying string that you like. @@ -354,23 +357,32 @@ Note: Some subsystems do not work without some user input first. For instance, if cpusets are enabled the user will have to populate the cpus and mems files for each new cgroup created before that group can be used. +As explained in section `1.2 Why are cgroups needed?' you should create +different hierarchies of cgroups for each single resource or group of +resources you want to control. Therefore, you should mount a tmpfs on +/sys/fs/cgroup and create directories for each cgroup resource or resource +group. + +# mount -t tmpfs cgroup_root /sys/fs/cgroup +# mkdir /sys/fs/cgroup/rg1 + To mount a cgroup hierarchy with just the cpuset and memory subsystems, type: -# mount -t cgroup -o cpuset,memory hier1 /dev/cgroup +# mount -t cgroup -o cpuset,memory hier1 /sys/fs/cgroup/rg1 To change the set of subsystems bound to a mounted hierarchy, just remount with different options: -# mount -o remount,cpuset,blkio hier1 /dev/cgroup +# mount -o remount,cpuset,blkio hier1 /sys/fs/cgroup/rg1 Now memory is removed from the hierarchy and blkio is added. Note this will add blkio to the hierarchy but won't remove memory or cpuset, because the new options are appended to the old ones: -# mount -o remount,blkio /dev/cgroup +# mount -o remount,blkio /sys/fs/cgroup/rg1 To Specify a hierarchy's release_agent: # mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \ - xxx /dev/cgroup + xxx /sys/fs/cgroup/rg1 Note that specifying 'release_agent' more than once will return failure. @@ -379,17 +391,17 @@ when the hierarchy consists of a single (root) cgroup. Supporting the ability to arbitrarily bind/unbind subsystems from an existing cgroup hierarchy is intended to be implemented in the future. -Then under /dev/cgroup you can find a tree that corresponds to the -tree of the cgroups in the system. For instance, /dev/cgroup +Then under /sys/fs/cgroup/rg1 you can find a tree that corresponds to the +tree of the cgroups in the system. For instance, /sys/fs/cgroup/rg1 is the cgroup that holds the whole system. If you want to change the value of release_agent: -# echo "/sbin/new_release_agent" > /dev/cgroup/release_agent +# echo "/sbin/new_release_agent" > /sys/fs/cgroup/rg1/release_agent It can also be changed via remount. -If you want to create a new cgroup under /dev/cgroup: -# cd /dev/cgroup +If you want to create a new cgroup under /sys/fs/cgroup/rg1: +# cd /sys/fs/cgroup/rg1 # mkdir my_cgroup Now you want to do something with this cgroup. diff --git a/Documentation/cgroups/cpuacct.txt b/Documentation/cgroups/cpuacct.txt index 8b930946c52a..9ad85df4b983 100644 --- a/Documentation/cgroups/cpuacct.txt +++ b/Documentation/cgroups/cpuacct.txt @@ -10,26 +10,25 @@ directly present in its group. Accounting groups can be created by first mounting the cgroup filesystem. -# mkdir /cgroups -# mount -t cgroup -ocpuacct none /cgroups +# mount -t cgroup -ocpuacct none /sys/fs/cgroup -With the above step, the initial or the parent accounting group -becomes visible at /cgroups. At bootup, this group includes all the -tasks in the system. /cgroups/tasks lists the tasks in this cgroup. -/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by -this group which is essentially the CPU time obtained by all the tasks +With the above step, the initial or the parent accounting group becomes +visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in +the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup. +/sys/fs/cgroup/cpuacct.usage gives the CPU time (in nanoseconds) obtained +by this group which is essentially the CPU time obtained by all the tasks in the system. -New accounting groups can be created under the parent group /cgroups. +New accounting groups can be created under the parent group /sys/fs/cgroup. -# cd /cgroups +# cd /sys/fs/cgroup # mkdir g1 # echo $$ > g1 The above steps create a new group g1 and move the current shell process (bash) into it. CPU time consumed by this bash and its children can be obtained from g1/cpuacct.usage and the same is accumulated in -/cgroups/cpuacct.usage also. +/sys/fs/cgroup/cpuacct.usage also. cpuacct.stat file lists a few statistics which further divide the CPU time obtained by the cgroup into user and system times. Currently diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt index 98a30829af7a..5b0d78e55ccc 100644 --- a/Documentation/cgroups/cpusets.txt +++ b/Documentation/cgroups/cpusets.txt @@ -661,21 +661,21 @@ than stress the kernel. To start a new job that is to be contained within a cpuset, the steps are: - 1) mkdir /dev/cpuset - 2) mount -t cgroup -ocpuset cpuset /dev/cpuset + 1) mkdir /sys/fs/cgroup/cpuset + 2) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset 3) Create the new cpuset by doing mkdir's and write's (or echo's) in - the /dev/cpuset virtual file system. + the /sys/fs/cgroup/cpuset virtual file system. 4) Start a task that will be the "founding father" of the new job. 5) Attach that task to the new cpuset by writing its pid to the - /dev/cpuset tasks file for that cpuset. + /sys/fs/cgroup/cpuset tasks file for that cpuset. 6) fork, exec or clone the job tasks from this founding father task. For example, the following sequence of commands will setup a cpuset named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, and then start a subshell 'sh' in that cpuset: - mount -t cgroup -ocpuset cpuset /dev/cpuset - cd /dev/cpuset + mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset + cd /sys/fs/cgroup/cpuset mkdir Charlie cd Charlie /bin/echo 2-3 > cpuset.cpus @@ -710,14 +710,14 @@ Creating, modifying, using the cpusets can be done through the cpuset virtual filesystem. To mount it, type: -# mount -t cgroup -o cpuset cpuset /dev/cpuset +# mount -t cgroup -o cpuset cpuset /sys/fs/cgroup/cpuset -Then under /dev/cpuset you can find a tree that corresponds to the -tree of the cpusets in the system. For instance, /dev/cpuset +Then under /sys/fs/cgroup/cpuset you can find a tree that corresponds to the +tree of the cpusets in the system. For instance, /sys/fs/cgroup/cpuset is the cpuset that holds the whole system. -If you want to create a new cpuset under /dev/cpuset: -# cd /dev/cpuset +If you want to create a new cpuset under /sys/fs/cgroup/cpuset: +# cd /sys/fs/cgroup/cpuset # mkdir my_cpuset Now you want to do something with this cpuset. @@ -765,12 +765,12 @@ wrapper around the cgroup filesystem. The command -mount -t cpuset X /dev/cpuset +mount -t cpuset X /sys/fs/cgroup/cpuset is equivalent to -mount -t cgroup -ocpuset,noprefix X /dev/cpuset -echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent +mount -t cgroup -ocpuset,noprefix X /sys/fs/cgroup/cpuset +echo "/sbin/cpuset_release_agent" > /sys/fs/cgroup/cpuset/release_agent 2.2 Adding/removing cpus ------------------------ diff --git a/Documentation/cgroups/devices.txt b/Documentation/cgroups/devices.txt index 57ca4c89fe5c..16624a7f8222 100644 --- a/Documentation/cgroups/devices.txt +++ b/Documentation/cgroups/devices.txt @@ -22,16 +22,16 @@ removed from the child(ren). An entry is added using devices.allow, and removed using devices.deny. For instance - echo 'c 1:3 mr' > /cgroups/1/devices.allow + echo 'c 1:3 mr' > /sys/fs/cgroup/1/devices.allow allows cgroup 1 to read and mknod the device usually known as /dev/null. Doing - echo a > /cgroups/1/devices.deny + echo a > /sys/fs/cgroup/1/devices.deny will remove the default 'a *:* rwm' entry. Doing - echo a > /cgroups/1/devices.allow + echo a > /sys/fs/cgroup/1/devices.allow will add the 'a *:* rwm' entry to the whitelist. diff --git a/Documentation/cgroups/freezer-subsystem.txt b/Documentation/cgroups/freezer-subsystem.txt index 41f37fea1276..c21d77742a07 100644 --- a/Documentation/cgroups/freezer-subsystem.txt +++ b/Documentation/cgroups/freezer-subsystem.txt @@ -59,28 +59,28 @@ is non-freezable. * Examples of usage : - # mkdir /containers - # mount -t cgroup -ofreezer freezer /containers - # mkdir /containers/0 - # echo $some_pid > /containers/0/tasks + # mkdir /sys/fs/cgroup/freezer + # mount -t cgroup -ofreezer freezer /sys/fs/cgroup/freezer + # mkdir /sys/fs/cgroup/freezer/0 + # echo $some_pid > /sys/fs/cgroup/freezer/0/tasks to get status of the freezer subsystem : - # cat /containers/0/freezer.state + # cat /sys/fs/cgroup/freezer/0/freezer.state THAWED to freeze all tasks in the container : - # echo FROZEN > /containers/0/freezer.state - # cat /containers/0/freezer.state + # echo FROZEN > /sys/fs/cgroup/freezer/0/freezer.state + # cat /sys/fs/cgroup/freezer/0/freezer.state FREEZING - # cat /containers/0/freezer.state + # cat /sys/fs/cgroup/freezer/0/freezer.state FROZEN to unfreeze all tasks in the container : - # echo THAWED > /containers/0/freezer.state - # cat /containers/0/freezer.state + # echo THAWED > /sys/fs/cgroup/freezer/0/freezer.state + # cat /sys/fs/cgroup/freezer/0/freezer.state THAWED This is the basic mechanism which should do the right thing for user space task diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 510d64570d60..ffec2416aa74 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -264,16 +264,17 @@ b. Enable CONFIG_RESOURCE_COUNTERS c. Enable CONFIG_CGROUP_MEM_RES_CTLR d. Enable CONFIG_CGROUP_MEM_RES_CTLR_SWAP (to use swap extension) -1. Prepare the cgroups -# mkdir -p /cgroups -# mount -t cgroup none /cgroups -o memory +1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?) +# mount -t tmpfs none /sys/fs/cgroup +# mkdir /sys/fs/cgroup/memory +# mount -t cgroup none /sys/fs/cgroup/memory -o memory 2. Make the new group and move bash into it -# mkdir /cgroups/0 -# echo $$ > /cgroups/0/tasks +# mkdir /sys/fs/cgroup/memory/0 +# echo $$ > /sys/fs/cgroup/memory/0/tasks Since now we're in the 0 cgroup, we can alter the memory limit: -# echo 4M > /cgroups/0/memory.limit_in_bytes +# echo 4M > /sys/fs/cgroup/memory/0/memory.limit_in_bytes NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, Gibibytes.) @@ -281,11 +282,11 @@ mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, Gibibytes.) NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited). NOTE: We cannot set limits on the root cgroup any more. -# cat /cgroups/0/memory.limit_in_bytes +# cat /sys/fs/cgroup/memory/0/memory.limit_in_bytes 4194304 We can check the usage: -# cat /cgroups/0/memory.usage_in_bytes +# cat /sys/fs/cgroup/memory/0/memory.usage_in_bytes 1216512 A successful write to this file does not guarantee a successful set of diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt index 99961993257a..91ecff07cede 100644 --- a/Documentation/scheduler/sched-design-CFS.txt +++ b/Documentation/scheduler/sched-design-CFS.txt @@ -223,9 +223,10 @@ When CONFIG_FAIR_GROUP_SCHED is defined, a "cpu.shares" file is created for each group created using the pseudo filesystem. See example steps below to create task groups and modify their CPU share using the "cgroups" pseudo filesystem. - # mkdir /dev/cpuctl - # mount -t cgroup -ocpu none /dev/cpuctl - # cd /dev/cpuctl + # mount -t tmpfs cgroup_root /sys/fs/cgroup + # mkdir /sys/fs/cgroup/cpu + # mount -t cgroup -ocpu none /sys/fs/cgroup/cpu + # cd /sys/fs/cgroup/cpu # mkdir multimedia # create "multimedia" group of tasks # mkdir browser # create "browser" group of tasks diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt index 605b0d40329d..71b54d549987 100644 --- a/Documentation/scheduler/sched-rt-group.txt +++ b/Documentation/scheduler/sched-rt-group.txt @@ -129,9 +129,8 @@ priority! Enabling CONFIG_RT_GROUP_SCHED lets you explicitly allocate real CPU bandwidth to task groups. -This uses the /cgroup virtual file system and -"/cgroup//cpu.rt_runtime_us" to control the CPU time reserved for each -control group. +This uses the cgroup virtual file system and "/cpu.rt_runtime_us" +to control the CPU time reserved for each control group. For more information on working with control groups, you should read Documentation/cgroups/cgroups.txt as well. @@ -150,7 +149,7 @@ For now, this can be simplified to just the following (but see Future plans): =============== There is work in progress to make the scheduling period for each group -("/cgroup//cpu.rt_period_us") configurable as well. +("/cpu.rt_period_us") configurable as well. The constraint on the period is that a subgroup must have a smaller or equal period to its parent. But realistically its not very useful _yet_ diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.txt index 12f9ba20ccb7..550068466605 100644 --- a/Documentation/vm/hwpoison.txt +++ b/Documentation/vm/hwpoison.txt @@ -129,12 +129,12 @@ Limit injection to pages owned by memgroup. Specified by inode number of the memcg. Example: - mkdir /cgroup/hwpoison + mkdir /sys/fs/cgroup/mem/hwpoison usemem -m 100 -s 1000 & - echo `jobs -p` > /cgroup/hwpoison/tasks + echo `jobs -p` > /sys/fs/cgroup/mem/hwpoison/tasks - memcg_ino=$(ls -id /cgroup/hwpoison | cut -f1 -d' ') + memcg_ino=$(ls -id /sys/fs/cgroup/mem/hwpoison | cut -f1 -d' ') echo $memcg_ino > /debug/hwpoison/corrupt-filter-memcg page-types -p `pidof init` --hwpoison # shall do nothing From 67de0162fbb78713fcb23cb2502b380faa8bde73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Sommer?= Date: Wed, 15 Jun 2011 13:00:47 -0700 Subject: [PATCH 163/327] Documentation: fix cgroup typos and formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix format and spelling. Signed-off-by: Jörg Sommer Acked-by: Paul Menage Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- Documentation/cgroups/blkio-controller.txt | 2 +- Documentation/cgroups/cgroups.txt | 2 +- Documentation/cgroups/memory.txt | 22 +++++++++++----------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt index b1b1bfadc9c0..cd45c8ea7463 100644 --- a/Documentation/cgroups/blkio-controller.txt +++ b/Documentation/cgroups/blkio-controller.txt @@ -111,7 +111,7 @@ Hierarchical Cgroups CFQ and throttling will practically treat all groups at same level. pivot - / | \ \ + / / \ \ root test1 test2 test3 Down the line we can implement hierarchical accounting/control support diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 15bca101ff62..cd67e90003c0 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -142,7 +142,7 @@ and depending on who is launching the browser he can With only a single hierarchy, he now would potentially have to create a separate cgroup for every browser launched and associate it with -approp network and other resource class. This may lead to +appropriate network and other resource class. This may lead to proliferation of such cgroups. Also lets say that the administrator would like to give enhanced network diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index ffec2416aa74..06eb6d957c83 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -1,8 +1,8 @@ Memory Resource Controller -NOTE: The Memory Resource Controller has been generically been referred - to as the memory controller in this document. Do not confuse memory - controller used here with the memory controller that is used in hardware. +NOTE: The Memory Resource Controller has generically been referred to as the + memory controller in this document. Do not confuse memory controller + used here with the memory controller that is used in hardware. (For editors) In this document: @@ -182,7 +182,7 @@ behind this approach is that a cgroup that aggressively uses a shared page will eventually get charged for it (once it is uncharged from the cgroup that brought it in -- this will happen on memory pressure). -Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used.. +Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used. When you do swapoff and make swapped-out pages of shmem(tmpfs) to be backed into memory in force, charges for pages are accounted against the caller of swapoff rather than the users of shmem. @@ -214,7 +214,7 @@ affecting global LRU, memory+swap limit is better than just limiting swap from OS point of view. * What happens when a cgroup hits memory.memsw.limit_in_bytes -When a cgroup his memory.memsw.limit_in_bytes, it's useless to do swap-out +When a cgroup hits memory.memsw.limit_in_bytes, it's useless to do swap-out in this cgroup. Then, swap-out will not be done by cgroup routine and file caches are dropped. But as mentioned above, global LRU can do swapout memory from it for sanity of the system's memory management state. You can't forbid @@ -491,13 +491,13 @@ The hierarchy is created by creating the appropriate cgroups in the cgroup filesystem. Consider for example, the following cgroup filesystem hierarchy - root + root / | \ - / | \ - a b c - | \ - | \ - d e + / | \ + a b c + | \ + | \ + d e In the diagram above, with hierarchical accounting enabled, all memory usage of e, is accounted to its ancestors up until the root (i.e, c and root), From 13fca640bb8ab611a50e0ba120b186faa2994d6c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 15 Jun 2011 21:53:52 -0700 Subject: [PATCH 164/327] Revert "fs/exec.c: use BUILD_BUG_ON for VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP" This reverts commit 7f81c8890c15a10f5220bebae3b6dfae4961962a. It turns out that it's not actually a build-time check on x86-64 UML, which does some seriously crazy stuff with VM_STACK_FLAGS. The VM_STACK_FLAGS define depends on the arch-supplied VM_STACK_DEFAULT_FLAGS value, and on x86-64 UML we have arch/um/sys-x86_64/shared/sysdep/vm-flags.h: #define VM_STACK_DEFAULT_FLAGS \ (test_thread_flag(TIF_IA32) ? vm_stack_flags32 : vm_stack_flags) #define VM_STACK_DEFAULT_FLAGS vm_stack_flags (yes, seriously: two different #define's for that thing, with the first one being inside an "#ifdef TIF_IA32") It's possible that it is UML that should just be fixed in this area, but for now let's just undo the (very small) optimization. Reported-by: Randy Dunlap Acked-by: Andrew Morton Cc: Michal Hocko Cc: Richard Weinberger Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/exec.c b/fs/exec.c index b54f74f3cd80..97e0d52d72fd 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -277,7 +277,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) * use STACK_TOP because that can depend on attributes which aren't * configured yet. */ - BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); + BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); vma->vm_end = STACK_TOP_MAX; vma->vm_start = vma->vm_end - PAGE_SIZE; vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; From fb2e73947461d55a3166f94a8a545b78d6635262 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 15 Jun 2011 06:08:18 +0000 Subject: [PATCH 165/327] sh: ecovec: Add renesas_usbhs support Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/sh/boards/mach-ecovec24/setup.c | 48 ++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index 3a32741cc0ac..513cb1a2e6c8 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -232,6 +233,52 @@ static struct platform_device usb1_common_device = { .resource = usb1_common_resources, }; +/* + * USBHS + */ +static int usbhs_get_id(struct platform_device *pdev) +{ + return gpio_get_value(GPIO_PTB3); +} + +static struct renesas_usbhs_platform_info usbhs_info = { + .platform_callback = { + .get_id = usbhs_get_id, + }, + .driver_param = { + .buswait_bwait = 4, + .detection_delay = 5, + }, +}; + +static struct resource usbhs_resources[] = { + [0] = { + .start = 0xa4d90000, + .end = 0xa4d90124 - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 66, + .end = 66, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device usbhs_device = { + .name = "renesas_usbhs", + .id = 1, + .dev = { + .dma_mask = NULL, /* not use dma */ + .coherent_dma_mask = 0xffffffff, + .platform_data = &usbhs_info, + }, + .num_resources = ARRAY_SIZE(usbhs_resources), + .resource = usbhs_resources, + .archdata = { + .hwblk_id = HWBLK_USB1, + }, +}; + /* LCDC */ const static struct fb_videomode ecovec_lcd_modes[] = { { @@ -897,6 +944,7 @@ static struct platform_device *ecovec_devices[] __initdata = { &sh_eth_device, &usb0_host_device, &usb1_common_device, + &usbhs_device, &lcdc_device, &ceu0_device, &ceu1_device, From 261a9af671a79b750cb170bac620601d686535c1 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 15 Jun 2011 06:08:28 +0000 Subject: [PATCH 166/327] sh: sh7724: Add USBHS DMAEngine support Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/sh/include/cpu-sh4/cpu/sh7724.h | 8 ++++++ arch/sh/kernel/cpu/sh4a/setup-sh7724.c | 40 ++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/arch/sh/include/cpu-sh4/cpu/sh7724.h b/arch/sh/include/cpu-sh4/cpu/sh7724.h index 3daef8ecbc63..cbc47e6bcab5 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7724.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7724.h @@ -298,6 +298,14 @@ enum { SHDMA_SLAVE_SCIF4_RX, SHDMA_SLAVE_SCIF5_TX, SHDMA_SLAVE_SCIF5_RX, + SHDMA_SLAVE_USB0D0_TX, + SHDMA_SLAVE_USB0D0_RX, + SHDMA_SLAVE_USB0D1_TX, + SHDMA_SLAVE_USB0D1_RX, + SHDMA_SLAVE_USB1D0_TX, + SHDMA_SLAVE_USB1D0_RX, + SHDMA_SLAVE_USB1D1_TX, + SHDMA_SLAVE_USB1D1_RX, SHDMA_SLAVE_SDHI0_TX, SHDMA_SLAVE_SDHI0_RX, SHDMA_SLAVE_SDHI1_TX, diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c index 0333fe9e3881..134a397b1918 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c @@ -92,6 +92,46 @@ static const struct sh_dmae_slave_config sh7724_dmae_slaves[] = { .addr = 0xa4e50024, .chcr = DM_INC | SM_FIX | 0x800 | TS_INDEX2VAL(XMIT_SZ_8BIT), .mid_rid = 0x36, + }, { + .slave_id = SHDMA_SLAVE_USB0D0_TX, + .addr = 0xA4D80100, + .chcr = DM_FIX | SM_INC | 0x800 | TS_INDEX2VAL(XMIT_SZ_32BIT), + .mid_rid = 0x73, + }, { + .slave_id = SHDMA_SLAVE_USB0D0_RX, + .addr = 0xA4D80100, + .chcr = DM_INC | SM_FIX | 0x800 | TS_INDEX2VAL(XMIT_SZ_32BIT), + .mid_rid = 0x73, + }, { + .slave_id = SHDMA_SLAVE_USB0D1_TX, + .addr = 0xA4D80120, + .chcr = DM_FIX | SM_INC | 0x800 | TS_INDEX2VAL(XMIT_SZ_32BIT), + .mid_rid = 0x77, + }, { + .slave_id = SHDMA_SLAVE_USB0D1_RX, + .addr = 0xA4D80120, + .chcr = DM_INC | SM_FIX | 0x800 | TS_INDEX2VAL(XMIT_SZ_32BIT), + .mid_rid = 0x77, + }, { + .slave_id = SHDMA_SLAVE_USB1D0_TX, + .addr = 0xA4D90100, + .chcr = DM_FIX | SM_INC | 0x800 | TS_INDEX2VAL(XMIT_SZ_32BIT), + .mid_rid = 0xab, + }, { + .slave_id = SHDMA_SLAVE_USB1D0_RX, + .addr = 0xA4D90100, + .chcr = DM_INC | SM_FIX | 0x800 | TS_INDEX2VAL(XMIT_SZ_32BIT), + .mid_rid = 0xab, + }, { + .slave_id = SHDMA_SLAVE_USB1D1_TX, + .addr = 0xA4D90120, + .chcr = DM_FIX | SM_INC | 0x800 | TS_INDEX2VAL(XMIT_SZ_32BIT), + .mid_rid = 0xaf, + }, { + .slave_id = SHDMA_SLAVE_USB1D1_RX, + .addr = 0xA4D90120, + .chcr = DM_INC | SM_FIX | 0x800 | TS_INDEX2VAL(XMIT_SZ_32BIT), + .mid_rid = 0xaf, }, { .slave_id = SHDMA_SLAVE_SDHI0_TX, .addr = 0x04ce0030, From a46e0899eec7a3069bcadd45dfba7bf67c6ed016 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 15 Jun 2011 15:47:09 -0700 Subject: [PATCH 167/327] rcu: use softirq instead of kthreads except when RCU_BOOST=y This patch #ifdefs RCU kthreads out of the kernel unless RCU_BOOST=y, thus eliminating context-switch overhead if RCU priority boosting has not been configured. Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 59 ++++++++++++++++++++++++++++++----------- kernel/rcutree.h | 8 ++++-- kernel/rcutree_plugin.h | 41 +++++++++++++++++----------- kernel/rcutree_trace.c | 32 +++++++++++++++------- 4 files changed, 99 insertions(+), 41 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ae5c9ea68662..429d4949f0eb 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -87,6 +87,8 @@ static struct rcu_state *rcu_state; int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); +#ifdef CONFIG_RCU_BOOST + /* * Control variables for per-CPU and per-rcu_node kthreads. These * handle all flavors of RCU. @@ -98,9 +100,11 @@ DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); DEFINE_PER_CPU(char, rcu_cpu_has_work); static char rcu_kthreads_spawnable; +#endif /* #ifdef CONFIG_RCU_BOOST */ + static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); -static void invoke_rcu_cpu_kthread(void); -static void __invoke_rcu_cpu_kthread(void); +static void invoke_rcu_core(void); +static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ @@ -1089,6 +1093,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) int need_report = 0; struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp; +#ifdef CONFIG_RCU_BOOST struct task_struct *t; /* Stop the CPU's kthread. */ @@ -1097,6 +1102,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) per_cpu(rcu_cpu_kthread_task, cpu) = NULL; kthread_stop(t); } +#endif /* #ifdef CONFIG_RCU_BOOST */ /* Exclude any attempts to start a new grace period. */ raw_spin_lock_irqsave(&rsp->onofflock, flags); @@ -1232,7 +1238,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) /* Re-raise the RCU softirq if there are callbacks remaining. */ if (cpu_has_callbacks_ready_to_invoke(rdp)) - invoke_rcu_cpu_kthread(); + invoke_rcu_core(); } /* @@ -1278,7 +1284,7 @@ void rcu_check_callbacks(int cpu, int user) } rcu_preempt_check_callbacks(cpu); if (rcu_pending(cpu)) - invoke_rcu_cpu_kthread(); + invoke_rcu_core(); } #ifdef CONFIG_SMP @@ -1444,9 +1450,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) /* If there are callbacks ready, invoke them. */ if (cpu_has_callbacks_ready_to_invoke(rdp)) - __invoke_rcu_cpu_kthread(); + invoke_rcu_callbacks(rsp, rdp); } +#ifdef CONFIG_RCU_BOOST + static void rcu_kthread_do_work(void) { rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); @@ -1454,6 +1462,8 @@ static void rcu_kthread_do_work(void) rcu_preempt_do_callbacks(); } +#endif /* #ifdef CONFIG_RCU_BOOST */ + /* * Do softirq processing for the current CPU. */ @@ -1474,25 +1484,22 @@ static void rcu_process_callbacks(struct softirq_action *unused) * the current CPU with interrupts disabled, the rcu_cpu_kthread_task * cannot disappear out from under us. */ -static void __invoke_rcu_cpu_kthread(void) +static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) { - unsigned long flags; - - local_irq_save(flags); - __this_cpu_write(rcu_cpu_has_work, 1); - if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) { - local_irq_restore(flags); + if (likely(!rsp->boost)) { + rcu_do_batch(rsp, rdp); return; } - wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); - local_irq_restore(flags); + invoke_rcu_callbacks_kthread(); } -static void invoke_rcu_cpu_kthread(void) +static void invoke_rcu_core(void) { raise_softirq(RCU_SOFTIRQ); } +#ifdef CONFIG_RCU_BOOST + /* * Wake up the specified per-rcu_node-structure kthread. * Because the per-rcu_node kthreads are immortal, we don't need @@ -1818,6 +1825,18 @@ static int __init rcu_spawn_kthreads(void) } early_initcall(rcu_spawn_kthreads); +#else /* #ifdef CONFIG_RCU_BOOST */ + +static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) +{ +} + +static void rcu_cpu_kthread_setrt(int cpu, int to_rt) +{ +} + +#endif /* #else #ifdef CONFIG_RCU_BOOST */ + static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_state *rsp) @@ -2224,6 +2243,8 @@ static void __cpuinit rcu_prepare_cpu(int cpu) rcu_preempt_init_percpu_data(cpu); } +#ifdef CONFIG_RCU_BOOST + static void __cpuinit rcu_prepare_kthreads(int cpu) { struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); @@ -2237,6 +2258,14 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) } } +#else /* #ifdef CONFIG_RCU_BOOST */ + +static void __cpuinit rcu_prepare_kthreads(int cpu) +{ +} + +#endif /* #else #ifdef CONFIG_RCU_BOOST */ + /* * Handle CPU online/offline notification events. */ diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 0fed6b934d2a..434288c7ad88 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -369,6 +369,7 @@ struct rcu_state { /* period because */ /* force_quiescent_state() */ /* was running. */ + u8 boost; /* Subject to priority boost. */ unsigned long gpnum; /* Current gp number. */ unsigned long completed; /* # of last completed gp. */ @@ -439,7 +440,6 @@ static void rcu_preempt_offline_cpu(int cpu); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_preempt_check_callbacks(int cpu); static void rcu_preempt_process_callbacks(void); -static void rcu_preempt_do_callbacks(void); void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); @@ -451,11 +451,15 @@ static void rcu_preempt_send_cbs_to_online(void); static void __init __rcu_init_preempt(void); static void rcu_needs_cpu_flush(void); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); +static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); +static void invoke_rcu_callbacks_kthread(void); +#ifdef CONFIG_RCU_BOOST +static void rcu_preempt_do_callbacks(void); static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, cpumask_var_t cm); -static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, struct rcu_node *rnp, int rnp_index); +#endif /* #ifdef CONFIG_RCU_BOOST */ #endif /* #ifndef RCU_TREE_NONCORE */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 38d09c5f2b41..2772386c0421 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -602,11 +602,15 @@ static void rcu_preempt_process_callbacks(void) &__get_cpu_var(rcu_preempt_data)); } +#ifdef CONFIG_RCU_BOOST + static void rcu_preempt_do_callbacks(void) { rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data)); } +#endif /* #ifdef CONFIG_RCU_BOOST */ + /* * Queue a preemptible-RCU callback for invocation after a grace period. */ @@ -1002,10 +1006,6 @@ static void rcu_preempt_process_callbacks(void) { } -static void rcu_preempt_do_callbacks(void) -{ -} - /* * Wait for an rcu-preempt grace period, but make it happen quickly. * But because preemptible RCU does not exist, map to rcu-sched. @@ -1257,6 +1257,23 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) } } +/* + * Wake up the per-CPU kthread to invoke RCU callbacks. + */ +static void invoke_rcu_callbacks_kthread(void) +{ + unsigned long flags; + + local_irq_save(flags); + __this_cpu_write(rcu_cpu_has_work, 1); + if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) { + local_irq_restore(flags); + return; + } + wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); + local_irq_restore(flags); +} + /* * Set the affinity of the boost kthread. The CPU-hotplug locks are * held, so no one should be messing with the existence of the boost @@ -1297,6 +1314,7 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, if (&rcu_preempt_state != rsp) return 0; + rsp->boost = 1; if (rnp->boost_kthread_task != NULL) return 0; t = kthread_create(rcu_boost_kthread, (void *)rnp, @@ -1319,22 +1337,15 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) raw_spin_unlock_irqrestore(&rnp->lock, flags); } -static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, - cpumask_var_t cm) +static void invoke_rcu_callbacks_kthread(void) { + WARN_ON_ONCE(1); } static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) { } -static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, - struct rcu_node *rnp, - int rnp_index) -{ - return 0; -} - #endif /* #else #ifdef CONFIG_RCU_BOOST */ #ifndef CONFIG_SMP @@ -1509,7 +1520,7 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); * * Because it is not legal to invoke rcu_process_callbacks() with irqs * disabled, we do one pass of force_quiescent_state(), then do a - * invoke_rcu_cpu_kthread() to cause rcu_process_callbacks() to be invoked + * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. */ int rcu_needs_cpu(int cpu) @@ -1560,7 +1571,7 @@ int rcu_needs_cpu(int cpu) /* If RCU callbacks are still pending, RCU still needs this CPU. */ if (c) - invoke_rcu_cpu_kthread(); + invoke_rcu_core(); return c; } diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 9678cc3650f5..4e144876dc68 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -46,6 +46,8 @@ #define RCU_TREE_NONCORE #include "rcutree.h" +#ifdef CONFIG_RCU_BOOST + DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_cpu); DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); @@ -58,6 +60,8 @@ static char convert_kthread_status(unsigned int kthread_status) return "SRWOY"[kthread_status]; } +#endif /* #ifdef CONFIG_RCU_BOOST */ + static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) { if (!rdp->beenonline) @@ -76,7 +80,7 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) rdp->dynticks_fqs); #endif /* #ifdef CONFIG_NO_HZ */ seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); - seq_printf(m, " ql=%ld qs=%c%c%c%c kt=%d/%c/%d ktl=%x b=%ld", + seq_printf(m, " ql=%ld qs=%c%c%c%c", rdp->qlen, ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]], @@ -84,13 +88,16 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) rdp->nxttail[RCU_NEXT_READY_TAIL]], ".W"[rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_WAIT_TAIL]], - ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]], + ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]); +#ifdef CONFIG_RCU_BOOST + seq_printf(m, " kt=%d/%c/%d ktl=%x", per_cpu(rcu_cpu_has_work, rdp->cpu), convert_kthread_status(per_cpu(rcu_cpu_kthread_status, rdp->cpu)), per_cpu(rcu_cpu_kthread_cpu, rdp->cpu), - per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff, - rdp->blimit); + per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff); +#endif /* #ifdef CONFIG_RCU_BOOST */ + seq_printf(m, " b=%ld", rdp->blimit); seq_printf(m, " ci=%lu co=%lu ca=%lu\n", rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); } @@ -147,18 +154,21 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) rdp->dynticks_fqs); #endif /* #ifdef CONFIG_NO_HZ */ seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); - seq_printf(m, ",%ld,\"%c%c%c%c\",%d,\"%c\",%ld", rdp->qlen, + seq_printf(m, ",%ld,\"%c%c%c%c\"", rdp->qlen, ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]], ".R"[rdp->nxttail[RCU_WAIT_TAIL] != rdp->nxttail[RCU_NEXT_READY_TAIL]], ".W"[rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_WAIT_TAIL]], - ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]], + ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]); +#ifdef CONFIG_RCU_BOOST + seq_printf(m, ",%d,\"%c\"", per_cpu(rcu_cpu_has_work, rdp->cpu), convert_kthread_status(per_cpu(rcu_cpu_kthread_status, - rdp->cpu)), - rdp->blimit); + rdp->cpu))); +#endif /* #ifdef CONFIG_RCU_BOOST */ + seq_printf(m, ",%ld", rdp->blimit); seq_printf(m, ",%lu,%lu,%lu\n", rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); } @@ -169,7 +179,11 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) #ifdef CONFIG_NO_HZ seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); #endif /* #ifdef CONFIG_NO_HZ */ - seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n"); + seq_puts(m, "\"of\",\"ri\",\"ql\",\"qs\""); +#ifdef CONFIG_RCU_BOOST + seq_puts(m, "\"kt\",\"ktl\""); +#endif /* #ifdef CONFIG_RCU_BOOST */ + seq_puts(m, ",\"b\",\"ci\",\"co\",\"ca\"\n"); #ifdef CONFIG_TREE_PREEMPT_RCU seq_puts(m, "\"rcu_preempt:\"\n"); PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); From 45e97ab65026a3391cb2c938f834ca5db4d2e5b3 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 15 Jun 2011 11:26:47 +0200 Subject: [PATCH 168/327] drm: populate irq_by_busid-member for pci MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 8410ea (drm: rework PCI/platform driver interface) implemented drm_pci_irq_by_busid() but forgot to make it available in the drm_pci_bus-struct. This caused a freeze on my Radeon9600-equipped laptop when executing glxgears. Thanks to Michel for noticing the flaw. [airlied: made function static also] Reported-by: Michel Dänzer Signed-off-by: Wolfram Sang Cc: stable@kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index e1aee4f6a7c6..b6a19cb07caf 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -251,7 +251,7 @@ err: } -int drm_pci_irq_by_busid(struct drm_device *dev, struct drm_irq_busid *p) +static int drm_pci_irq_by_busid(struct drm_device *dev, struct drm_irq_busid *p) { if ((p->busnum >> 8) != drm_get_pci_domain(dev) || (p->busnum & 0xff) != dev->pdev->bus->number || @@ -292,6 +292,7 @@ static struct drm_bus drm_pci_bus = { .get_name = drm_pci_get_name, .set_busid = drm_pci_set_busid, .set_unique = drm_pci_set_unique, + .irq_by_busid = drm_pci_irq_by_busid, .agp_init = drm_pci_agp_init, }; From 1c88d74f3a0b748b0c7e88e91e9d0503815d5689 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 14 Jun 2011 19:15:53 +0000 Subject: [PATCH 169/327] drm/radeon/kms: signed fix for evergreen thermal temperature is signed. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/evergreen.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 86157b172c88..7e3d96e7ac04 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -88,7 +88,8 @@ u32 evergreen_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) /* get temperature in millidegrees */ int evergreen_get_temp(struct radeon_device *rdev) { - u32 temp, toffset, actual_temp = 0; + u32 temp, toffset; + int actual_temp = 0; if (rdev->family == CHIP_JUNIPER) { toffset = (RREG32(CG_THERMAL_CTRL) & TOFFSET_MASK) >> From 7c88d2b80ba5c175398013842782461a3b980130 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 14 Jun 2011 15:27:38 +0000 Subject: [PATCH 170/327] drm/radeon/kms: be more pedantic about the g5 quirk (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I don't think Apple offered any other cards for this mac, so I doubt this will be an issue, but just to be on the safe side, check the pci ids as well. v2: fix spelling in commit message Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher Cc: Joachim Henke Cc: Michel Dänzer Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_combios.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 797c8bcbb6a4..e4594676a07c 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -1553,9 +1553,12 @@ bool radeon_get_legacy_connector_info_from_table(struct drm_device *dev) (rdev->pdev->subsystem_device == 0x4a48)) { /* Mac X800 */ rdev->mode_info.connector_table = CT_MAC_X800; - } else if (of_machine_is_compatible("PowerMac7,2") || - of_machine_is_compatible("PowerMac7,3")) { - /* Mac G5 9600 */ + } else if ((of_machine_is_compatible("PowerMac7,2") || + of_machine_is_compatible("PowerMac7,3")) && + (rdev->pdev->device == 0x4150) && + (rdev->pdev->subsystem_vendor == 0x1002) && + (rdev->pdev->subsystem_device == 0x4150)) { + /* Mac G5 tower 9600 */ rdev->mode_info.connector_table = CT_MAC_G5_9600; } else #endif /* CONFIG_PPC_PMAC */ From e6ba759980e65084b0db9f1684d9d65a2a3e1741 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 13 Jun 2011 22:02:51 +0000 Subject: [PATCH 171/327] drm/radeon/kms: clear wb memory by default Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index e680501c78ea..7cfaa7e2f3b5 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -215,6 +215,8 @@ int radeon_wb_init(struct radeon_device *rdev) return r; } + /* clear wb memory */ + memset((char *)rdev->wb.wb, 0, RADEON_GPU_PAGE_SIZE); /* disable event_write fences */ rdev->wb.use_event = false; /* disabled via module param */ From f49dadb82dde88092827b6d058e7164e75e96759 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 14 Jun 2011 06:13:54 +0000 Subject: [PATCH 172/327] drm: make debug levels match in edid failure code. this puts the header and followup at the same loglevel as the hex dump code. Signed-off-by: Dave Airlie Reviewed-by: Alex Deucher --- drivers/gpu/drm/drm_edid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 0a9357c66ff8..3618d29c79a2 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -184,9 +184,9 @@ drm_edid_block_valid(u8 *raw_edid) bad: if (raw_edid) { - DRM_ERROR("Raw EDID:\n"); + printk(KERN_ERR "Raw EDID:\n"); print_hex_dump_bytes(KERN_ERR, DUMP_PREFIX_NONE, raw_edid, EDID_LENGTH); - printk("\n"); + printk(KERN_ERR "\n"); } return 0; } From 4a9a8b71e12d41abb71c4e741bff524f016cfef4 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 14 Jun 2011 06:13:55 +0000 Subject: [PATCH 173/327] drm/radeon: workaround a hw bug on some radeon chipsets with all-0 EDIDs. Some RS690 chipsets seem to end up with floating connectors, either a DVI connector isn't actually populated, or an add-in HDMI card is available but not installed. In this case we seem to get a NULL byte response for each byte of the i2c transaction, so we detect this case and if we see it we don't do anymore DDC transactions on this connector. I've tested this on my RS690 without the HDMI card installed and it seems to work fine. Signed-off-by: Dave Airlie Reviewed-by: Alex Deucher --- drivers/gpu/drm/drm_edid.c | 15 +++++++++++++++ drivers/gpu/drm/radeon/radeon_connectors.c | 7 +++++++ include/drm/drm_crtc.h | 2 ++ 3 files changed, 24 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 3618d29c79a2..09292193dafe 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -258,6 +258,17 @@ drm_do_probe_ddc_edid(struct i2c_adapter *adapter, unsigned char *buf, return ret == 2 ? 0 : -1; } +static bool drm_edid_is_zero(u8 *in_edid, int length) +{ + int i; + u32 *raw_edid = (u32 *)in_edid; + + for (i = 0; i < length / 4; i++) + if (*(raw_edid + i) != 0) + return false; + return true; +} + static u8 * drm_do_get_edid(struct drm_connector *connector, struct i2c_adapter *adapter) { @@ -273,6 +284,10 @@ drm_do_get_edid(struct drm_connector *connector, struct i2c_adapter *adapter) goto out; if (drm_edid_block_valid(block)) break; + if (i == 0 && drm_edid_is_zero(block, EDID_LENGTH)) { + connector->null_edid_counter++; + goto carp; + } } if (i == 4) goto carp; diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 9c2929c7e79f..c04e18ee8a87 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -836,6 +836,13 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) if (!radeon_connector->edid) { DRM_ERROR("%s: probed a monitor but no|invalid EDID\n", drm_get_connector_name(connector)); + /* rs690 seems to have a problem with connectors not existing and always + * return a block of 0's. If we see this just stop polling on this output */ + if ((rdev->family == CHIP_RS690 || rdev->family == CHIP_RS740) && radeon_connector->base.null_edid_counter) { + ret = connector_status_disconnected; + DRM_ERROR("%s: detected RS690 floating bus bug, stopping ddc detect\n", drm_get_connector_name(connector)); + radeon_connector->ddc_bus = NULL; + } } else { radeon_connector->use_digital = !!(radeon_connector->edid->input & DRM_EDID_INPUT_DIGITAL); diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 9573e0ce3120..33d12f87f0e0 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -520,6 +520,8 @@ struct drm_connector { uint32_t encoder_ids[DRM_CONNECTOR_MAX_ENCODER]; uint32_t force_encoder_id; struct drm_encoder *encoder; /* currently active encoder */ + + int null_edid_counter; /* needed to workaround some HW bugs where we get all 0s */ }; /** From cafe8d8413399119c3f4cd575e0eb27e2654b9d5 Mon Sep 17 00:00:00 2001 From: Christian Dietrich Date: Sat, 4 Jun 2011 15:36:43 +0000 Subject: [PATCH 174/327] drivers/gpu/drm: use printk_ratelimited instead of printk_ratelimit Since printk_ratelimit() shouldn't be used anymore (see comment in include/linux/printk.h), replace it with printk_ratelimited. Signed-off-by: Christian Dietrich Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_ioc32.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index d61d185cf040..4a058c7af6c0 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -28,6 +28,7 @@ * IN THE SOFTWARE. */ #include +#include #include "drmP.h" #include "drm_core.h" @@ -253,10 +254,10 @@ static int compat_drm_addmap(struct file *file, unsigned int cmd, return -EFAULT; m32.handle = (unsigned long)handle; - if (m32.handle != (unsigned long)handle && printk_ratelimit()) - printk(KERN_ERR "compat_drm_addmap truncated handle" - " %p for type %d offset %x\n", - handle, m32.type, m32.offset); + if (m32.handle != (unsigned long)handle) + printk_ratelimited(KERN_ERR "compat_drm_addmap truncated handle" + " %p for type %d offset %x\n", + handle, m32.type, m32.offset); if (copy_to_user(argp, &m32, sizeof(m32))) return -EFAULT; From 9be34c9d526c305efb332ad53460b57d5f8edb3e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 16 Jun 2011 00:35:09 -0700 Subject: [PATCH 175/327] mm: get rid of the most spurious find_vma_prev() users We have some users of this function that date back to before the vma list was doubly linked, and just are silly. These days, you can find the previous vma by just following the vma->vm_prev pointer. In some cases you don't need any find_vma() lookup at all, and in other cases you're better off with the regular "find_vma()" that uses the vma cache front-end lookup. Some "find_vma_prev()" users are still valid, though. For example, in the case of a stack that grows up, it can be the case that we don't find any 'vma' at all (because we're looking up an address that is past the last vma), and that the stack that we want to grow is the 'prev' vma. But that kind of special case aside, we generally should prefer to use 'find_vma()'. Noticed due to a totally unrelated POWER memory corruption bug that just happened to hit in 'find_vma_prev()' and made me go "Hmm - why are we using that function here?". Signed-off-by: Linus Torvalds --- mm/mmap.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index bbdc9af5e117..d49736ff8a8d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -906,14 +906,7 @@ struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma) if (anon_vma) return anon_vma; try_prev: - /* - * It is potentially slow to have to call find_vma_prev here. - * But it's only on the first write fault on the vma, not - * every time, and we could devise a way to avoid it later - * (e.g. stash info in next's anon_vma_node when assigning - * an anon_vma, or when trying vma_merge). Another time. - */ - BUG_ON(find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma); + near = vma->vm_prev; if (!near) goto none; @@ -2044,9 +2037,10 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) return -EINVAL; /* Find the first overlapping VMA */ - vma = find_vma_prev(mm, start, &prev); + vma = find_vma(mm, start); if (!vma) return 0; + prev = vma->vm_prev; /* we have start < vma->vm_end */ /* if it doesn't overlap, we have nothing.. */ From 203db2952bc87f5d610c9ad53a7d02b85897721f Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 15 Jun 2011 23:03:38 +0200 Subject: [PATCH 176/327] tools/perf: Fix static build of perf tool To build a statically linked version of the perf tool all needed libraries must be added in the correct order to get the symbols resolved. Currently this is broken when, e.g. python or newt support is enabled -- libpython needs libpthread which is an unconditional link dependency of the perf tool; libslang needs libm, another unconditional dependency. To solve the problem in the long run without the need to keep track of transitive library dependencies, simply make the linker look at the EXTLIBS multiple times until it has all symbols resolved. Signed-off-by: Mathias Krause Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/1308171818-20370-1-git-send-email-minipli@googlemail.com Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 032ba6398a5c..940257b5774e 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -633,7 +633,7 @@ prefix_SQ = $(subst ','\'',$(prefix)) SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) -LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive $(EXTLIBS) +LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group ALL_CFLAGS += $(BASIC_CFLAGS) ALL_CFLAGS += $(ARCH_CFLAGS) From c3a4924565e2eecf2539871abd123d35be6d76d5 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 16 Jun 2011 12:21:34 +0200 Subject: [PATCH 177/327] Revert "HID: magicmouse: ignore 'ivalid report id' while switching modes" This reverts commit 23746a66d7d9e73402c68ef00d708796b97ebd72. It turned out that the actual reason for failure is not the device firmware, but bug in Bluetooth stack, which will be fixed by patch by Ville Tervo which corrects the mask handling for CSR 1.1 Dongles. Reported-and-tested-by: Ed Tomlinson Reported-and-tested-by: Chase Douglas Signed-off-by: Jiri Kosina --- drivers/hid/hid-magicmouse.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index a5eda4c8127a..0ec91c18a421 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -501,17 +501,9 @@ static int magicmouse_probe(struct hid_device *hdev, } report->size = 6; - /* - * The device reponds with 'invalid report id' when feature - * report switching it into multitouch mode is sent to it. - * - * This results in -EIO from the _raw low-level transport callback, - * but there seems to be no other way of switching the mode. - * Thus the super-ugly hacky success check below. - */ ret = hdev->hid_output_raw_report(hdev, feature, sizeof(feature), HID_FEATURE_REPORT); - if (ret != -EIO) { + if (ret != sizeof(feature)) { hid_err(hdev, "unable to request touch data (%d)\n", ret); goto err_stop_hw; } From 55b220cafadd71b9f83759f7b396998b2547dc5f Mon Sep 17 00:00:00 2001 From: Ambresh K Date: Wed, 15 Jun 2011 13:40:45 -0700 Subject: [PATCH 178/327] gpio/omap4: Fix missing interrupts during device wakeup due to IOPAD. If gpio pins from bank[2-5] are marked as wakeup enable and if the wake is through gpio IO pad wakeup, then that wakeup gpio interrupt is lost. In the current implementation, GPIO driver stores the context of DATAIN of all the gpio in the bank. During GPIO resuming, it checks DATAIN with wakeup enabled pins of gpio bank. If there is status change, then manually toggle GPIO_LEVELDETECT to generate pseudo interrupt. Reported-by: Philippe Mazet Tested-by: Philippe Mazet Signed-off-by: Ambresh K Signed-off-by: Grant Likely --- drivers/gpio/gpio-omap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 01f74a8459d9..35bebde23e83 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -469,8 +469,9 @@ static inline void set_24xx_gpio_triggering(struct gpio_bank *bank, int gpio, + OMAP24XX_GPIO_CLEARWKUENA); } } - /* This part needs to be executed always for OMAP34xx */ - if (cpu_is_omap34xx() || (bank->non_wakeup_gpios & gpio_bit)) { + /* This part needs to be executed always for OMAP{34xx, 44xx} */ + if (cpu_is_omap34xx() || cpu_is_omap44xx() || + (bank->non_wakeup_gpios & gpio_bit)) { /* * Log the edge gpio and manually trigger the IRQ * after resume if the input level changes From 158f1e95180d01ebfd7cd5c8de23050528303f26 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 14 Jun 2011 17:06:02 -0700 Subject: [PATCH 179/327] gpio: include linux/gpio.h where needed Some files use GPIOF_ macros but don't include the header file for them. These macros are being moved to , so add includes for where needed. Signed-off-by: Randy Dunlap Signed-off-by: Grant Likely --- arch/arm/mach-pxa/spitz_pm.c | 1 + drivers/pcmcia/pxa2xx_vpac270.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm/mach-pxa/spitz_pm.c b/arch/arm/mach-pxa/spitz_pm.c index 7fe74067d85f..094279aefe9c 100644 --- a/arch/arm/mach-pxa/spitz_pm.c +++ b/arch/arm/mach-pxa/spitz_pm.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/pcmcia/pxa2xx_vpac270.c b/drivers/pcmcia/pxa2xx_vpac270.c index 435002dfc3ca..712baab3c83d 100644 --- a/drivers/pcmcia/pxa2xx_vpac270.c +++ b/drivers/pcmcia/pxa2xx_vpac270.c @@ -11,6 +11,7 @@ * */ +#include #include #include From c001fb72a7b705f902bdfdd05b5d2408efe6f848 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 14 Jun 2011 17:05:11 -0700 Subject: [PATCH 180/327] gpio: add GPIOF_ values regardless on kconfig settings Make GPIOF_ defined values available even when GPIOLIB nor GENERIC_GPIO is enabled by moving them to . Fixes these build errors in linux-next: sound/soc/codecs/ak4641.c:524: error: 'GPIOF_OUT_INIT_LOW' undeclared (first use in this function) sound/soc/codecs/wm8915.c:2921: error: 'GPIOF_OUT_INIT_LOW' undeclared (first use in this function) Signed-off-by: Randy Dunlap Signed-off-by: Grant Likely --- include/asm-generic/gpio.h | 10 ---------- include/linux/gpio.h | 11 +++++++++++ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index fcdcb5d5c995..d494001b1226 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -170,16 +170,6 @@ extern int __gpio_cansleep(unsigned gpio); extern int __gpio_to_irq(unsigned gpio); -#define GPIOF_DIR_OUT (0 << 0) -#define GPIOF_DIR_IN (1 << 0) - -#define GPIOF_INIT_LOW (0 << 1) -#define GPIOF_INIT_HIGH (1 << 1) - -#define GPIOF_IN (GPIOF_DIR_IN) -#define GPIOF_OUT_INIT_LOW (GPIOF_DIR_OUT | GPIOF_INIT_LOW) -#define GPIOF_OUT_INIT_HIGH (GPIOF_DIR_OUT | GPIOF_INIT_HIGH) - /** * struct gpio - a structure describing a GPIO with configuration * @gpio: the GPIO number diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 32d47e710661..17b5a0d80e42 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -3,6 +3,17 @@ /* see Documentation/gpio.txt */ +/* make these flag values available regardless of GPIO kconfig options */ +#define GPIOF_DIR_OUT (0 << 0) +#define GPIOF_DIR_IN (1 << 0) + +#define GPIOF_INIT_LOW (0 << 1) +#define GPIOF_INIT_HIGH (1 << 1) + +#define GPIOF_IN (GPIOF_DIR_IN) +#define GPIOF_OUT_INIT_LOW (GPIOF_DIR_OUT | GPIOF_INIT_LOW) +#define GPIOF_OUT_INIT_HIGH (GPIOF_DIR_OUT | GPIOF_INIT_HIGH) + #ifdef CONFIG_GENERIC_GPIO #include From 63f6fe92c6b3cbf4c0bbbea4b31fdd3d68e21e4d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 16 Jun 2011 17:16:37 +0200 Subject: [PATCH 181/327] netfilter: ip_tables: fix compile with debug Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 764743843503..24e556e83a3b 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -566,7 +566,7 @@ check_entry(const struct ipt_entry *e, const char *name) const struct xt_entry_target *t; if (!ip_checkentry(&e->ip)) { - duprintf("ip check failed %p %s.\n", e, par->match->name); + duprintf("ip check failed %p %s.\n", e, name); return -EINVAL; } From 58d5a0257d2fd89fbe4451f704193cc95b0a9c97 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 16 Jun 2011 17:24:17 +0200 Subject: [PATCH 182/327] netfilter: ipt_ecn: fix protocol check in ecn_mt_check() Check for protocol inversion in ecn_mt_check() and remove the unnecessary runtime check for IPPROTO_TCP in ecn_mt(). Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ipt_ecn.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index af6e9c778345..aaa85be1b2d8 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -76,8 +76,6 @@ static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { - if (ip_hdr(skb)->protocol != IPPROTO_TCP) - return false; if (!match_tcp(skb, info, &par->hotdrop)) return false; } @@ -97,7 +95,7 @@ static int ecn_mt_check(const struct xt_mtchk_param *par) return -EINVAL; if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) && - ip->proto != IPPROTO_TCP) { + (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) { pr_info("cannot match TCP bits in rule for non-tcp packets\n"); return -EINVAL; } From db898aa2ef6529fa80891e754d353063611c77de Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 16 Jun 2011 17:24:55 +0200 Subject: [PATCH 183/327] netfilter: ipt_ecn: fix inversion for IP header ECN match Userspace allows to specify inversion for IP header ECN matches, the kernel silently accepts it, but doesn't invert the match result. Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ipt_ecn.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index aaa85be1b2d8..2b57e52c746c 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -25,7 +25,8 @@ MODULE_LICENSE("GPL"); static inline bool match_ip(const struct sk_buff *skb, const struct ipt_ecn_info *einfo) { - return (ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect; + return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^ + !!(einfo->invert & IPT_ECN_OP_MATCH_IP); } static inline bool match_tcp(const struct sk_buff *skb, From 2c38de4c1f8da799bdca0e4bb40ca13f2174d3e8 Mon Sep 17 00:00:00 2001 From: Nicolas Cavallari Date: Thu, 16 Jun 2011 17:27:04 +0200 Subject: [PATCH 184/327] netfilter: fix looped (broad|multi)cast's MAC handling By default, when broadcast or multicast packet are sent from a local application, they are sent to the interface then looped by the kernel to other local applications, going throught netfilter hooks in the process. These looped packet have their MAC header removed from the skb by the kernel looping code. This confuse various netfilter's netlink queue, netlink log and the legacy ip_queue, because they try to extract a hardware address from these packets, but extracts a part of the IP header instead. This patch prevent NFQUEUE, NFLOG and ip_QUEUE to include a MAC header if there is none in the packet. Signed-off-by: Nicolas Cavallari Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_queue.c | 3 ++- net/ipv6/netfilter/ip6_queue.c | 3 ++- net/netfilter/nfnetlink_log.c | 3 ++- net/netfilter/nfnetlink_queue.c | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index f7f9bd7ba12d..5c9b9d963918 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -203,7 +203,8 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) else pmsg->outdev_name[0] = '\0'; - if (entry->indev && entry->skb->dev) { + if (entry->indev && entry->skb->dev && + entry->skb->mac_header != entry->skb->network_header) { pmsg->hw_type = entry->skb->dev->type; pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr); diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 065fe405fb58..249394863284 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -204,7 +204,8 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) else pmsg->outdev_name[0] = '\0'; - if (entry->indev && entry->skb->dev) { + if (entry->indev && entry->skb->dev && + entry->skb->mac_header != entry->skb->network_header) { pmsg->hw_type = entry->skb->dev->type; pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr); } diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index e0ee010935e7..2e7ccbb43ddb 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -456,7 +456,8 @@ __build_packet_message(struct nfulnl_instance *inst, if (skb->mark) NLA_PUT_BE32(inst->skb, NFULA_MARK, htonl(skb->mark)); - if (indev && skb->dev) { + if (indev && skb->dev && + skb->mac_header != skb->network_header) { struct nfulnl_msg_packet_hw phw; int len = dev_parse_header(skb, phw.hw_addr); if (len > 0) { diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index b83123f12b42..fdd2fafe0a14 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -335,7 +335,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (entskb->mark) NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark)); - if (indev && entskb->dev) { + if (indev && entskb->dev && + entskb->mac_header != entskb->network_header) { struct nfqnl_msg_packet_hw phw; int len = dev_parse_header(entskb, phw.hw_addr); if (len) { From 50338b889dc504c69e0cb316ac92d1b9e51f3c8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B6r=C3=B6k=20Edwin?= Date: Thu, 16 Jun 2011 00:06:14 +0300 Subject: [PATCH 185/327] fix wrong iput on d_inode introduced by e6bc45d65d MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Git bisection shows that commit e6bc45d65df8599fdbae73be9cec4ceed274db53 causes BUG_ONs under high I/O load: kernel BUG at fs/inode.c:1368! [ 2862.501007] Call Trace: [ 2862.501007] [] d_kill+0xf8/0x140 [ 2862.501007] [] dput+0xc9/0x190 [ 2862.501007] [] fput+0x15f/0x210 [ 2862.501007] [] filp_close+0x61/0x90 [ 2862.501007] [] sys_close+0xb1/0x110 [ 2862.501007] [] system_call_fastpath+0x16/0x1b A reliable way to reproduce this bug is: Login to KDE, run 'rsnapshot sync', and apt-get install openjdk-6-jdk, and apt-get remove openjdk-6-jdk. The buggy part of the patch is this: struct inode *inode = NULL; ..... - if (nd.last.name[nd.last.len]) - goto slashes; inode = dentry->d_inode; - if (inode) - ihold(inode); + if (nd.last.name[nd.last.len] || !inode) + goto slashes; + ihold(inode) ... if (inode) iput(inode); /* truncate the inode here */ If nd.last.name[nd.last.len] is nonzero (and thus goto slashes branch is taken), and dentry->d_inode is non-NULL, then this code now does an additional iput on the inode, which is wrong. Fix this by only setting the inode variable if nd.last.name[nd.last.len] is 0. Reference: https://lkml.org/lkml/2011/6/15/50 Reported-by: Norbert Preining Reported-by: Török Edwin Cc: "Theodore Ts'o" Cc: Al Viro Signed-off-by: Török Edwin Signed-off-by: Al Viro --- fs/namei.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index 9802345df5e7..6301963b161f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2713,8 +2713,10 @@ static long do_unlinkat(int dfd, const char __user *pathname) error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { /* Why not before? Because we want correct error value */ + if (nd.last.name[nd.last.len]) + goto slashes; inode = dentry->d_inode; - if (nd.last.name[nd.last.len] || !inode) + if (!inode) goto slashes; ihold(inode); error = mnt_want_write(nd.path.mnt); From 8aef18845266f5c05904c610088f2d1ed58f6be3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 16 Jun 2011 15:10:06 +0100 Subject: [PATCH 186/327] VFS: Fix vfsmount overput on simultaneous automount [Kudos to dhowells for tracking that crap down] If two processes attempt to cause automounting on the same mountpoint at the same time, the vfsmount holding the mountpoint will be left with one too few references on it, causing a BUG when the kernel tries to clean up. The problem is that lock_mount() drops the caller's reference to the mountpoint's vfsmount in the case where it finds something already mounted on the mountpoint as it transits to the mounted filesystem and replaces path->mnt with the new mountpoint vfsmount. During a pathwalk, however, we don't take a reference on the vfsmount if it is the same as the one in the nameidata struct, but do_add_mount() doesn't know this. The fix is to make sure we have a ref on the vfsmount of the mountpoint before calling do_add_mount(). However, if lock_mount() doesn't transit, we're then left with an extra ref on the mountpoint vfsmount which needs releasing. We can handle that in follow_managed() by not making assumptions about what we can and what we cannot get from lookup_mnt() as the current code does. The callers of follow_managed() expect that reference to path->mnt will be grabbed iff path->mnt has been changed. follow_managed() and follow_automount() keep track of whether such reference has been grabbed and assume that it'll happen in those and only those cases that'll have us return with changed path->mnt. That assumption is almost correct - it breaks in case of racing automounts and in even harder to hit race between following a mountpoint and a couple of mount --move. The thing is, we don't need to make that assumption at all - after the end of loop in follow_manage() we can check if path->mnt has ended up unchanged and do mntput() if needed. The BUG can be reproduced with the following test program: #include #include #include #include #include int main(int argc, char **argv) { int pid, ws; struct stat buf; pid = fork(); stat(argv[1], &buf); if (pid > 0) wait(&ws); return 0; } and the following procedure: (1) Mount an NFS volume that on the server has something else mounted on a subdirectory. For instance, I can mount / from my server: mount warthog:/ /mnt -t nfs4 -r On the server /data has another filesystem mounted on it, so NFS will see a change in FSID as it walks down the path, and will mark /mnt/data as being a mountpoint. This will cause the automount code to be triggered. !!! Do not look inside the mounted fs at this point !!! (2) Run the above program on a file within the submount to generate two simultaneous automount requests: /tmp/forkstat /mnt/data/testfile (3) Unmount the automounted submount: umount /mnt/data (4) Unmount the original mount: umount /mnt At this point the kernel should throw a BUG with something like the following: BUG: Dentry ffff880032e3c5c0{i=2,n=} still in use (1) [unmount of nfs4 0:12] Note that the bug appears on the root dentry of the original mount, not the mountpoint and not the submount because sys_umount() hasn't got to its final mntput_no_expire() yet, but this isn't so obvious from the call trace: [] shrink_dcache_for_umount+0x69/0x82 [] generic_shutdown_super+0x37/0x15b [] ? nfs_super_return_all_delegations+0x2e/0x1b1 [nfs] [] kill_anon_super+0x1d/0x7e [] nfs4_kill_super+0x60/0xb6 [nfs] [] deactivate_locked_super+0x34/0x83 [] deactivate_super+0x6f/0x7b [] mntput_no_expire+0x18d/0x199 [] mntput+0x3b/0x44 [] release_mounts+0xa2/0xbf [] sys_umount+0x47a/0x4ba [] ? trace_hardirqs_on_caller+0x1fd/0x22f [] system_call_fastpath+0x16/0x1b as do_umount() is inlined. However, you can see release_mounts() in there. Note also that it may be necessary to have multiple CPU cores to be able to trigger this bug. Tested-by: Jeff Layton Tested-by: Ian Kent Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/namei.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 6301963b161f..9e425e7e6c8f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -812,6 +812,11 @@ static int follow_automount(struct path *path, unsigned flags, if (!mnt) /* mount collision */ return 0; + if (!*need_mntput) { + /* lock_mount() may release path->mnt on error */ + mntget(path->mnt); + *need_mntput = true; + } err = finish_automount(mnt, path); switch (err) { @@ -819,12 +824,9 @@ static int follow_automount(struct path *path, unsigned flags, /* Someone else made a mount here whilst we were busy */ return 0; case 0: - dput(path->dentry); - if (*need_mntput) - mntput(path->mnt); + path_put(path); path->mnt = mnt; path->dentry = dget(mnt->mnt_root); - *need_mntput = true; return 0; default: return err; @@ -844,9 +846,10 @@ static int follow_automount(struct path *path, unsigned flags, */ static int follow_managed(struct path *path, unsigned flags) { + struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */ unsigned managed; bool need_mntput = false; - int ret; + int ret = 0; /* Given that we're not holding a lock here, we retain the value in a * local variable for each dentry as we look at it so that we don't see @@ -861,7 +864,7 @@ static int follow_managed(struct path *path, unsigned flags) BUG_ON(!path->dentry->d_op->d_manage); ret = path->dentry->d_op->d_manage(path->dentry, false); if (ret < 0) - return ret == -EISDIR ? 0 : ret; + break; } /* Transit to a mounted filesystem. */ @@ -887,14 +890,19 @@ static int follow_managed(struct path *path, unsigned flags) if (managed & DCACHE_NEED_AUTOMOUNT) { ret = follow_automount(path, flags, &need_mntput); if (ret < 0) - return ret == -EISDIR ? 0 : ret; + break; continue; } /* We didn't change the current path point */ break; } - return 0; + + if (need_mntput && path->mnt == mnt) + mntput(path->mnt); + if (ret == -EISDIR) + ret = 0; + return ret; } int follow_down_one(struct path *path) From e1d76719ea3f3f755d597cef9c2087bdda5fea43 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 14 Jun 2011 15:00:18 -0700 Subject: [PATCH 187/327] staging: fix iio builds when IIO_RING_BUFFER is not enabled Fix build by moving enum list outside of #ifdef CONFIG_IIO_RING_BUFFER. drivers/staging/iio/accel/adis16201_core.c:413: error: 'ADIS16201_SCAN_SUPPLY' undeclared here (not in a function) drivers/staging/iio/accel/adis16201_core.c:417: error: 'ADIS16201_SCAN_TEMP' undeclared here (not in a function) .. drivers/staging/iio/accel/adis16203_core.c:374: error: 'ADIS16203_SCAN_SUPPLY' undeclared here (not in a function) drivers/staging/iio/accel/adis16203_core.c:378: error: 'ADIS16203_SCAN_AUX_ADC' undeclared here (not in a function) .. Signed-off-by: Randy Dunlap Acked-by: Jonathan Cameron Cc: linux-iio@vger.kernel.org Signed-off-by: Linus Torvalds --- drivers/staging/iio/accel/adis16201.h | 2 +- drivers/staging/iio/accel/adis16203.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/iio/accel/adis16201.h b/drivers/staging/iio/accel/adis16201.h index 0b9b85424dfa..4cc1a5bfab40 100644 --- a/drivers/staging/iio/accel/adis16201.h +++ b/drivers/staging/iio/accel/adis16201.h @@ -81,7 +81,6 @@ struct adis16201_state { int adis16201_set_irq(struct iio_dev *indio_dev, bool enable); -#ifdef CONFIG_IIO_RING_BUFFER enum adis16201_scan { ADIS16201_SCAN_SUPPLY, ADIS16201_SCAN_ACC_X, @@ -92,6 +91,7 @@ enum adis16201_scan { ADIS16201_SCAN_INCLI_Y, }; +#ifdef CONFIG_IIO_RING_BUFFER void adis16201_remove_trigger(struct iio_dev *indio_dev); int adis16201_probe_trigger(struct iio_dev *indio_dev); diff --git a/drivers/staging/iio/accel/adis16203.h b/drivers/staging/iio/accel/adis16203.h index 8bb8ce50c248..175e21bb9b40 100644 --- a/drivers/staging/iio/accel/adis16203.h +++ b/drivers/staging/iio/accel/adis16203.h @@ -76,7 +76,6 @@ struct adis16203_state { int adis16203_set_irq(struct iio_dev *indio_dev, bool enable); -#ifdef CONFIG_IIO_RING_BUFFER enum adis16203_scan { ADIS16203_SCAN_SUPPLY, ADIS16203_SCAN_AUX_ADC, @@ -85,6 +84,7 @@ enum adis16203_scan { ADIS16203_SCAN_INCLI_Y, }; +#ifdef CONFIG_IIO_RING_BUFFER void adis16203_remove_trigger(struct iio_dev *indio_dev); int adis16203_probe_trigger(struct iio_dev *indio_dev); From 42c1edd345c8412d96e7a362ee06feb7be73bb6c Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 16 Jun 2011 17:29:22 +0200 Subject: [PATCH 188/327] netfilter: nf_nat: avoid double seq_adjust for loopback Avoid double seq adjustment for loopback traffic because it causes silent repetition of TCP data. One example is passive FTP with DNAT rule and difference in the length of IP addresses. This patch adds check if packet is sent and received via loopback device. As the same conntrack is used both for outgoing and incoming direction, we restrict seq adjustment to happen only in POSTROUTING. Signed-off-by: Julian Anastasov Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 6 ++++++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index c7c42e7acc31..5d4f8e586e32 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -307,6 +307,12 @@ static inline int nf_ct_is_untracked(const struct nf_conn *ct) return test_bit(IPS_UNTRACKED_BIT, &ct->status); } +/* Packet is received from loopback */ +static inline bool nf_is_loopback_packet(const struct sk_buff *skb) +{ + return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK; +} + extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); extern unsigned int nf_conntrack_htable_size; extern unsigned int nf_conntrack_max; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index db10075dd88e..de9da21113a1 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -121,7 +121,9 @@ static unsigned int ipv4_confirm(unsigned int hooknum, return ret; } - if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { + /* adjust seqs for loopback traffic only in outgoing direction */ + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && + !nf_is_loopback_packet(skb)) { typeof(nf_nat_seq_adjust_hook) seq_adjust; seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); From 5e7f23373bf9a853e9256e81e86724cdd0a33c29 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 13 Jun 2011 22:31:12 +0100 Subject: [PATCH 189/327] afs: afs_fill_page reads too much, or wrong data afs_fill_page should read the page that is about to be written but the current implementation has a number of issues. If we aren't extending the file we always read PAGE_CACHE_SIZE at offset 0. If we are extending the file we try to read the entire file. Change afs_fill_page to read PAGE_CACHE_SIZE at the right offset, clamped to i_size. While here, avoid calling afs_fill_page when we are doing a PAGE_CACHE_SIZE write. Signed-off-by: Anton Blanchard Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/afs/write.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index 789b3afb3423..b806285ff853 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -84,23 +84,21 @@ void afs_put_writeback(struct afs_writeback *wb) * partly or wholly fill a page that's under preparation for writing */ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, - loff_t pos, unsigned len, struct page *page) + loff_t pos, struct page *page) { loff_t i_size; - unsigned eof; int ret; + int len; - _enter(",,%llu,%u", (unsigned long long)pos, len); - - ASSERTCMP(len, <=, PAGE_CACHE_SIZE); + _enter(",,%llu", (unsigned long long)pos); i_size = i_size_read(&vnode->vfs_inode); - if (pos + len > i_size) - eof = i_size; + if (pos + PAGE_CACHE_SIZE > i_size) + len = i_size - pos; else - eof = PAGE_CACHE_SIZE; + len = PAGE_CACHE_SIZE; - ret = afs_vnode_fetch_data(vnode, key, 0, eof, page); + ret = afs_vnode_fetch_data(vnode, key, pos, len, page); if (ret < 0) { if (ret == -ENOENT) { _debug("got NOENT from server" @@ -153,9 +151,8 @@ int afs_write_begin(struct file *file, struct address_space *mapping, *pagep = page; /* page won't leak in error case: it eventually gets cleaned off LRU */ - if (!PageUptodate(page)) { - _debug("not up to date"); - ret = afs_fill_page(vnode, key, pos, len, page); + if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) { + ret = afs_fill_page(vnode, key, index << PAGE_CACHE_SHIFT, page); if (ret < 0) { kfree(candidate); _leave(" = %d [prep]", ret); From f9f07b6c1372b1436aa6b45333445b443ffd8c95 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 14 Jun 2011 00:58:27 +0200 Subject: [PATCH 190/327] vfs: Fix data corruption after failed write in __block_write_begin() I've got a report of a file corruption from fsxlinux on ext3. The important operations to the page were: mapwrite to a hole partial write to the page read - found the page zeroed from the end of the normal write The culprit seems to be that if get_block() fails in __block_write_begin() (e.g. transient ENOSPC in ext3), the function does ClearPageUptodate(page). Thus when we retry the write, the logic in __block_write_begin() thinks zeroing of the page is needed and overwrites old data. In fact, I don't see why we should ever need to zero the uptodate bit here - either the page was uptodate when we entered __block_write_begin() and it should stay so when we leave it, or it was not uptodate and noone had right to set it uptodate during __block_write_begin() so it remains !uptodate when we leave as well. So just remove clearing of the bit. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/buffer.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 49c9aada0374..1a80b048ade8 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1902,10 +1902,8 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len, if (!buffer_uptodate(*wait_bh)) err = -EIO; } - if (unlikely(err)) { + if (unlikely(err)) page_zero_new_buffers(page, from, to); - ClearPageUptodate(page); - } return err; } EXPORT_SYMBOL(__block_write_begin); From 2e41ae225f742ded5b7d9847cd8bd605f27daba8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Jun 2011 00:38:44 +0100 Subject: [PATCH 191/327] AFS: Set s_id in the superblock to the volume name Set s_id in the superblock to the name of the AFS volume that this superblock corresponds to. Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/afs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/afs/super.c b/fs/afs/super.c index b7d48d7eaa1e..356dcf0929e8 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -313,6 +313,7 @@ static int afs_fill_super(struct super_block *sb, sb->s_magic = AFS_FS_MAGIC; sb->s_op = &afs_super_ops; sb->s_bdi = &as->volume->bdi; + strlcpy(sb->s_id, as->volume->vlocation->vldb.name, sizeof(sb->s_id)); /* allocate the root inode and dentry */ fid.vid = as->volume->vid; From d6e43f751f252c68ca69fa6d18665d88d69ef8b7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Jun 2011 00:45:44 +0100 Subject: [PATCH 192/327] AFS: Use i_generation not i_version for the vnode uniquifier Store the AFS vnode uniquifier in the i_generation field, not the i_version field of the inode struct. i_version can then be given the AFS data version number. Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/afs/dir.c | 8 ++++---- fs/afs/fsclient.c | 3 ++- fs/afs/inode.c | 10 +++++----- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 20c106f24927..1b0b19550015 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -584,11 +584,11 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, success: d_add(dentry, inode); - _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }", + _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%u }", fid.vnode, fid.unique, dentry->d_inode->i_ino, - (unsigned long long)dentry->d_inode->i_version); + dentry->d_inode->i_generation); return NULL; } @@ -671,10 +671,10 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd) * been deleted and replaced, and the original vnode ID has * been reused */ if (fid.unique != vnode->fid.unique) { - _debug("%s: file deleted (uq %u -> %u I:%llu)", + _debug("%s: file deleted (uq %u -> %u I:%u)", dentry->d_name.name, fid.unique, vnode->fid.unique, - (unsigned long long)dentry->d_inode->i_version); + dentry->d_inode->i_generation); spin_lock(&vnode->lock); set_bit(AFS_VNODE_DELETED, &vnode->flags); spin_unlock(&vnode->lock); diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 4bd0218473a9..346e3289abd7 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -89,7 +89,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, i_size_write(&vnode->vfs_inode, size); vnode->vfs_inode.i_uid = status->owner; vnode->vfs_inode.i_gid = status->group; - vnode->vfs_inode.i_version = vnode->fid.unique; + vnode->vfs_inode.i_generation = vnode->fid.unique; vnode->vfs_inode.i_nlink = status->nlink; mode = vnode->vfs_inode.i_mode; @@ -102,6 +102,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server; vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime; vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; + vnode->vfs_inode.i_version = data_version; } expected_version = status->data_version; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index db66c5201474..0fdab6e03d87 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -75,7 +75,8 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) inode->i_ctime.tv_nsec = 0; inode->i_atime = inode->i_mtime = inode->i_ctime; inode->i_blocks = 0; - inode->i_version = vnode->fid.unique; + inode->i_generation = vnode->fid.unique; + inode->i_version = vnode->status.data_version; inode->i_mapping->a_ops = &afs_fs_aops; /* check to see whether a symbolic link is really a mountpoint */ @@ -100,7 +101,7 @@ static int afs_iget5_test(struct inode *inode, void *opaque) struct afs_iget_data *data = opaque; return inode->i_ino == data->fid.vnode && - inode->i_version == data->fid.unique; + inode->i_generation == data->fid.unique; } /* @@ -122,7 +123,7 @@ static int afs_iget5_set(struct inode *inode, void *opaque) struct afs_vnode *vnode = AFS_FS_I(inode); inode->i_ino = data->fid.vnode; - inode->i_version = data->fid.unique; + inode->i_generation = data->fid.unique; vnode->fid = data->fid; vnode->volume = data->volume; @@ -380,8 +381,7 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry, inode = dentry->d_inode; - _enter("{ ino=%lu v=%llu }", inode->i_ino, - (unsigned long long)inode->i_version); + _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation); generic_fillattr(inode, stat); return 0; From a27a263bae072a499acc77b632238a6dacccf888 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 16 Jun 2011 12:02:23 +0000 Subject: [PATCH 193/327] xfs: make log devices with write back caches work There's no reason not to support cache flushing on external log devices. The only thing this really requires is flushing the data device first both in fsync and log commits. A side effect is that we also have to remove the barrier write test during mount, which has been superflous since the new FLUSH+FUA code anyway. Also use the chance to flush the RT subvolume write cache before the fsync commit, which is required for correct semantics. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_file.c | 50 +++++++++++++++--------- fs/xfs/linux-2.6/xfs_super.c | 75 ------------------------------------ fs/xfs/xfs_log.c | 11 +++++- 3 files changed, 41 insertions(+), 95 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index f4213ba1ff85..7f782af286bf 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -131,19 +131,34 @@ xfs_file_fsync( { struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; int error = 0; int log_flushed = 0; trace_xfs_file_fsync(ip); - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + if (XFS_FORCED_SHUTDOWN(mp)) return -XFS_ERROR(EIO); xfs_iflags_clear(ip, XFS_ITRUNCATED); xfs_ioend_wait(ip); + if (mp->m_flags & XFS_MOUNT_BARRIER) { + /* + * If we have an RT and/or log subvolume we need to make sure + * to flush the write cache the device used for file data + * first. This is to ensure newly written file data make + * it to disk before logging the new inode size in case of + * an extending write. + */ + if (XFS_IS_REALTIME_INODE(ip)) + xfs_blkdev_issue_flush(mp->m_rtdev_targp); + else if (mp->m_logdev_targp != mp->m_ddev_targp) + xfs_blkdev_issue_flush(mp->m_ddev_targp); + } + /* * We always need to make sure that the required inode state is safe on * disk. The inode might be clean but we still might need to force the @@ -175,9 +190,9 @@ xfs_file_fsync( * updates. The sync transaction will also force the log. */ xfs_iunlock(ip, XFS_ILOCK_SHARED); - tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); error = xfs_trans_reserve(tp, 0, - XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0); + XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); if (error) { xfs_trans_cancel(tp, 0); return -error; @@ -209,28 +224,25 @@ xfs_file_fsync( * force the log. */ if (xfs_ipincount(ip)) { - error = _xfs_log_force_lsn(ip->i_mount, + error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn, XFS_LOG_SYNC, &log_flushed); } xfs_iunlock(ip, XFS_ILOCK_SHARED); } - if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) { - /* - * If the log write didn't issue an ordered tag we need - * to flush the disk cache for the data device now. - */ - if (!log_flushed) - xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp); - - /* - * If this inode is on the RT dev we need to flush that - * cache as well. - */ - if (XFS_IS_REALTIME_INODE(ip)) - xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); - } + /* + * If we only have a single device, and the log force about was + * a no-op we might have to flush the data device cache here. + * This can only happen for fdatasync/O_DSYNC if we were overwriting + * an already allocated file and thus do not have any metadata to + * commit. + */ + if ((mp->m_flags & XFS_MOUNT_BARRIER) && + mp->m_logdev_targp == mp->m_ddev_targp && + !XFS_IS_REALTIME_INODE(ip) && + !log_flushed) + xfs_blkdev_issue_flush(mp->m_ddev_targp); return -error; } diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 1e3a7ce804dc..a1a881e68a9a 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -627,68 +627,6 @@ xfs_blkdev_put( blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); } -/* - * Try to write out the superblock using barriers. - */ -STATIC int -xfs_barrier_test( - xfs_mount_t *mp) -{ - xfs_buf_t *sbp = xfs_getsb(mp, 0); - int error; - - XFS_BUF_UNDONE(sbp); - XFS_BUF_UNREAD(sbp); - XFS_BUF_UNDELAYWRITE(sbp); - XFS_BUF_WRITE(sbp); - XFS_BUF_UNASYNC(sbp); - XFS_BUF_ORDERED(sbp); - - xfsbdstrat(mp, sbp); - error = xfs_buf_iowait(sbp); - - /* - * Clear all the flags we set and possible error state in the - * buffer. We only did the write to try out whether barriers - * worked and shouldn't leave any traces in the superblock - * buffer. - */ - XFS_BUF_DONE(sbp); - XFS_BUF_ERROR(sbp, 0); - XFS_BUF_UNORDERED(sbp); - - xfs_buf_relse(sbp); - return error; -} - -STATIC void -xfs_mountfs_check_barriers(xfs_mount_t *mp) -{ - int error; - - if (mp->m_logdev_targp != mp->m_ddev_targp) { - xfs_notice(mp, - "Disabling barriers, not supported with external log device"); - mp->m_flags &= ~XFS_MOUNT_BARRIER; - return; - } - - if (xfs_readonly_buftarg(mp->m_ddev_targp)) { - xfs_notice(mp, - "Disabling barriers, underlying device is readonly"); - mp->m_flags &= ~XFS_MOUNT_BARRIER; - return; - } - - error = xfs_barrier_test(mp); - if (error) { - xfs_notice(mp, - "Disabling barriers, trial barrier write failed"); - mp->m_flags &= ~XFS_MOUNT_BARRIER; - return; - } -} - void xfs_blkdev_issue_flush( xfs_buftarg_t *buftarg) @@ -1240,14 +1178,6 @@ xfs_fs_remount( switch (token) { case Opt_barrier: mp->m_flags |= XFS_MOUNT_BARRIER; - - /* - * Test if barriers are actually working if we can, - * else delay this check until the filesystem is - * marked writeable. - */ - if (!(mp->m_flags & XFS_MOUNT_RDONLY)) - xfs_mountfs_check_barriers(mp); break; case Opt_nobarrier: mp->m_flags &= ~XFS_MOUNT_BARRIER; @@ -1282,8 +1212,6 @@ xfs_fs_remount( /* ro -> rw */ if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { mp->m_flags &= ~XFS_MOUNT_RDONLY; - if (mp->m_flags & XFS_MOUNT_BARRIER) - xfs_mountfs_check_barriers(mp); /* * If this is the first remount to writeable state we @@ -1465,9 +1393,6 @@ xfs_fs_fill_super( if (error) goto out_free_sb; - if (mp->m_flags & XFS_MOUNT_BARRIER) - xfs_mountfs_check_barriers(mp); - error = xfs_filestream_mount(mp); if (error) goto out_free_sb; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 211930246f20..41d5b8f2bf92 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1372,8 +1372,17 @@ xlog_sync(xlog_t *log, XFS_BUF_ASYNC(bp); bp->b_flags |= XBF_LOG_BUFFER; - if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) + if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) { + /* + * If we have an external log device, flush the data device + * before flushing the log to make sure all meta data + * written back from the AIL actually made it to disk + * before writing out the new log tail LSN in the log buffer. + */ + if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp) + xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp); XFS_BUF_ORDERED(bp); + } ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); From ee7b75fc4f3ae49e1f25bf56219bb5de3c29afaf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 16 Jun 2011 13:15:41 -0400 Subject: [PATCH 194/327] NFSv4: Fix a readdir regression Commit 7ebb9315 (NFS: use secinfo when crossing mountpoints) introduces a regression when decoding an NFSv4 readdir entry that sets the rdattr_error field. By treating the resulting value as if it is a decoding error, the current code may cause us to skip valid readdir entries. Reported-by: Andy Adamson Cc: stable@kernel.org [2.6.39] Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c4b7d6c04948..5db44a8dddf9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3098,7 +3098,7 @@ out_overflow: return -EIO; } -static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap) +static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap, int32_t *res) { __be32 *p; @@ -3109,7 +3109,7 @@ static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap) if (unlikely(!p)) goto out_overflow; bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; - return -be32_to_cpup(p); + *res = -be32_to_cpup(p); } return 0; out_overflow: @@ -4070,6 +4070,7 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, int status; umode_t fmode = 0; uint32_t type; + int32_t err; status = decode_attr_type(xdr, bitmap, &type); if (status < 0) @@ -4095,13 +4096,12 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; - status = decode_attr_error(xdr, bitmap); - if (status == -NFS4ERR_WRONGSEC) { - nfs_fixup_secinfo_attributes(fattr, fh); - status = 0; - } + err = 0; + status = decode_attr_error(xdr, bitmap, &err); if (status < 0) goto xdr_error; + if (err == -NFS4ERR_WRONGSEC) + nfs_fixup_secinfo_attributes(fattr, fh); status = decode_attr_filehandle(xdr, bitmap, fh); if (status < 0) From b5199515c25cca622495eb9c6a8a1d275e775088 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Jun 2011 16:22:08 +0200 Subject: [PATCH 195/327] clocksource: Make watchdog robust vs. interruption The clocksource watchdog code is interruptible and it has been observed that this can trigger false positives which disable the TSC. The reason is that an interrupt storm or a long running interrupt handler between the read of the watchdog source and the read of the TSC brings the two far enough apart that the delta is larger than the unstable treshold. Move both reads into a short interrupt disabled region to avoid that. Reported-and-tested-by: Vernon Mauery Signed-off-by: Thomas Gleixner Cc: stable@kernel.org --- include/linux/clocksource.h | 1 + kernel/time/clocksource.c | 24 +++++++++++++----------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index d4646b48dc4a..18a1baf31f2d 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -188,6 +188,7 @@ struct clocksource { #ifdef CONFIG_CLOCKSOURCE_WATCHDOG /* Watchdog related data, used by the framework */ struct list_head wd_list; + cycle_t cs_last; cycle_t wd_last; #endif } ____cacheline_aligned; diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1c95fd677328..e0980f0d9a0a 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -185,7 +185,6 @@ static struct clocksource *watchdog; static struct timer_list watchdog_timer; static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); static DEFINE_SPINLOCK(watchdog_lock); -static cycle_t watchdog_last; static int watchdog_running; static int clocksource_watchdog_kthread(void *data); @@ -254,11 +253,6 @@ static void clocksource_watchdog(unsigned long data) if (!watchdog_running) goto out; - wdnow = watchdog->read(watchdog); - wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask, - watchdog->mult, watchdog->shift); - watchdog_last = wdnow; - list_for_each_entry(cs, &watchdog_list, wd_list) { /* Clocksource already marked unstable? */ @@ -268,19 +262,28 @@ static void clocksource_watchdog(unsigned long data) continue; } + local_irq_disable(); csnow = cs->read(cs); + wdnow = watchdog->read(watchdog); + local_irq_enable(); /* Clocksource initialized ? */ if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { cs->flags |= CLOCK_SOURCE_WATCHDOG; - cs->wd_last = csnow; + cs->wd_last = wdnow; + cs->cs_last = csnow; continue; } - /* Check the deviation from the watchdog clocksource. */ - cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) & + wd_nsec = clocksource_cyc2ns((wdnow - cs->wd_last) & watchdog->mask, + watchdog->mult, watchdog->shift); + + cs_nsec = clocksource_cyc2ns((csnow - cs->cs_last) & cs->mask, cs->mult, cs->shift); - cs->wd_last = csnow; + cs->cs_last = csnow; + cs->wd_last = wdnow; + + /* Check the deviation from the watchdog clocksource. */ if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { clocksource_unstable(cs, cs_nsec - wd_nsec); continue; @@ -318,7 +321,6 @@ static inline void clocksource_start_watchdog(void) return; init_timer(&watchdog_timer); watchdog_timer.function = clocksource_watchdog; - watchdog_last = watchdog->read(watchdog); watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask)); watchdog_running = 1; From acd049c6e99d2ad1195666195230f6881d1c1588 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 16 Jun 2011 13:07:19 -0400 Subject: [PATCH 196/327] xen/setup: Fix for incorrect xen_extra_mem_start. The earlier attempts (24bdb0b62cc82120924762ae6bc85afc8c3f2b26) at fixing this problem caused other problems to surface (PV guests with no PCI passthrough would have SWIOTLB turned on - which meant 64MB of precious contingous DMA32 memory being eaten up per guest). The problem was: "on xen we add an extra memory region at the end of the e820, and on this particular machine this extra memory region would start below 4g and cross over the 4g boundary: [0xfee01000-0x192655000) Unfortunately e820_end_of_low_ram_pfn does not expect an e820 layout like that so it returns 4g, therefore initial_memory_mapping will map [0 - 0x100000000), that is a memory range that includes some reserved memory regions." The memory range was the IOAPIC regions, and with the 1-1 mapping turned on, it would map them as RAM, not as MMIO regions. This caused the hypervisor to complain. Fortunately this is experienced only under the initial domain so we guard for it. Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/setup.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index be1a464f6d66..60aeeb56948f 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -227,11 +227,7 @@ char * __init xen_memory_setup(void) memcpy(map_raw, map, sizeof(map)); e820.nr_map = 0; -#ifdef CONFIG_X86_32 xen_extra_mem_start = mem_end; -#else - xen_extra_mem_start = max((1ULL << 32), mem_end); -#endif for (i = 0; i < memmap.nr_entries; i++) { unsigned long long end; @@ -266,6 +262,12 @@ char * __init xen_memory_setup(void) if (map[i].size > 0) e820_add_region(map[i].addr, map[i].size, map[i].type); } + /* Align the balloon area so that max_low_pfn does not get set + * to be at the _end_ of the PCI gap at the far end (fee01000). + * Note that xen_extra_mem_start gets set in the loop above to be + * past the last E820 region. */ + if (xen_initial_domain() && (xen_extra_mem_start < (1ULL<<32))) + xen_extra_mem_start = (1ULL<<32); /* * In domU, the ISA region is normal, usable memory, but we From 0ec5258d68c626922d92e2f0e4e5c689e5360a5d Mon Sep 17 00:00:00 2001 From: Torsten Schenk Date: Thu, 16 Jun 2011 21:06:27 +0200 Subject: [PATCH 197/327] ALSA: 6fire - Fix signedness bug Fixed remaining issues of the signedness bug discovered by Dan Carpenter. A check was remaining that tests if unsigned rt->rate is >= 0. Changed that so that rt->rate now consistently uses ARRAY_SIZE(rates) as invalid rate value and not -1. Signed-off-by: Torsten Schenk Signed-off-by: Takashi Iwai --- sound/usb/6fire/pcm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/6fire/pcm.c b/sound/usb/6fire/pcm.c index b137b25865cc..d144cdb2f159 100644 --- a/sound/usb/6fire/pcm.c +++ b/sound/usb/6fire/pcm.c @@ -395,12 +395,12 @@ static int usb6fire_pcm_open(struct snd_pcm_substream *alsa_sub) alsa_rt->hw = pcm_hw; if (alsa_sub->stream == SNDRV_PCM_STREAM_PLAYBACK) { - if (rt->rate >= 0) + if (rt->rate < ARRAY_SIZE(rates)) alsa_rt->hw.rates = rates_alsaid[rt->rate]; alsa_rt->hw.channels_max = OUT_N_CHANNELS; sub = &rt->playback; } else if (alsa_sub->stream == SNDRV_PCM_STREAM_CAPTURE) { - if (rt->rate >= 0) + if (rt->rate < ARRAY_SIZE(rates)) alsa_rt->hw.rates = rates_alsaid[rt->rate]; alsa_rt->hw.channels_max = IN_N_CHANNELS; sub = &rt->capture; From 46a310b80bc2c9ccc019649c9da91194cbc10944 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 16 Jun 2011 15:36:38 -0400 Subject: [PATCH 198/327] [CPUFREQ] Don't set stat->last_index to -1 if the pol->cur has incorrect value. If the driver submitted an non-existing pol>cur value (say it used the default initialized value of zero), when the cpufreq stats tries to setup its initial values it incorrectly sets stat->last_index to -1 (or 0xfffff...). And cpufreq_stats_update tries to update at that index location and fails. This can be caused by: stat->last_index = freq_table_get_index(stat, policy->cur); not finding the appropiate frequency in the table (b/c the policy->cur is wrong) and we end up crashing. The fix however is concentrated in the 'cpufreq_stats_update' as the last_index (and old_index) are updated there. Which means it can reset the last_index to -1 again and on the next iteration cause a crash. Without this patch, the following crash is observed: powernow-k8: Found 1 AMD Athlon(tm) 64 Processor 3700+ (1 cpu cores) (version 2.20.00) powernow-k8: fid 0x2 (1000 MHz), vid 0x12 powernow-k8: fid 0xa (1800 MHz), vid 0xa powernow-k8: fid 0xc (2000 MHz), vid 0x8 powernow-k8: fid 0xe (2200 MHz), vid 0x8 Marking TSC unstable due to cpufreq changes powernow-k8: fid trans failed, fid 0x2, curr 0x0 BUG: unable to handle kernel paging request at ffff880807e07b78 IP: [] cpufreq_stats_update+0x46/0x5b .. snip.. Pid: 1, comm: swapper Not tainted 3.0.0-rc2 #45 MICRO-STAR INTERNATIONAL CO., LTD MS-7094/MS-7094 ..snip.. Call Trace: [] cpufreq_stat_notifier_trans+0x48/0x7c [] notifier_call_chain+0x32/0x5e [] __srcu_notifier_call_chain+0x47/0x63 [] srcu_notifier_call_chain+0xf/0x11 [] cpufreq_notify_transition+0x111/0x134 [] powernowk8_target+0x53b/0x617 [] __cpufreq_driver_target+0x2e/0x30 [] cpufreq_governor_dbs+0x339/0x356 [] __cpufreq_governor+0xa8/0xe9 [] __cpufreq_set_policy+0x132/0x13e [] cpufreq_add_dev_interface+0x272/0x28c Reported-by: Tobias Diedrich Tested-by: Tobias Diedrich Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_stats.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 853f92d23ddb..faf7c5217848 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -298,11 +298,13 @@ static int cpufreq_stat_notifier_trans(struct notifier_block *nb, old_index = stat->last_index; new_index = freq_table_get_index(stat, freq->new); - cpufreq_stats_update(freq->cpu); - if (old_index == new_index) + /* We can't do stat->time_in_state[-1]= .. */ + if (old_index == -1 || new_index == -1) return 0; - if (old_index == -1 || new_index == -1) + cpufreq_stats_update(freq->cpu); + + if (old_index == new_index) return 0; spin_lock(&cpufreq_stats_lock); From a9d3d2068064b7a6395871a49616d3784f802d50 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 16 Jun 2011 15:36:39 -0400 Subject: [PATCH 199/327] [CPUFREQ] powernow-k8: Don't notify of successful transition if we failed (vid case). Before this patch if we failed the vid transition would still try to submit the "new" frequencies to cpufreq. That is incorrect - also we could submit a non-existing frequency value which would cause cpufreq to crash. The ultimate fix is in cpufreq to deal with incorrect values, but this patch improves the error recovery in the AMD powernowk8 driver. The failure that was reported was as follows: powernow-k8: Found 1 AMD Athlon(tm) 64 Processor 3700+ (1 cpu cores) (version 2.20.00) powernow-k8: fid 0x2 (1000 MHz), vid 0x12 powernow-k8: fid 0xa (1800 MHz), vid 0xa powernow-k8: fid 0xc (2000 MHz), vid 0x8 powernow-k8: fid 0xe (2200 MHz), vid 0x8 Marking TSC unstable due to cpufreq changes powernow-k8: fid trans failed, fid 0x2, curr 0x0 BUG: unable to handle kernel paging request at ffff880807e07b78 IP: [] cpufreq_stats_update+0x46/0x5b ... And transition fails and data->currfid ends up with 0. Since the machine does not support 800Mhz value when the calculation is done ('find_khz_freq_from_fid(data->currfid);') it reports the new frequency as 800000 which is bogus. This patch fixes the issue during target setting. The patch however does not fix the issue in 'powernowk8_cpu_init' where the pol->cur can also be set with the 800000 value: pol->cur = find_khz_freq_from_fid(data->currfid); dprintk("policy current frequency %d kHz\n", pol->cur); /* min/max the cpu is capable of */ if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) { The fix for that looks to update cpufreq_frequency_table_cpuinfo to check pol->cur.... but that would cause an regression in how the acpi-cpufreq driver works (it sets cpu->cur after calling cpufreq_frequency_table_cpuinfo). Instead the fix will be to let cpufreq gracefully handle bogus data (another patch). Acked-by: Borislav Petkov CC: andre.przywara@amd.com CC: Mark.Langsdorf@amd.com Reported-by: Tobias Diedrich Tested-by: Tobias Diedrich [v1: Rebased on v3.0-rc2, reduced patch to deal with vid case] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Dave Jones --- drivers/cpufreq/powernow-k8.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 83479b6fb9a1..287c56f6749a 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -1079,6 +1079,9 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, } res = transition_fid_vid(data, fid, vid); + if (res) + return res; + freqs.new = find_khz_freq_from_fid(data->currfid); for_each_cpu(i, data->available_cores) { From fbb5b89eabea5ae7d621b7861863159560d8faa4 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 16 Jun 2011 15:36:40 -0400 Subject: [PATCH 200/327] [CPUFREQ] powernow-k8: Don't try to transition if the pstate is incorrect This patch augments the pstate transition code to error out (instead of returning 0) when an incorrect pstate is provided. Suggested-by: Borislav Petkov CC: andre.przywara@amd.com CC: Mark.Langsdorf@amd.com Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Dave Jones --- drivers/cpufreq/powernow-k8.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 287c56f6749a..bce576d7478e 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -1104,7 +1104,8 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, /* get MSR index for hardware pstate transition */ pstate = index & HW_PSTATE_MASK; if (pstate > data->max_hw_pstate) - return 0; + return -EINVAL; + freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); From 99a15e21d96f6857dafab1e5167e5e8183215c9c Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 16 Jun 2011 12:56:19 -0700 Subject: [PATCH 201/327] migrate: don't account swapcache as shmem swapcache will reach the below code path in migrate_page_move_mapping, and swapcache is accounted as NR_FILE_PAGES but it's not accounted as NR_SHMEM. Hugh pointed out we must use PageSwapCache instead of comparing mapping to &swapper_space, to avoid build failure with CONFIG_SWAP=n. Signed-off-by: Andrea Arcangeli Acked-by: Hugh Dickins Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index e4a5c912983d..666e4e677414 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -288,7 +288,7 @@ static int migrate_page_move_mapping(struct address_space *mapping, */ __dec_zone_page_state(page, NR_FILE_PAGES); __inc_zone_page_state(newpage, NR_FILE_PAGES); - if (PageSwapBacked(page)) { + if (!PageSwapCache(page) && PageSwapBacked(page)) { __dec_zone_page_state(page, NR_SHMEM); __inc_zone_page_state(newpage, NR_SHMEM); } From f8b7fc6b514f34a51875dd48dff70d4d17a54f38 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 16 Jun 2011 08:26:32 -0700 Subject: [PATCH 202/327] rcu: Move RCU_BOOST #ifdefs to header file The commit "use softirq instead of kthreads except when RCU_BOOST=y" just applied #ifdef in place. This commit is a cleanup that moves the newly #ifdef'ed code to the header file kernel/rcutree_plugin.h. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 383 +-------------------------------------- kernel/rcutree.h | 5 + kernel/rcutree_plugin.h | 384 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 390 insertions(+), 382 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 429d4949f0eb..7e59ffb3d0ba 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1093,16 +1093,8 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) int need_report = 0; struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp; -#ifdef CONFIG_RCU_BOOST - struct task_struct *t; - /* Stop the CPU's kthread. */ - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (t != NULL) { - per_cpu(rcu_cpu_kthread_task, cpu) = NULL; - kthread_stop(t); - } -#endif /* #ifdef CONFIG_RCU_BOOST */ + rcu_stop_cpu_kthread(cpu); /* Exclude any attempts to start a new grace period. */ raw_spin_lock_irqsave(&rsp->onofflock, flags); @@ -1453,17 +1445,6 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) invoke_rcu_callbacks(rsp, rdp); } -#ifdef CONFIG_RCU_BOOST - -static void rcu_kthread_do_work(void) -{ - rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); - rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); - rcu_preempt_do_callbacks(); -} - -#endif /* #ifdef CONFIG_RCU_BOOST */ - /* * Do softirq processing for the current CPU. */ @@ -1498,345 +1479,6 @@ static void invoke_rcu_core(void) raise_softirq(RCU_SOFTIRQ); } -#ifdef CONFIG_RCU_BOOST - -/* - * Wake up the specified per-rcu_node-structure kthread. - * Because the per-rcu_node kthreads are immortal, we don't need - * to do anything to keep them alive. - */ -static void invoke_rcu_node_kthread(struct rcu_node *rnp) -{ - struct task_struct *t; - - t = rnp->node_kthread_task; - if (t != NULL) - wake_up_process(t); -} - -/* - * Set the specified CPU's kthread to run RT or not, as specified by - * the to_rt argument. The CPU-hotplug locks are held, so the task - * is not going away. - */ -static void rcu_cpu_kthread_setrt(int cpu, int to_rt) -{ - int policy; - struct sched_param sp; - struct task_struct *t; - - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (t == NULL) - return; - if (to_rt) { - policy = SCHED_FIFO; - sp.sched_priority = RCU_KTHREAD_PRIO; - } else { - policy = SCHED_NORMAL; - sp.sched_priority = 0; - } - sched_setscheduler_nocheck(t, policy, &sp); -} - -/* - * Timer handler to initiate the waking up of per-CPU kthreads that - * have yielded the CPU due to excess numbers of RCU callbacks. - * We wake up the per-rcu_node kthread, which in turn will wake up - * the booster kthread. - */ -static void rcu_cpu_kthread_timer(unsigned long arg) -{ - struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); - struct rcu_node *rnp = rdp->mynode; - - atomic_or(rdp->grpmask, &rnp->wakemask); - invoke_rcu_node_kthread(rnp); -} - -/* - * Drop to non-real-time priority and yield, but only after posting a - * timer that will cause us to regain our real-time priority if we - * remain preempted. Either way, we restore our real-time priority - * before returning. - */ -static void rcu_yield(void (*f)(unsigned long), unsigned long arg) -{ - struct sched_param sp; - struct timer_list yield_timer; - - setup_timer_on_stack(&yield_timer, f, arg); - mod_timer(&yield_timer, jiffies + 2); - sp.sched_priority = 0; - sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); - set_user_nice(current, 19); - schedule(); - sp.sched_priority = RCU_KTHREAD_PRIO; - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); - del_timer(&yield_timer); -} - -/* - * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU. - * This can happen while the corresponding CPU is either coming online - * or going offline. We cannot wait until the CPU is fully online - * before starting the kthread, because the various notifier functions - * can wait for RCU grace periods. So we park rcu_cpu_kthread() until - * the corresponding CPU is online. - * - * Return 1 if the kthread needs to stop, 0 otherwise. - * - * Caller must disable bh. This function can momentarily enable it. - */ -static int rcu_cpu_kthread_should_stop(int cpu) -{ - while (cpu_is_offline(cpu) || - !cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu)) || - smp_processor_id() != cpu) { - if (kthread_should_stop()) - return 1; - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; - per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id(); - local_bh_enable(); - schedule_timeout_uninterruptible(1); - if (!cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu))) - set_cpus_allowed_ptr(current, cpumask_of(cpu)); - local_bh_disable(); - } - per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; - return 0; -} - -/* - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the - * earlier RCU softirq. - */ -static int rcu_cpu_kthread(void *arg) -{ - int cpu = (int)(long)arg; - unsigned long flags; - int spincnt = 0; - unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); - char work; - char *workp = &per_cpu(rcu_cpu_has_work, cpu); - - for (;;) { - *statusp = RCU_KTHREAD_WAITING; - rcu_wait(*workp != 0 || kthread_should_stop()); - local_bh_disable(); - if (rcu_cpu_kthread_should_stop(cpu)) { - local_bh_enable(); - break; - } - *statusp = RCU_KTHREAD_RUNNING; - per_cpu(rcu_cpu_kthread_loops, cpu)++; - local_irq_save(flags); - work = *workp; - *workp = 0; - local_irq_restore(flags); - if (work) - rcu_kthread_do_work(); - local_bh_enable(); - if (*workp != 0) - spincnt++; - else - spincnt = 0; - if (spincnt > 10) { - *statusp = RCU_KTHREAD_YIELDING; - rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); - spincnt = 0; - } - } - *statusp = RCU_KTHREAD_STOPPED; - return 0; -} - -/* - * Spawn a per-CPU kthread, setting up affinity and priority. - * Because the CPU hotplug lock is held, no other CPU will be attempting - * to manipulate rcu_cpu_kthread_task. There might be another CPU - * attempting to access it during boot, but the locking in kthread_bind() - * will enforce sufficient ordering. - * - * Please note that we cannot simply refuse to wake up the per-CPU - * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state, - * which can result in softlockup complaints if the task ends up being - * idle for more than a couple of minutes. - * - * However, please note also that we cannot bind the per-CPU kthread to its - * CPU until that CPU is fully online. We also cannot wait until the - * CPU is fully online before we create its per-CPU kthread, as this would - * deadlock the system when CPU notifiers tried waiting for grace - * periods. So we bind the per-CPU kthread to its CPU only if the CPU - * is online. If its CPU is not yet fully online, then the code in - * rcu_cpu_kthread() will wait until it is fully online, and then do - * the binding. - */ -static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) -{ - struct sched_param sp; - struct task_struct *t; - - if (!rcu_kthreads_spawnable || - per_cpu(rcu_cpu_kthread_task, cpu) != NULL) - return 0; - t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); - if (IS_ERR(t)) - return PTR_ERR(t); - if (cpu_online(cpu)) - kthread_bind(t, cpu); - per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; - WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); - sp.sched_priority = RCU_KTHREAD_PRIO; - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); - per_cpu(rcu_cpu_kthread_task, cpu) = t; - wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */ - return 0; -} - -/* - * Per-rcu_node kthread, which is in charge of waking up the per-CPU - * kthreads when needed. We ignore requests to wake up kthreads - * for offline CPUs, which is OK because force_quiescent_state() - * takes care of this case. - */ -static int rcu_node_kthread(void *arg) -{ - int cpu; - unsigned long flags; - unsigned long mask; - struct rcu_node *rnp = (struct rcu_node *)arg; - struct sched_param sp; - struct task_struct *t; - - for (;;) { - rnp->node_kthread_status = RCU_KTHREAD_WAITING; - rcu_wait(atomic_read(&rnp->wakemask) != 0); - rnp->node_kthread_status = RCU_KTHREAD_RUNNING; - raw_spin_lock_irqsave(&rnp->lock, flags); - mask = atomic_xchg(&rnp->wakemask, 0); - rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ - for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { - if ((mask & 0x1) == 0) - continue; - preempt_disable(); - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (!cpu_online(cpu) || t == NULL) { - preempt_enable(); - continue; - } - per_cpu(rcu_cpu_has_work, cpu) = 1; - sp.sched_priority = RCU_KTHREAD_PRIO; - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); - preempt_enable(); - } - } - /* NOTREACHED */ - rnp->node_kthread_status = RCU_KTHREAD_STOPPED; - return 0; -} - -/* - * Set the per-rcu_node kthread's affinity to cover all CPUs that are - * served by the rcu_node in question. The CPU hotplug lock is still - * held, so the value of rnp->qsmaskinit will be stable. - * - * We don't include outgoingcpu in the affinity set, use -1 if there is - * no outgoing CPU. If there are no CPUs left in the affinity set, - * this function allows the kthread to execute on any CPU. - */ -static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) -{ - cpumask_var_t cm; - int cpu; - unsigned long mask = rnp->qsmaskinit; - - if (rnp->node_kthread_task == NULL) - return; - if (!alloc_cpumask_var(&cm, GFP_KERNEL)) - return; - cpumask_clear(cm); - for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) - if ((mask & 0x1) && cpu != outgoingcpu) - cpumask_set_cpu(cpu, cm); - if (cpumask_weight(cm) == 0) { - cpumask_setall(cm); - for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) - cpumask_clear_cpu(cpu, cm); - WARN_ON_ONCE(cpumask_weight(cm) == 0); - } - set_cpus_allowed_ptr(rnp->node_kthread_task, cm); - rcu_boost_kthread_setaffinity(rnp, cm); - free_cpumask_var(cm); -} - -/* - * Spawn a per-rcu_node kthread, setting priority and affinity. - * Called during boot before online/offline can happen, or, if - * during runtime, with the main CPU-hotplug locks held. So only - * one of these can be executing at a time. - */ -static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, - struct rcu_node *rnp) -{ - unsigned long flags; - int rnp_index = rnp - &rsp->node[0]; - struct sched_param sp; - struct task_struct *t; - - if (!rcu_kthreads_spawnable || - rnp->qsmaskinit == 0) - return 0; - if (rnp->node_kthread_task == NULL) { - t = kthread_create(rcu_node_kthread, (void *)rnp, - "rcun%d", rnp_index); - if (IS_ERR(t)) - return PTR_ERR(t); - raw_spin_lock_irqsave(&rnp->lock, flags); - rnp->node_kthread_task = t; - raw_spin_unlock_irqrestore(&rnp->lock, flags); - sp.sched_priority = 99; - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); - wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ - } - return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); -} - -/* - * Spawn all kthreads -- called as soon as the scheduler is running. - */ -static int __init rcu_spawn_kthreads(void) -{ - int cpu; - struct rcu_node *rnp; - - rcu_kthreads_spawnable = 1; - for_each_possible_cpu(cpu) { - per_cpu(rcu_cpu_has_work, cpu) = 0; - if (cpu_online(cpu)) - (void)rcu_spawn_one_cpu_kthread(cpu); - } - rnp = rcu_get_root(rcu_state); - (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - if (NUM_RCU_NODES > 1) { - rcu_for_each_leaf_node(rcu_state, rnp) - (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - } - return 0; -} -early_initcall(rcu_spawn_kthreads); - -#else /* #ifdef CONFIG_RCU_BOOST */ - -static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) -{ -} - -static void rcu_cpu_kthread_setrt(int cpu, int to_rt) -{ -} - -#endif /* #else #ifdef CONFIG_RCU_BOOST */ - static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_state *rsp) @@ -2243,29 +1885,6 @@ static void __cpuinit rcu_prepare_cpu(int cpu) rcu_preempt_init_percpu_data(cpu); } -#ifdef CONFIG_RCU_BOOST - -static void __cpuinit rcu_prepare_kthreads(int cpu) -{ - struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); - struct rcu_node *rnp = rdp->mynode; - - /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ - if (rcu_kthreads_spawnable) { - (void)rcu_spawn_one_cpu_kthread(cpu); - if (rnp->node_kthread_task == NULL) - (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - } -} - -#else /* #ifdef CONFIG_RCU_BOOST */ - -static void __cpuinit rcu_prepare_kthreads(int cpu) -{ -} - -#endif /* #else #ifdef CONFIG_RCU_BOOST */ - /* * Handle CPU online/offline notification events. */ diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 434288c7ad88..01b2ccda26fb 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -427,6 +427,7 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags); +static void rcu_stop_cpu_kthread(int cpu); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_print_detail_task_stall(struct rcu_state *rsp); static void rcu_print_task_stall(struct rcu_node *rnp); @@ -460,6 +461,10 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, struct rcu_node *rnp, int rnp_index); +static void invoke_rcu_node_kthread(struct rcu_node *rnp); +static void rcu_yield(void (*f)(unsigned long), unsigned long arg); #endif /* #ifdef CONFIG_RCU_BOOST */ +static void rcu_cpu_kthread_setrt(int cpu, int to_rt); +static void __cpuinit rcu_prepare_kthreads(int cpu); #endif /* #ifndef RCU_TREE_NONCORE */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 2772386c0421..14dc7dd00902 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1330,6 +1330,370 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, return 0; } +#ifdef CONFIG_HOTPLUG_CPU + +/* + * Stop the RCU's per-CPU kthread when its CPU goes offline,. + */ +static void rcu_stop_cpu_kthread(int cpu) +{ + struct task_struct *t; + + /* Stop the CPU's kthread. */ + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (t != NULL) { + per_cpu(rcu_cpu_kthread_task, cpu) = NULL; + kthread_stop(t); + } +} + +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ + +static void rcu_kthread_do_work(void) +{ + rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); + rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); + rcu_preempt_do_callbacks(); +} + +/* + * Wake up the specified per-rcu_node-structure kthread. + * Because the per-rcu_node kthreads are immortal, we don't need + * to do anything to keep them alive. + */ +static void invoke_rcu_node_kthread(struct rcu_node *rnp) +{ + struct task_struct *t; + + t = rnp->node_kthread_task; + if (t != NULL) + wake_up_process(t); +} + +/* + * Set the specified CPU's kthread to run RT or not, as specified by + * the to_rt argument. The CPU-hotplug locks are held, so the task + * is not going away. + */ +static void rcu_cpu_kthread_setrt(int cpu, int to_rt) +{ + int policy; + struct sched_param sp; + struct task_struct *t; + + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (t == NULL) + return; + if (to_rt) { + policy = SCHED_FIFO; + sp.sched_priority = RCU_KTHREAD_PRIO; + } else { + policy = SCHED_NORMAL; + sp.sched_priority = 0; + } + sched_setscheduler_nocheck(t, policy, &sp); +} + +/* + * Timer handler to initiate the waking up of per-CPU kthreads that + * have yielded the CPU due to excess numbers of RCU callbacks. + * We wake up the per-rcu_node kthread, which in turn will wake up + * the booster kthread. + */ +static void rcu_cpu_kthread_timer(unsigned long arg) +{ + struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); + struct rcu_node *rnp = rdp->mynode; + + atomic_or(rdp->grpmask, &rnp->wakemask); + invoke_rcu_node_kthread(rnp); +} + +/* + * Drop to non-real-time priority and yield, but only after posting a + * timer that will cause us to regain our real-time priority if we + * remain preempted. Either way, we restore our real-time priority + * before returning. + */ +static void rcu_yield(void (*f)(unsigned long), unsigned long arg) +{ + struct sched_param sp; + struct timer_list yield_timer; + + setup_timer_on_stack(&yield_timer, f, arg); + mod_timer(&yield_timer, jiffies + 2); + sp.sched_priority = 0; + sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); + set_user_nice(current, 19); + schedule(); + sp.sched_priority = RCU_KTHREAD_PRIO; + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); + del_timer(&yield_timer); +} + +/* + * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU. + * This can happen while the corresponding CPU is either coming online + * or going offline. We cannot wait until the CPU is fully online + * before starting the kthread, because the various notifier functions + * can wait for RCU grace periods. So we park rcu_cpu_kthread() until + * the corresponding CPU is online. + * + * Return 1 if the kthread needs to stop, 0 otherwise. + * + * Caller must disable bh. This function can momentarily enable it. + */ +static int rcu_cpu_kthread_should_stop(int cpu) +{ + while (cpu_is_offline(cpu) || + !cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu)) || + smp_processor_id() != cpu) { + if (kthread_should_stop()) + return 1; + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; + per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id(); + local_bh_enable(); + schedule_timeout_uninterruptible(1); + if (!cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu))) + set_cpus_allowed_ptr(current, cpumask_of(cpu)); + local_bh_disable(); + } + per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; + return 0; +} + +/* + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the + * earlier RCU softirq. + */ +static int rcu_cpu_kthread(void *arg) +{ + int cpu = (int)(long)arg; + unsigned long flags; + int spincnt = 0; + unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); + char work; + char *workp = &per_cpu(rcu_cpu_has_work, cpu); + + for (;;) { + *statusp = RCU_KTHREAD_WAITING; + rcu_wait(*workp != 0 || kthread_should_stop()); + local_bh_disable(); + if (rcu_cpu_kthread_should_stop(cpu)) { + local_bh_enable(); + break; + } + *statusp = RCU_KTHREAD_RUNNING; + per_cpu(rcu_cpu_kthread_loops, cpu)++; + local_irq_save(flags); + work = *workp; + *workp = 0; + local_irq_restore(flags); + if (work) + rcu_kthread_do_work(); + local_bh_enable(); + if (*workp != 0) + spincnt++; + else + spincnt = 0; + if (spincnt > 10) { + *statusp = RCU_KTHREAD_YIELDING; + rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); + spincnt = 0; + } + } + *statusp = RCU_KTHREAD_STOPPED; + return 0; +} + +/* + * Spawn a per-CPU kthread, setting up affinity and priority. + * Because the CPU hotplug lock is held, no other CPU will be attempting + * to manipulate rcu_cpu_kthread_task. There might be another CPU + * attempting to access it during boot, but the locking in kthread_bind() + * will enforce sufficient ordering. + * + * Please note that we cannot simply refuse to wake up the per-CPU + * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state, + * which can result in softlockup complaints if the task ends up being + * idle for more than a couple of minutes. + * + * However, please note also that we cannot bind the per-CPU kthread to its + * CPU until that CPU is fully online. We also cannot wait until the + * CPU is fully online before we create its per-CPU kthread, as this would + * deadlock the system when CPU notifiers tried waiting for grace + * periods. So we bind the per-CPU kthread to its CPU only if the CPU + * is online. If its CPU is not yet fully online, then the code in + * rcu_cpu_kthread() will wait until it is fully online, and then do + * the binding. + */ +static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) +{ + struct sched_param sp; + struct task_struct *t; + + if (!rcu_kthreads_spawnable || + per_cpu(rcu_cpu_kthread_task, cpu) != NULL) + return 0; + t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); + if (IS_ERR(t)) + return PTR_ERR(t); + if (cpu_online(cpu)) + kthread_bind(t, cpu); + per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; + WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); + sp.sched_priority = RCU_KTHREAD_PRIO; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + per_cpu(rcu_cpu_kthread_task, cpu) = t; + wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */ + return 0; +} + +/* + * Per-rcu_node kthread, which is in charge of waking up the per-CPU + * kthreads when needed. We ignore requests to wake up kthreads + * for offline CPUs, which is OK because force_quiescent_state() + * takes care of this case. + */ +static int rcu_node_kthread(void *arg) +{ + int cpu; + unsigned long flags; + unsigned long mask; + struct rcu_node *rnp = (struct rcu_node *)arg; + struct sched_param sp; + struct task_struct *t; + + for (;;) { + rnp->node_kthread_status = RCU_KTHREAD_WAITING; + rcu_wait(atomic_read(&rnp->wakemask) != 0); + rnp->node_kthread_status = RCU_KTHREAD_RUNNING; + raw_spin_lock_irqsave(&rnp->lock, flags); + mask = atomic_xchg(&rnp->wakemask, 0); + rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ + for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { + if ((mask & 0x1) == 0) + continue; + preempt_disable(); + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (!cpu_online(cpu) || t == NULL) { + preempt_enable(); + continue; + } + per_cpu(rcu_cpu_has_work, cpu) = 1; + sp.sched_priority = RCU_KTHREAD_PRIO; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + preempt_enable(); + } + } + /* NOTREACHED */ + rnp->node_kthread_status = RCU_KTHREAD_STOPPED; + return 0; +} + +/* + * Set the per-rcu_node kthread's affinity to cover all CPUs that are + * served by the rcu_node in question. The CPU hotplug lock is still + * held, so the value of rnp->qsmaskinit will be stable. + * + * We don't include outgoingcpu in the affinity set, use -1 if there is + * no outgoing CPU. If there are no CPUs left in the affinity set, + * this function allows the kthread to execute on any CPU. + */ +static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) +{ + cpumask_var_t cm; + int cpu; + unsigned long mask = rnp->qsmaskinit; + + if (rnp->node_kthread_task == NULL) + return; + if (!alloc_cpumask_var(&cm, GFP_KERNEL)) + return; + cpumask_clear(cm); + for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) + if ((mask & 0x1) && cpu != outgoingcpu) + cpumask_set_cpu(cpu, cm); + if (cpumask_weight(cm) == 0) { + cpumask_setall(cm); + for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) + cpumask_clear_cpu(cpu, cm); + WARN_ON_ONCE(cpumask_weight(cm) == 0); + } + set_cpus_allowed_ptr(rnp->node_kthread_task, cm); + rcu_boost_kthread_setaffinity(rnp, cm); + free_cpumask_var(cm); +} + +/* + * Spawn a per-rcu_node kthread, setting priority and affinity. + * Called during boot before online/offline can happen, or, if + * during runtime, with the main CPU-hotplug locks held. So only + * one of these can be executing at a time. + */ +static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, + struct rcu_node *rnp) +{ + unsigned long flags; + int rnp_index = rnp - &rsp->node[0]; + struct sched_param sp; + struct task_struct *t; + + if (!rcu_kthreads_spawnable || + rnp->qsmaskinit == 0) + return 0; + if (rnp->node_kthread_task == NULL) { + t = kthread_create(rcu_node_kthread, (void *)rnp, + "rcun%d", rnp_index); + if (IS_ERR(t)) + return PTR_ERR(t); + raw_spin_lock_irqsave(&rnp->lock, flags); + rnp->node_kthread_task = t; + raw_spin_unlock_irqrestore(&rnp->lock, flags); + sp.sched_priority = 99; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ + } + return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); +} + +/* + * Spawn all kthreads -- called as soon as the scheduler is running. + */ +static int __init rcu_spawn_kthreads(void) +{ + int cpu; + struct rcu_node *rnp; + + rcu_kthreads_spawnable = 1; + for_each_possible_cpu(cpu) { + per_cpu(rcu_cpu_has_work, cpu) = 0; + if (cpu_online(cpu)) + (void)rcu_spawn_one_cpu_kthread(cpu); + } + rnp = rcu_get_root(rcu_state); + (void)rcu_spawn_one_node_kthread(rcu_state, rnp); + if (NUM_RCU_NODES > 1) { + rcu_for_each_leaf_node(rcu_state, rnp) + (void)rcu_spawn_one_node_kthread(rcu_state, rnp); + } + return 0; +} +early_initcall(rcu_spawn_kthreads); + +static void __cpuinit rcu_prepare_kthreads(int cpu) +{ + struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); + struct rcu_node *rnp = rdp->mynode; + + /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ + if (rcu_kthreads_spawnable) { + (void)rcu_spawn_one_cpu_kthread(cpu); + if (rnp->node_kthread_task == NULL) + (void)rcu_spawn_one_node_kthread(rcu_state, rnp); + } +} + #else /* #ifdef CONFIG_RCU_BOOST */ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) @@ -1346,6 +1710,26 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) { } +#ifdef CONFIG_HOTPLUG_CPU + +static void rcu_stop_cpu_kthread(int cpu) +{ +} + +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ + +static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) +{ +} + +static void rcu_cpu_kthread_setrt(int cpu, int to_rt) +{ +} + +static void __cpuinit rcu_prepare_kthreads(int cpu) +{ +} + #endif /* #else #ifdef CONFIG_RCU_BOOST */ #ifndef CONFIG_SMP From 826c7e4147f902737b281e8a5a7d7aa33fd63316 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sat, 4 Jun 2011 19:34:56 +0000 Subject: [PATCH 203/327] Revert "drm/i915: Enable GMBUS for post-gen2 chipsets" Revert commit 8f9a3f9b63b8cd3f03be9dc53533f90bd4120e5f. This fixes a hang when loading the eeprom driver (see bug #35572.) GMBUS will be re-enabled later, differently. Signed-off-by: Jean Delvare Reported-by: Marek Otahal Tested-by: Yermandu Patapitafious Tested-by: Andrew Lutomirski Acked-by: Chris Wilson Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_i2c.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index d3b903bce7c5..d98cee60b602 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -401,8 +401,7 @@ int intel_setup_gmbus(struct drm_device *dev) bus->reg0 = i | GMBUS_RATE_100KHZ; /* XXX force bit banging until GMBUS is fully debugged */ - if (IS_GEN2(dev)) - bus->force_bit = intel_gpio_create(dev_priv, i); + bus->force_bit = intel_gpio_create(dev_priv, i); } intel_i2c_reset(dev_priv->dev); From ba7e05e95880ad80f012555fb8e925cb1f9a5d63 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 16 Jun 2011 18:14:22 +0000 Subject: [PATCH 204/327] drm/radeon/kms: fix num crtcs for Cedar and Caicos Only support 4 rather than 6. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_asic.c | 31 +++++++++++++++++----------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 9bd162fc9b0c..b2449629537d 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -938,6 +938,13 @@ static struct radeon_asic cayman_asic = { int radeon_asic_init(struct radeon_device *rdev) { radeon_register_accessor_init(rdev); + + /* set the number of crtcs */ + if (rdev->flags & RADEON_SINGLE_CRTC) + rdev->num_crtc = 1; + else + rdev->num_crtc = 2; + switch (rdev->family) { case CHIP_R100: case CHIP_RV100: @@ -1017,6 +1024,11 @@ int radeon_asic_init(struct radeon_device *rdev) case CHIP_JUNIPER: case CHIP_CYPRESS: case CHIP_HEMLOCK: + /* set num crtcs */ + if (rdev->family == CHIP_CEDAR) + rdev->num_crtc = 4; + else + rdev->num_crtc = 6; rdev->asic = &evergreen_asic; break; case CHIP_PALM: @@ -1027,10 +1039,17 @@ int radeon_asic_init(struct radeon_device *rdev) case CHIP_BARTS: case CHIP_TURKS: case CHIP_CAICOS: + /* set num crtcs */ + if (rdev->family == CHIP_CAICOS) + rdev->num_crtc = 4; + else + rdev->num_crtc = 6; rdev->asic = &btc_asic; break; case CHIP_CAYMAN: rdev->asic = &cayman_asic; + /* set num crtcs */ + rdev->num_crtc = 6; break; default: /* FIXME: not supported yet */ @@ -1042,18 +1061,6 @@ int radeon_asic_init(struct radeon_device *rdev) rdev->asic->set_memory_clock = NULL; } - /* set the number of crtcs */ - if (rdev->flags & RADEON_SINGLE_CRTC) - rdev->num_crtc = 1; - else { - if (ASIC_IS_DCE41(rdev)) - rdev->num_crtc = 2; - else if (ASIC_IS_DCE4(rdev)) - rdev->num_crtc = 6; - else - rdev->num_crtc = 2; - } - return 0; } From fbb87773655e7b0292756f9533c3fc21aca0797f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 13 Jun 2011 17:13:31 -0400 Subject: [PATCH 205/327] drm/radeon/kms: rework atombios_get_encoder_mode() This should give us more reliable results if the table is called before an active device is set. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_encoders.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index 03f124d626c2..39fa2258f235 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c @@ -660,21 +660,16 @@ atombios_get_encoder_mode(struct drm_encoder *encoder) if (radeon_encoder_is_dp_bridge(encoder)) return ATOM_ENCODER_MODE_DP; + /* DVO is always DVO */ + if (radeon_encoder->encoder_id == ATOM_ENCODER_MODE_DVO) + return ATOM_ENCODER_MODE_DVO; + connector = radeon_get_connector_for_encoder(encoder); - if (!connector) { - switch (radeon_encoder->encoder_id) { - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY: - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: - case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA: - case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1: - return ATOM_ENCODER_MODE_DVI; - case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1: - case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2: - default: - return ATOM_ENCODER_MODE_CRT; - } - } + /* if we don't have an active device yet, just use one of + * the connectors tied to the encoder. + */ + if (!connector) + connector = radeon_get_connector_for_encoder_init(encoder); radeon_connector = to_radeon_connector(connector); switch (connector->connector_type) { From 7ec478f835a391d27491493ebfd91f2bed98dbd9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 13 Jun 2011 17:13:32 -0400 Subject: [PATCH 206/327] drm/radeon/kms: add missing external encoder action required for ddc. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/atombios.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h index 49611e2365d9..1b50ad8919d5 100644 --- a/drivers/gpu/drm/radeon/atombios.h +++ b/drivers/gpu/drm/radeon/atombios.h @@ -1200,6 +1200,7 @@ typedef struct _EXTERNAL_ENCODER_CONTROL_PARAMETERS_V3 #define EXTERNAL_ENCODER_ACTION_V3_ENCODER_BLANKING_OFF 0x10 #define EXTERNAL_ENCODER_ACTION_V3_ENCODER_BLANKING 0x11 #define EXTERNAL_ENCODER_ACTION_V3_DACLOAD_DETECTION 0x12 +#define EXTERNAL_ENCODER_ACTION_V3_DDC_SETUP 0x14 // ucConfig #define EXTERNAL_ENCODER_CONFIG_V3_DPLINKRATE_MASK 0x03 From d629a3ceb4fc1ab5aab737b964100d114aba1173 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 13 Jun 2011 17:13:33 -0400 Subject: [PATCH 207/327] drm/radeon/kms: add support for load detection on dp bridges dp to vga bridges for example. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_connectors.c | 20 +++++++++ drivers/gpu/drm/radeon/radeon_encoders.c | 49 +++++++++++++++++++++- 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index c04e18ee8a87..dc7852b3c5ca 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1235,6 +1235,16 @@ radeon_dp_detect(struct drm_connector *connector, bool force) ret = connector_status_connected; } } + + if ((ret == connector_status_disconnected) && + radeon_connector->dac_load_detect) { + struct drm_encoder *encoder = radeon_best_single_encoder(connector); + struct drm_encoder_helper_funcs *encoder_funcs; + if (encoder) { + encoder_funcs = encoder->helper_private; + ret = encoder_funcs->detect(encoder, connector); + } + } } radeon_connector_update_scratch_regs(connector, ret); @@ -1408,6 +1418,10 @@ radeon_add_atom_connector(struct drm_device *dev, default: connector->interlace_allowed = true; connector->doublescan_allowed = true; + radeon_connector->dac_load_detect = true; + drm_connector_attach_property(&radeon_connector->base, + rdev->mode_info.load_detect_property, + 1); break; case DRM_MODE_CONNECTOR_DVII: case DRM_MODE_CONNECTOR_DVID: @@ -1429,6 +1443,12 @@ radeon_add_atom_connector(struct drm_device *dev, connector->doublescan_allowed = true; else connector->doublescan_allowed = false; + if (connector_type == DRM_MODE_CONNECTOR_DVII) { + radeon_connector->dac_load_detect = true; + drm_connector_attach_property(&radeon_connector->base, + rdev->mode_info.load_detect_property, + 1); + } break; case DRM_MODE_CONNECTOR_LVDS: case DRM_MODE_CONNECTOR_eDP: diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index 39fa2258f235..d3449443aa1f 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c @@ -1999,6 +1999,53 @@ radeon_atom_dac_detect(struct drm_encoder *encoder, struct drm_connector *connec return connector_status_disconnected; } +static enum drm_connector_status +radeon_atom_dig_detect(struct drm_encoder *encoder, struct drm_connector *connector) +{ + struct drm_device *dev = encoder->dev; + struct radeon_device *rdev = dev->dev_private; + struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); + struct radeon_connector *radeon_connector = to_radeon_connector(connector); + struct drm_encoder *ext_encoder = radeon_atom_get_external_encoder(encoder); + u32 bios_0_scratch; + + if (!ASIC_IS_DCE4(rdev)) + return connector_status_unknown; + + if (!ext_encoder) + return connector_status_unknown; + + if ((radeon_connector->devices & ATOM_DEVICE_CRT_SUPPORT) == 0) + return connector_status_unknown; + + /* load detect on the dp bridge */ + atombios_external_encoder_setup(encoder, ext_encoder, + EXTERNAL_ENCODER_ACTION_V3_DACLOAD_DETECTION); + + bios_0_scratch = RREG32(R600_BIOS_0_SCRATCH); + + DRM_DEBUG_KMS("Bios 0 scratch %x %08x\n", bios_0_scratch, radeon_encoder->devices); + if (radeon_connector->devices & ATOM_DEVICE_CRT1_SUPPORT) { + if (bios_0_scratch & ATOM_S0_CRT1_MASK) + return connector_status_connected; + } + if (radeon_connector->devices & ATOM_DEVICE_CRT2_SUPPORT) { + if (bios_0_scratch & ATOM_S0_CRT2_MASK) + return connector_status_connected; + } + if (radeon_connector->devices & ATOM_DEVICE_CV_SUPPORT) { + if (bios_0_scratch & (ATOM_S0_CV_MASK|ATOM_S0_CV_MASK_A)) + return connector_status_connected; + } + if (radeon_connector->devices & ATOM_DEVICE_TV1_SUPPORT) { + if (bios_0_scratch & (ATOM_S0_TV1_COMPOSITE | ATOM_S0_TV1_COMPOSITE_A)) + return connector_status_connected; /* CTV */ + else if (bios_0_scratch & (ATOM_S0_TV1_SVIDEO | ATOM_S0_TV1_SVIDEO_A)) + return connector_status_connected; /* STV */ + } + return connector_status_disconnected; +} + static void radeon_atom_encoder_prepare(struct drm_encoder *encoder) { struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); @@ -2162,7 +2209,7 @@ static const struct drm_encoder_helper_funcs radeon_atom_dig_helper_funcs = { .mode_set = radeon_atom_encoder_mode_set, .commit = radeon_atom_encoder_commit, .disable = radeon_atom_encoder_disable, - /* no detect for TMDS/LVDS yet */ + .detect = radeon_atom_dig_detect, }; static const struct drm_encoder_helper_funcs radeon_atom_dac_helper_funcs = { From 591a10e16c2a43f6f2ea5f307ab2a5afecfb9ed9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 13 Jun 2011 17:13:34 -0400 Subject: [PATCH 208/327] drm/radeon/kms: fix support for DDC on dp bridges Need to set up the bridge for DDC prior to the i2c over aux transaction. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_connectors.c | 20 ++++++++++++++++---- drivers/gpu/drm/radeon/radeon_encoders.c | 12 ++++++++++++ drivers/gpu/drm/radeon/radeon_mode.h | 1 + 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index dc7852b3c5ca..613e36f83b71 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -44,6 +44,8 @@ extern void radeon_legacy_backlight_init(struct radeon_encoder *radeon_encoder, struct drm_connector *drm_connector); +bool radeon_connector_encoder_is_dp_bridge(struct drm_connector *connector); + void radeon_connector_hotplug(struct drm_connector *connector) { struct drm_device *dev = connector->dev; @@ -1070,10 +1072,10 @@ static int radeon_dp_get_modes(struct drm_connector *connector) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct radeon_connector_atom_dig *radeon_dig_connector = radeon_connector->con_priv; + struct drm_encoder *encoder = radeon_best_single_encoder(connector); int ret; if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) { - struct drm_encoder *encoder; struct drm_display_mode *mode; if (!radeon_dig_connector->edp_on) @@ -1085,7 +1087,6 @@ static int radeon_dp_get_modes(struct drm_connector *connector) ATOM_TRANSMITTER_ACTION_POWER_OFF); if (ret > 0) { - encoder = radeon_best_single_encoder(connector); if (encoder) { radeon_fixup_lvds_native_mode(encoder, connector); /* add scaled modes */ @@ -1109,8 +1110,14 @@ static int radeon_dp_get_modes(struct drm_connector *connector) /* add scaled modes */ radeon_add_common_modes(encoder, connector); } - } else + } else { + /* need to setup ddc on the bridge */ + if (radeon_connector_encoder_is_dp_bridge(connector)) { + if (encoder) + radeon_atom_ext_encoder_setup_ddc(encoder); + } ret = radeon_ddc_get_modes(radeon_connector); + } return ret; } @@ -1194,6 +1201,7 @@ radeon_dp_detect(struct drm_connector *connector, bool force) struct radeon_connector *radeon_connector = to_radeon_connector(connector); enum drm_connector_status ret = connector_status_disconnected; struct radeon_connector_atom_dig *radeon_dig_connector = radeon_connector->con_priv; + struct drm_encoder *encoder = radeon_best_single_encoder(connector); if (radeon_connector->edid) { kfree(radeon_connector->edid); @@ -1201,7 +1209,6 @@ radeon_dp_detect(struct drm_connector *connector, bool force) } if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) { - struct drm_encoder *encoder = radeon_best_single_encoder(connector); if (encoder) { struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct drm_display_mode *native_mode = &radeon_encoder->native_mode; @@ -1221,6 +1228,11 @@ radeon_dp_detect(struct drm_connector *connector, bool force) atombios_set_edp_panel_power(connector, ATOM_TRANSMITTER_ACTION_POWER_OFF); } else { + /* need to setup ddc on the bridge */ + if (radeon_connector_encoder_is_dp_bridge(connector)) { + if (encoder) + radeon_atom_ext_encoder_setup_ddc(encoder); + } radeon_dig_connector->dp_sink_type = radeon_dp_getsinktype(radeon_connector); if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) { ret = connector_status_connected; diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index d3449443aa1f..8e058f1f46d6 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c @@ -2046,6 +2046,18 @@ radeon_atom_dig_detect(struct drm_encoder *encoder, struct drm_connector *connec return connector_status_disconnected; } +void +radeon_atom_ext_encoder_setup_ddc(struct drm_encoder *encoder) +{ + struct drm_encoder *ext_encoder = radeon_atom_get_external_encoder(encoder); + + if (ext_encoder) + /* ddc_setup on the dp bridge */ + atombios_external_encoder_setup(encoder, ext_encoder, + EXTERNAL_ENCODER_ACTION_V3_DDC_SETUP); + +} + static void radeon_atom_encoder_prepare(struct drm_encoder *encoder) { struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 977a341266b6..f734b3737dce 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -483,6 +483,7 @@ extern void radeon_atom_encoder_init(struct radeon_device *rdev); extern void atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t lane_num, uint8_t lane_set); +extern void radeon_atom_ext_encoder_setup_ddc(struct drm_encoder *encoder); extern int radeon_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode, u8 write_byte, u8 *read_byte); From d6c669528a5367aaa5f4e712acef990b7148aee8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 13 Jun 2011 17:13:36 -0400 Subject: [PATCH 209/327] drm/radeon/kms: issue blank/unblank commands for ext encoders Required for DPMS on some systems. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_encoders.c | 25 +++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index 8e058f1f46d6..aa2450ba0ff8 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c @@ -1521,26 +1521,29 @@ radeon_atom_encoder_dpms(struct drm_encoder *encoder, int mode) } if (ext_encoder) { - int action; - switch (mode) { case DRM_MODE_DPMS_ON: default: - if (ASIC_IS_DCE41(rdev)) - action = EXTERNAL_ENCODER_ACTION_V3_ENABLE_OUTPUT; - else - action = ATOM_ENABLE; + if (ASIC_IS_DCE41(rdev)) { + atombios_external_encoder_setup(encoder, ext_encoder, + EXTERNAL_ENCODER_ACTION_V3_ENABLE_OUTPUT); + atombios_external_encoder_setup(encoder, ext_encoder, + EXTERNAL_ENCODER_ACTION_V3_ENCODER_BLANKING_OFF); + } else + atombios_external_encoder_setup(encoder, ext_encoder, ATOM_ENABLE); break; case DRM_MODE_DPMS_STANDBY: case DRM_MODE_DPMS_SUSPEND: case DRM_MODE_DPMS_OFF: - if (ASIC_IS_DCE41(rdev)) - action = EXTERNAL_ENCODER_ACTION_V3_DISABLE_OUTPUT; - else - action = ATOM_DISABLE; + if (ASIC_IS_DCE41(rdev)) { + atombios_external_encoder_setup(encoder, ext_encoder, + EXTERNAL_ENCODER_ACTION_V3_ENCODER_BLANKING); + atombios_external_encoder_setup(encoder, ext_encoder, + EXTERNAL_ENCODER_ACTION_V3_DISABLE_OUTPUT); + } else + atombios_external_encoder_setup(encoder, ext_encoder, ATOM_DISABLE); break; } - atombios_external_encoder_setup(encoder, ext_encoder, action); } radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); From f89931f345f26c43b109191fbfcfa506781111c0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 13 Jun 2011 17:13:35 -0400 Subject: [PATCH 210/327] drm/radeon/kms: fix handling of DP to LVDS bridges They need to be treated like eDP rather than DP. May fix: https://bugzilla.kernel.org/show_bug.cgi?id=34822 Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_connectors.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 613e36f83b71..cbfca3a24fdf 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1075,7 +1075,8 @@ static int radeon_dp_get_modes(struct drm_connector *connector) struct drm_encoder *encoder = radeon_best_single_encoder(connector); int ret; - if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) { + if ((connector->connector_type == DRM_MODE_CONNECTOR_eDP) || + (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)) { struct drm_display_mode *mode; if (!radeon_dig_connector->edp_on) @@ -1208,7 +1209,8 @@ radeon_dp_detect(struct drm_connector *connector, bool force) radeon_connector->edid = NULL; } - if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) { + if ((connector->connector_type == DRM_MODE_CONNECTOR_eDP) || + (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)) { if (encoder) { struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct drm_display_mode *native_mode = &radeon_encoder->native_mode; @@ -1271,7 +1273,8 @@ static int radeon_dp_mode_valid(struct drm_connector *connector, /* XXX check mode bandwidth */ - if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) { + if ((connector->connector_type == DRM_MODE_CONNECTOR_eDP) || + (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)) { struct drm_encoder *encoder = radeon_best_single_encoder(connector); if ((mode->hdisplay < 320) || (mode->vdisplay < 240)) @@ -1281,7 +1284,7 @@ static int radeon_dp_mode_valid(struct drm_connector *connector, struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct drm_display_mode *native_mode = &radeon_encoder->native_mode; - /* AVIVO hardware supports downscaling modes larger than the panel + /* AVIVO hardware supports downscaling modes larger than the panel * to the panel size, but I'm not sure this is desirable. */ if ((mode->hdisplay > native_mode->hdisplay) || From cc9f67a0a0b076b82ab1af3b2add82e19a33d5de Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 16 Jun 2011 10:06:16 -0400 Subject: [PATCH 211/327] drm/radeon/kms/atom: AdjustPixelClock fixes for DP bridges Need to set the external transmitter type properly in AdjustPixelClock to get the properly output. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/atombios_crtc.c | 7 +++++++ drivers/gpu/drm/radeon/radeon_mode.h | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 84a69e7fa11e..9541995e4b21 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -671,6 +671,13 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, DISPPLL_CONFIG_DUAL_LINK; } } + if (radeon_encoder_is_dp_bridge(encoder)) { + struct drm_encoder *ext_encoder = radeon_atom_get_external_encoder(encoder); + struct radeon_encoder *ext_radeon_encoder = to_radeon_encoder(ext_encoder); + args.v3.sInput.ucExtTransmitterID = ext_radeon_encoder->encoder_id; + } else + args.v3.sInput.ucExtTransmitterID = 0; + atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); adjusted_clock = le32_to_cpu(args.v3.sOutput.ulDispPllFreq) * 10; diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index f734b3737dce..6df4e3cec0c2 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -484,6 +484,7 @@ extern void atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t lane_num, uint8_t lane_set); extern void radeon_atom_ext_encoder_setup_ddc(struct drm_encoder *encoder); +extern struct drm_encoder *radeon_atom_get_external_encoder(struct drm_encoder *encoder); extern int radeon_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode, u8 write_byte, u8 *read_byte); From 11b0a5b89adbfaf4e7d31f2482f49471dd983692 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 16 Jun 2011 10:06:17 -0400 Subject: [PATCH 212/327] drm/radeon/kms: set DP link config properly for DP bridges DP clock and lanes were not set properly for DP bridges. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_encoders.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index aa2450ba0ff8..f55b64cb59d1 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c @@ -367,7 +367,8 @@ static bool radeon_atom_mode_fixup(struct drm_encoder *encoder, } if (ASIC_IS_DCE3(rdev) && - (radeon_encoder->active_device & (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT))) { + ((radeon_encoder->active_device & (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) || + radeon_encoder_is_dp_bridge(encoder))) { struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); radeon_dp_set_link_config(connector, mode); } From b81157d016a48b8025ccfcb286827679b35f16aa Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 13 Jun 2011 17:39:06 -0400 Subject: [PATCH 213/327] drm/radeon/kms: use helper functions for fence read/write The existing code assumed scratch registers in a number of places while in most cases we are be using writeback and events rather than scratch registers. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_fence.c | 51 +++++++++++++++++++-------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 1f8229436570..021d2b6b556f 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -40,6 +40,35 @@ #include "radeon.h" #include "radeon_trace.h" +static void radeon_fence_write(struct radeon_device *rdev, u32 seq) +{ + if (rdev->wb.enabled) { + u32 scratch_index; + if (rdev->wb.use_event) + scratch_index = R600_WB_EVENT_OFFSET + rdev->fence_drv.scratch_reg - rdev->scratch.reg_base; + else + scratch_index = RADEON_WB_SCRATCH_OFFSET + rdev->fence_drv.scratch_reg - rdev->scratch.reg_base; + rdev->wb.wb[scratch_index/4] = cpu_to_le32(seq);; + } else + WREG32(rdev->fence_drv.scratch_reg, seq); +} + +static u32 radeon_fence_read(struct radeon_device *rdev) +{ + u32 seq; + + if (rdev->wb.enabled) { + u32 scratch_index; + if (rdev->wb.use_event) + scratch_index = R600_WB_EVENT_OFFSET + rdev->fence_drv.scratch_reg - rdev->scratch.reg_base; + else + scratch_index = RADEON_WB_SCRATCH_OFFSET + rdev->fence_drv.scratch_reg - rdev->scratch.reg_base; + seq = le32_to_cpu(rdev->wb.wb[scratch_index/4]); + } else + seq = RREG32(rdev->fence_drv.scratch_reg); + return seq; +} + int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence) { unsigned long irq_flags; @@ -50,12 +79,12 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence) return 0; } fence->seq = atomic_add_return(1, &rdev->fence_drv.seq); - if (!rdev->cp.ready) { + if (!rdev->cp.ready) /* FIXME: cp is not running assume everythings is done right * away */ - WREG32(rdev->fence_drv.scratch_reg, fence->seq); - } else + radeon_fence_write(rdev, fence->seq); + else radeon_fence_ring_emit(rdev, fence); trace_radeon_fence_emit(rdev->ddev, fence->seq); @@ -73,15 +102,7 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev) bool wake = false; unsigned long cjiffies; - if (rdev->wb.enabled) { - u32 scratch_index; - if (rdev->wb.use_event) - scratch_index = R600_WB_EVENT_OFFSET + rdev->fence_drv.scratch_reg - rdev->scratch.reg_base; - else - scratch_index = RADEON_WB_SCRATCH_OFFSET + rdev->fence_drv.scratch_reg - rdev->scratch.reg_base; - seq = le32_to_cpu(rdev->wb.wb[scratch_index/4]); - } else - seq = RREG32(rdev->fence_drv.scratch_reg); + seq = radeon_fence_read(rdev); if (seq != rdev->fence_drv.last_seq) { rdev->fence_drv.last_seq = seq; rdev->fence_drv.last_jiffies = jiffies; @@ -251,7 +272,7 @@ retry: r = radeon_gpu_reset(rdev); if (r) return r; - WREG32(rdev->fence_drv.scratch_reg, fence->seq); + radeon_fence_write(rdev, fence->seq); rdev->gpu_lockup = false; } timeout = RADEON_FENCE_JIFFIES_TIMEOUT; @@ -351,7 +372,7 @@ int radeon_fence_driver_init(struct radeon_device *rdev) write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags); return r; } - WREG32(rdev->fence_drv.scratch_reg, 0); + radeon_fence_write(rdev, 0); atomic_set(&rdev->fence_drv.seq, 0); INIT_LIST_HEAD(&rdev->fence_drv.created); INIT_LIST_HEAD(&rdev->fence_drv.emited); @@ -391,7 +412,7 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data) struct radeon_fence *fence; seq_printf(m, "Last signaled fence 0x%08X\n", - RREG32(rdev->fence_drv.scratch_reg)); + radeon_fence_read(rdev)); if (!list_empty(&rdev->fence_drv.emited)) { fence = list_entry(rdev->fence_drv.emited.prev, struct radeon_fence, list); From d40261236e8e278cb1936cb5e934262971692b10 Mon Sep 17 00:00:00 2001 From: "Marius B. Kotsbak" Date: Sun, 12 Jun 2011 02:35:02 +0000 Subject: [PATCH 214/327] net/usb: Add Samsung Kalmia driver for Samsung GT-B3730 Introducing driver for the network port of Samsung Kalmia based USB LTE modems. It has also an ACM interface that previous patches associates with the "option" module. To access those interfaces, the modem must first be switched from modem mode using a tool like usb_modeswitch. As the proprietary protocol has been discovered by watching the MS Windows driver behavior, there might be errors in the protocol handling, but stable and fast connection has been established for hours with Norwegian operator NetCom that distributes this modem with their LTE/4G subscription. More and updated information about how to use this driver is available here: http://www.draisberghof.de/usb_modeswitch/bb/viewtopic.php?t=465 https://github.com/mkotsbak/Samsung-GT-B3730-linux-driver Signed-off-by: Marius B. Kotsbak Signed-off-by: David S. Miller --- drivers/net/usb/Kconfig | 10 + drivers/net/usb/Makefile | 1 + drivers/net/usb/kalmia.c | 384 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 395 insertions(+) create mode 100644 drivers/net/usb/kalmia.c diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 9d4f9117260f..84d4608153c9 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -385,6 +385,16 @@ config USB_NET_CX82310_ETH router with USB ethernet port. This driver is for routers only, it will not work with ADSL modems (use cxacru driver instead). +config USB_NET_KALMIA + tristate "Samsung Kalmia based LTE USB modem" + depends on USB_USBNET + help + Choose this option if you have a Samsung Kalmia based USB modem + as Samsung GT-B3730. + + To compile this driver as a module, choose M here: the + module will be called kalmia. + config USB_HSO tristate "Option USB High Speed Mobile Devices" depends on USB && RFKILL diff --git a/drivers/net/usb/Makefile b/drivers/net/usb/Makefile index c7ec8a5f0a90..c203fa21f6b1 100644 --- a/drivers/net/usb/Makefile +++ b/drivers/net/usb/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_USB_NET_MCS7830) += mcs7830.o obj-$(CONFIG_USB_USBNET) += usbnet.o obj-$(CONFIG_USB_NET_INT51X1) += int51x1.o obj-$(CONFIG_USB_CDC_PHONET) += cdc-phonet.o +obj-$(CONFIG_USB_NET_KALMIA) += kalmia.o obj-$(CONFIG_USB_IPHETH) += ipheth.o obj-$(CONFIG_USB_SIERRA_NET) += sierra_net.o obj-$(CONFIG_USB_NET_CX82310_ETH) += cx82310_eth.o diff --git a/drivers/net/usb/kalmia.c b/drivers/net/usb/kalmia.c new file mode 100644 index 000000000000..d965fb1e013e --- /dev/null +++ b/drivers/net/usb/kalmia.c @@ -0,0 +1,384 @@ +/* + * USB network interface driver for Samsung Kalmia based LTE USB modem like the + * Samsung GT-B3730 and GT-B3710. + * + * Copyright (C) 2011 Marius Bjoernstad Kotsbak + * + * Sponsored by Quicklink Video Distribution Services Ltd. + * + * Based on the cdc_eem module. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * The Samsung Kalmia based LTE USB modems have a CDC ACM port for modem control + * handled by the "option" module and an ethernet data port handled by this + * module. + * + * The stick must first be switched into modem mode by usb_modeswitch + * or similar tool. Then the modem gets sent two initialization packets by + * this module, which gives the MAC address of the device. User space can then + * connect the modem using AT commands through the ACM port and then use + * DHCP on the network interface exposed by this module. Network packets are + * sent to and from the modem in a proprietary format discovered after watching + * the behavior of the windows driver for the modem. + * + * More information about the use of the modem is available in usb_modeswitch + * forum and the project page: + * + * http://www.draisberghof.de/usb_modeswitch/bb/viewtopic.php?t=465 + * https://github.com/mkotsbak/Samsung-GT-B3730-linux-driver + */ + +/* #define DEBUG */ +/* #define VERBOSE */ + +#define KALMIA_HEADER_LENGTH 6 +#define KALMIA_ALIGN_SIZE 4 +#define KALMIA_USB_TIMEOUT 10000 + +/*-------------------------------------------------------------------------*/ + +static int +kalmia_send_init_packet(struct usbnet *dev, u8 *init_msg, u8 init_msg_len, + u8 *buffer, u8 expected_len) +{ + int act_len; + int status; + + netdev_dbg(dev->net, "Sending init packet"); + + status = usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, 0x02), + init_msg, init_msg_len, &act_len, KALMIA_USB_TIMEOUT); + if (status != 0) { + netdev_err(dev->net, + "Error sending init packet. Status %i, length %i\n", + status, act_len); + return status; + } + else if (act_len != init_msg_len) { + netdev_err(dev->net, + "Did not send all of init packet. Bytes sent: %i", + act_len); + } + else { + netdev_dbg(dev->net, "Successfully sent init packet."); + } + + status = usb_bulk_msg(dev->udev, usb_rcvbulkpipe(dev->udev, 0x81), + buffer, expected_len, &act_len, KALMIA_USB_TIMEOUT); + + if (status != 0) + netdev_err(dev->net, + "Error receiving init result. Status %i, length %i\n", + status, act_len); + else if (act_len != expected_len) + netdev_err(dev->net, "Unexpected init result length: %i\n", + act_len); + + return status; +} + +static int +kalmia_init_and_get_ethernet_addr(struct usbnet *dev, u8 *ethernet_addr) +{ + char init_msg_1[] = + { 0x57, 0x50, 0x04, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, + 0x00, 0x00 }; + char init_msg_2[] = + { 0x57, 0x50, 0x04, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0xf4, + 0x00, 0x00 }; + char receive_buf[28]; + int status; + + status = kalmia_send_init_packet(dev, init_msg_1, sizeof(init_msg_1) + / sizeof(init_msg_1[0]), receive_buf, 24); + if (status != 0) + return status; + + status = kalmia_send_init_packet(dev, init_msg_2, sizeof(init_msg_2) + / sizeof(init_msg_2[0]), receive_buf, 28); + if (status != 0) + return status; + + memcpy(ethernet_addr, receive_buf + 10, ETH_ALEN); + + return status; +} + +static int +kalmia_bind(struct usbnet *dev, struct usb_interface *intf) +{ + u8 status; + u8 ethernet_addr[ETH_ALEN]; + + /* Don't bind to AT command interface */ + if (intf->cur_altsetting->desc.bInterfaceClass != USB_CLASS_VENDOR_SPEC) + return -EINVAL; + + dev->in = usb_rcvbulkpipe(dev->udev, 0x81 & USB_ENDPOINT_NUMBER_MASK); + dev->out = usb_sndbulkpipe(dev->udev, 0x02 & USB_ENDPOINT_NUMBER_MASK); + dev->status = NULL; + + dev->net->hard_header_len += KALMIA_HEADER_LENGTH; + dev->hard_mtu = 1400; + dev->rx_urb_size = dev->hard_mtu * 10; // Found as optimal after testing + + status = kalmia_init_and_get_ethernet_addr(dev, ethernet_addr); + + if (status < 0) { + usb_set_intfdata(intf, NULL); + usb_driver_release_interface(driver_of(intf), intf); + return status; + } + + memcpy(dev->net->dev_addr, ethernet_addr, ETH_ALEN); + memcpy(dev->net->perm_addr, ethernet_addr, ETH_ALEN); + + return status; +} + +static struct sk_buff * +kalmia_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) +{ + struct sk_buff *skb2 = NULL; + u16 content_len; + unsigned char *header_start; + unsigned char ether_type_1, ether_type_2; + u8 remainder, padlen = 0; + + if (!skb_cloned(skb)) { + int headroom = skb_headroom(skb); + int tailroom = skb_tailroom(skb); + + if ((tailroom >= KALMIA_ALIGN_SIZE) && (headroom + >= KALMIA_HEADER_LENGTH)) + goto done; + + if ((headroom + tailroom) > (KALMIA_HEADER_LENGTH + + KALMIA_ALIGN_SIZE)) { + skb->data = memmove(skb->head + KALMIA_HEADER_LENGTH, + skb->data, skb->len); + skb_set_tail_pointer(skb, skb->len); + goto done; + } + } + + skb2 = skb_copy_expand(skb, KALMIA_HEADER_LENGTH, + KALMIA_ALIGN_SIZE, flags); + if (!skb2) + return NULL; + + dev_kfree_skb_any(skb); + skb = skb2; + + done: header_start = skb_push(skb, KALMIA_HEADER_LENGTH); + ether_type_1 = header_start[KALMIA_HEADER_LENGTH + 12]; + ether_type_2 = header_start[KALMIA_HEADER_LENGTH + 13]; + + netdev_dbg(dev->net, "Sending etherType: %02x%02x", ether_type_1, + ether_type_2); + + /* According to empiric data for data packages */ + header_start[0] = 0x57; + header_start[1] = 0x44; + content_len = skb->len - KALMIA_HEADER_LENGTH; + header_start[2] = (content_len & 0xff); /* low byte */ + header_start[3] = (content_len >> 8); /* high byte */ + + header_start[4] = ether_type_1; + header_start[5] = ether_type_2; + + /* Align to 4 bytes by padding with zeros */ + remainder = skb->len % KALMIA_ALIGN_SIZE; + if (remainder > 0) { + padlen = KALMIA_ALIGN_SIZE - remainder; + memset(skb_put(skb, padlen), 0, padlen); + } + + netdev_dbg( + dev->net, + "Sending package with length %i and padding %i. Header: %02x:%02x:%02x:%02x:%02x:%02x.", + content_len, padlen, header_start[0], header_start[1], + header_start[2], header_start[3], header_start[4], + header_start[5]); + + return skb; +} + +static int +kalmia_rx_fixup(struct usbnet *dev, struct sk_buff *skb) +{ + /* + * Our task here is to strip off framing, leaving skb with one + * data frame for the usbnet framework code to process. + */ + const u8 HEADER_END_OF_USB_PACKET[] = + { 0x57, 0x5a, 0x00, 0x00, 0x08, 0x00 }; + const u8 EXPECTED_UNKNOWN_HEADER_1[] = + { 0x57, 0x43, 0x1e, 0x00, 0x15, 0x02 }; + const u8 EXPECTED_UNKNOWN_HEADER_2[] = + { 0x57, 0x50, 0x0e, 0x00, 0x00, 0x00 }; + u8 i = 0; + + /* incomplete header? */ + if (skb->len < KALMIA_HEADER_LENGTH) + return 0; + + do { + struct sk_buff *skb2 = NULL; + u8 *header_start; + u16 usb_packet_length, ether_packet_length; + int is_last; + + header_start = skb->data; + + if (unlikely(header_start[0] != 0x57 || header_start[1] != 0x44)) { + if (!memcmp(header_start, EXPECTED_UNKNOWN_HEADER_1, + sizeof(EXPECTED_UNKNOWN_HEADER_1)) || !memcmp( + header_start, EXPECTED_UNKNOWN_HEADER_2, + sizeof(EXPECTED_UNKNOWN_HEADER_2))) { + netdev_dbg( + dev->net, + "Received expected unknown frame header: %02x:%02x:%02x:%02x:%02x:%02x. Package length: %i\n", + header_start[0], header_start[1], + header_start[2], header_start[3], + header_start[4], header_start[5], + skb->len - KALMIA_HEADER_LENGTH); + } + else { + netdev_err( + dev->net, + "Received unknown frame header: %02x:%02x:%02x:%02x:%02x:%02x. Package length: %i\n", + header_start[0], header_start[1], + header_start[2], header_start[3], + header_start[4], header_start[5], + skb->len - KALMIA_HEADER_LENGTH); + return 0; + } + } + else + netdev_dbg( + dev->net, + "Received header: %02x:%02x:%02x:%02x:%02x:%02x. Package length: %i\n", + header_start[0], header_start[1], header_start[2], + header_start[3], header_start[4], header_start[5], + skb->len - KALMIA_HEADER_LENGTH); + + /* subtract start header and end header */ + usb_packet_length = skb->len - (2 * KALMIA_HEADER_LENGTH); + ether_packet_length = header_start[2] + (header_start[3] << 8); + skb_pull(skb, KALMIA_HEADER_LENGTH); + + /* Some small packets misses end marker */ + if (usb_packet_length < ether_packet_length) { + ether_packet_length = usb_packet_length + + KALMIA_HEADER_LENGTH; + is_last = true; + } + else { + netdev_dbg(dev->net, "Correct package length #%i", i + + 1); + + is_last = (memcmp(skb->data + ether_packet_length, + HEADER_END_OF_USB_PACKET, + sizeof(HEADER_END_OF_USB_PACKET)) == 0); + if (!is_last) { + header_start = skb->data + ether_packet_length; + netdev_dbg( + dev->net, + "End header: %02x:%02x:%02x:%02x:%02x:%02x. Package length: %i\n", + header_start[0], header_start[1], + header_start[2], header_start[3], + header_start[4], header_start[5], + skb->len - KALMIA_HEADER_LENGTH); + } + } + + if (is_last) { + skb2 = skb; + } + else { + skb2 = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!skb2)) + return 0; + } + + skb_trim(skb2, ether_packet_length); + + if (is_last) { + return 1; + } + else { + usbnet_skb_return(dev, skb2); + skb_pull(skb, ether_packet_length); + } + + i++; + } + while (skb->len); + + return 1; +} + +static const struct driver_info kalmia_info = { + .description = "Samsung Kalmia LTE USB dongle", + .flags = FLAG_WWAN, + .bind = kalmia_bind, + .rx_fixup = kalmia_rx_fixup, + .tx_fixup = kalmia_tx_fixup +}; + +/*-------------------------------------------------------------------------*/ + +static const struct usb_device_id products[] = { + /* The unswitched USB ID, to get the module auto loaded: */ + { USB_DEVICE(0x04e8, 0x689a) }, + /* The stick swithed into modem (by e.g. usb_modeswitch): */ + { USB_DEVICE(0x04e8, 0x6889), + .driver_info = (unsigned long) &kalmia_info, }, + { /* EMPTY == end of list */} }; +MODULE_DEVICE_TABLE( usb, products); + +static struct usb_driver kalmia_driver = { + .name = "kalmia", + .id_table = products, + .probe = usbnet_probe, + .disconnect = usbnet_disconnect, + .suspend = usbnet_suspend, + .resume = usbnet_resume +}; + +static int __init kalmia_init(void) +{ + return usb_register(&kalmia_driver); +} +module_init( kalmia_init); + +static void __exit kalmia_exit(void) +{ + usb_deregister(&kalmia_driver); +} +module_exit( kalmia_exit); + +MODULE_AUTHOR("Marius Bjoernstad Kotsbak "); +MODULE_DESCRIPTION("Samsung Kalmia USB network driver"); +MODULE_LICENSE("GPL"); From 62b2bcb49cca72f6d3f39f831127a6ab315a475d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20V=C3=A1zquez=20Cao?= Date: Mon, 13 Jun 2011 15:04:43 +0000 Subject: [PATCH 215/327] IGMP snooping: set mrouters_only flag for IPv4 traffic properly Upon reception of a IGMP/IGMPv2 membership report the kernel sets the mrouters_only flag in a skb that may be a clone of the original skb, which means that sometimes the bridge loses track of membership report packets (cb buffers are tied to a specific skb and not shared) and it ends up forwading join requests to the bridge interface. This can cause unexpected membership timeouts and intermitent/permanent loss of connectivity as described in RFC 4541 [2.1.1. IGMP Forwarding Rules]: A snooping switch should forward IGMP Membership Reports only to those ports where multicast routers are attached. [...] Sending membership reports to other hosts can result, for IGMPv1 and IGMPv2, in unintentionally preventing a host from joining a specific multicast group. Signed-off-by: Fernando Luis Vazquez Cao Tested-by: Hayato Kakuta Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2f14eafdeeab..a6d87c17bb03 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1424,7 +1424,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, switch (ih->type) { case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: - BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; err = br_ip4_multicast_add_group(br, port, ih->group); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: From fc2af6c73fc9449cd5894a36bb76b8f8c0e49fd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20V=C3=A1zquez=20Cao?= Date: Mon, 13 Jun 2011 15:06:58 +0000 Subject: [PATCH 216/327] IGMP snooping: set mrouters_only flag for IPv6 traffic properly Upon reception of a MGM report packet the kernel sets the mrouters_only flag in a skb that is a clone of the original skb, which means that the bridge loses track of MGM packets (cb buffers are tied to a specific skb and not shared) and it ends up forwading join requests to the bridge interface. This can cause unexpected membership timeouts and intermitent/permanent loss of connectivity as described in RFC 4541 [2.1.1. IGMP Forwarding Rules]: A snooping switch should forward IGMP Membership Reports only to those ports where multicast routers are attached. [...] Sending membership reports to other hosts can result, for IGMPv1 and IGMPv2, in unintentionally preventing a host from joining a specific multicast group. Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a6d87c17bb03..29b9812c8da0 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1543,7 +1543,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, goto out; } mld = (struct mld_msg *)skb_transport_header(skb2); - BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; err = br_ip6_multicast_add_group(br, port, &mld->mld_mca); break; } From d3ab6fde14b69c346939cd00765b3826a4760e5c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 13 Jun 2011 16:26:22 +0000 Subject: [PATCH 217/327] MAINTAINERS: Update EBTABLES mailing list It moved to netfilter-devel@vger.kernel.org. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6a1c430d2821..0f1515a9780d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2292,8 +2292,7 @@ F: drivers/scsi/eata_pio.* EBTABLES M: Bart De Schuymer -L: ebtables-user@lists.sourceforge.net -L: ebtables-devel@lists.sourceforge.net +L: netfilter-devel@vger.kernel.org W: http://ebtables.sourceforge.net/ S: Maintained F: include/linux/netfilter_bridge/ebt_*.h From e6539e2b7aee117619153daaf61566dba1e04205 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 14 Jun 2011 21:58:13 +0000 Subject: [PATCH 218/327] ppp: use PPP_TRANS instead of the magic number 0x20 Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- drivers/net/ppp_async.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index a1b82c9c67d2..c554a397e558 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c @@ -523,7 +523,7 @@ static void ppp_async_process(unsigned long arg) #define PUT_BYTE(ap, buf, c, islcp) do { \ if ((islcp && c < 0x20) || (ap->xaccm[c >> 5] & (1 << (c & 0x1f)))) {\ *buf++ = PPP_ESCAPE; \ - *buf++ = c ^ 0x20; \ + *buf++ = c ^ PPP_TRANS; \ } else \ *buf++ = c; \ } while (0) @@ -896,7 +896,7 @@ ppp_async_input(struct asyncppp *ap, const unsigned char *buf, sp = skb_put(skb, n); memcpy(sp, buf, n); if (ap->state & SC_ESCAPE) { - sp[0] ^= 0x20; + sp[0] ^= PPP_TRANS; ap->state &= ~SC_ESCAPE; } } From f1dc045e685ea5424b3445c1ccaa0a25b3d661ec Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 14 Jun 2011 22:07:58 +0000 Subject: [PATCH 219/327] phylib: Allow BCM63XX PHY to be selected only on BCM63XX. This PHY is available integrated into BCM63xx series SOCs only. Signed-off-by: Ralf Baechle drivers/net/phy/Kconfig | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 392a6c4b72e5..a70244306c94 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -58,6 +58,7 @@ config BROADCOM_PHY config BCM63XX_PHY tristate "Drivers for Broadcom 63xx SOCs internal PHY" + depends on BCM63XX ---help--- Currently supports the 6348 and 6358 PHYs. From 2331038a96ecdad76c50ab223fd48d656d8a1184 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Tue, 14 Jun 2011 23:55:19 +0000 Subject: [PATCH 220/327] dp83640: fix phy status frame event parsing If two eternal time stamp events occur at nearly the same time, the phyter will add an extra word into the status frame. This commit fixes the parsing code to recognize and skip over the extra word. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/phy/dp83640.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index b0c9522bb535..ead323e56739 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -543,11 +543,20 @@ static void recalibrate(struct dp83640_clock *clock) /* time stamping methods */ -static void decode_evnt(struct dp83640_private *dp83640, - struct phy_txts *phy_txts, u16 ests) +static int decode_evnt(struct dp83640_private *dp83640, + void *data, u16 ests) { + struct phy_txts *phy_txts; struct ptp_clock_event event; int words = (ests >> EVNT_TS_LEN_SHIFT) & EVNT_TS_LEN_MASK; + u16 ext_status = 0; + + if (ests & MULT_EVNT) { + ext_status = *(u16 *) data; + data += sizeof(ext_status); + } + + phy_txts = data; switch (words) { /* fall through in every case */ case 3: @@ -565,6 +574,9 @@ static void decode_evnt(struct dp83640_private *dp83640, event.timestamp = phy2txts(&dp83640->edata); ptp_clock_event(dp83640->clock->ptp_clock, &event); + + words = ext_status ? words + 2 : words + 1; + return words * sizeof(u16); } static void decode_rxts(struct dp83640_private *dp83640, @@ -643,9 +655,7 @@ static void decode_status_frame(struct dp83640_private *dp83640, } else if (PSF_EVNT == type && len >= sizeof(*phy_txts)) { - phy_txts = (struct phy_txts *) ptr; - decode_evnt(dp83640, phy_txts, ests); - size = sizeof(*phy_txts); + size = decode_evnt(dp83640, ptr, ests); } else { size = 0; From ae6e86b7fb15520ac64513ad643de63e0b077aa5 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Tue, 14 Jun 2011 23:55:20 +0000 Subject: [PATCH 221/327] dp83640: drop PHY status frames in the driver. The dp83640 PHY provides time stamp and other information via special PHY status frames. Previously, the driver decoded the frames and then let the network stack drop them. This works fine when the PTP messages come over UDP. However, when receiving PTP messages via L2 packets, this creates a problem. The status frames use the official PTP destination MAC address, and so they are delivered to user space along with the "real" frames, causing confusion for applications. This commit fixes the issue by simply dropping the PHY status frames in the driver. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/phy/dp83640.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index ead323e56739..2cd8dc5847b4 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1044,8 +1044,8 @@ static bool dp83640_rxtstamp(struct phy_device *phydev, if (is_status_frame(skb, type)) { decode_status_frame(dp83640, skb); - /* Let the stack drop this frame. */ - return false; + kfree_skb(skb); + return true; } SKB_PTP_TYPE(skb) = type; From bebd097a0af8bd6c51f50a65f3a435019b0e906a Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 15 Jun 2011 05:25:01 +0000 Subject: [PATCH 222/327] tun: teach the tun/tap driver to support netpoll Commit 8d8fc29d02a33e4bd5f4fa47823c1fd386346093 changed the behavior of slave devices in regards to netpoll. Specifically it created a mutually exclusive relationship between being a slave and a netpoll-capable device. This creates problems for KVM because guests relied on needing netconsole active on a slave device to a bridge. Ideally libvirtd could just attach netconsole to the bridge device instead, but thats currently infeasible, because while the bridge device supports netpoll, it requires that all slave interface also support it, but the tun/tap driver currently does not. The most direct solution is to teach tun/tap to support netpoll, which is implemented by the patch below. I've not tested this yet, but its pretty straightforward. Signed-off-by: Neil Horman Reported-by: Rik van Riel CC: Rik van Riel CC: Maxim Krasnyansky CC: Cong Wang CC: "David S. Miller" Reviewed-by: Rik van Riel Tested-by: Rik van Riel Reviewed-by: WANG Cong Signed-off-by: David S. Miller --- drivers/net/tun.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 74e94054ab1a..5235f48be1be 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -460,7 +460,23 @@ static u32 tun_net_fix_features(struct net_device *dev, u32 features) return (features & tun->set_features) | (features & ~TUN_USER_FEATURES); } - +#ifdef CONFIG_NET_POLL_CONTROLLER +static void tun_poll_controller(struct net_device *dev) +{ + /* + * Tun only receives frames when: + * 1) the char device endpoint gets data from user space + * 2) the tun socket gets a sendmsg call from user space + * Since both of those are syncronous operations, we are guaranteed + * never to have pending data when we poll for it + * so theres nothing to do here but return. + * We need this though so netpoll recognizes us as an interface that + * supports polling, which enables bridge devices in virt setups to + * still use netconsole + */ + return; +} +#endif static const struct net_device_ops tun_netdev_ops = { .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, @@ -468,6 +484,9 @@ static const struct net_device_ops tun_netdev_ops = { .ndo_start_xmit = tun_net_xmit, .ndo_change_mtu = tun_net_change_mtu, .ndo_fix_features = tun_net_fix_features, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = tun_poll_controller, +#endif }; static const struct net_device_ops tap_netdev_ops = { @@ -480,6 +499,9 @@ static const struct net_device_ops tap_netdev_ops = { .ndo_set_multicast_list = tun_net_mclist, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = tun_poll_controller, +#endif }; /* Initialize net device. */ From a1b7f85e4f632f9cc342d8a34a3903feaf47a261 Mon Sep 17 00:00:00 2001 From: "sjur.brandeland@stericsson.com" Date: Wed, 15 Jun 2011 12:38:25 +0000 Subject: [PATCH 223/327] caif: Bugfix - XOFF removed channel from caif-mux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit XOFF was mixed up with DOWN indication, causing causing CAIF channel to be removed from mux and all incoming traffic to be lost after receiving flow-off. Fix this by replacing FLOW_OFF with DOWN notification. Signed-off-by: Sjur Brændeland Signed-off-by: David S. Miller --- net/caif/cfmuxl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index 3a66b8c10e09..c23979e79dfa 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -255,7 +255,7 @@ static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, if (cfsrvl_phyid_match(layer, phyid) && layer->ctrlcmd) { - if ((ctrl == _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND || + if ((ctrl == _CAIF_CTRLCMD_PHYIF_DOWN_IND || ctrl == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND) && layer->id != 0) { From e3cb78c772de593afa720687ce3abbed8d93b0c3 Mon Sep 17 00:00:00 2001 From: Antoine Reversat Date: Thu, 16 Jun 2011 10:47:13 +0000 Subject: [PATCH 224/327] vlan: don't call ndo_vlan_rx_register on hardware that doesn't have vlan support This patch removes the call to ndo_vlan_rx_register if the underlying device doesn't have hardware support for VLAN. Signed-off-by: Antoine Reversat Signed-off-by: David S. Miller --- net/8021q/vlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index c7a581a96894..917ecb93ea28 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -205,7 +205,7 @@ int register_vlan_dev(struct net_device *dev) grp->nr_vlans++; if (ngrp) { - if (ops->ndo_vlan_rx_register) + if (ops->ndo_vlan_rx_register && (real_dev->features & NETIF_F_HW_VLAN_RX)) ops->ndo_vlan_rx_register(real_dev, ngrp); rcu_assign_pointer(real_dev->vlgrp, ngrp); } From 118133e6580a0c912cda86109b6468b5ffe73f1c Mon Sep 17 00:00:00 2001 From: Sonic Zhang Date: Thu, 16 Jun 2011 12:31:58 +0000 Subject: [PATCH 225/327] netdev: bfin_mac: fix memory leak when freeing dma descriptors The size of the desc array is not the size of the desc structure, so when we try to free up things, we leak some parts. Reported-by: Regis Dargent Signed-off-by: Sonic Zhang Signed-off-by: Mike Frysinger Signed-off-by: David S. Miller --- drivers/net/bfin_mac.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c index 68d45ba2d9b9..6c019e148546 100644 --- a/drivers/net/bfin_mac.c +++ b/drivers/net/bfin_mac.c @@ -52,13 +52,13 @@ MODULE_DESCRIPTION(DRV_DESC); MODULE_ALIAS("platform:bfin_mac"); #if defined(CONFIG_BFIN_MAC_USE_L1) -# define bfin_mac_alloc(dma_handle, size) l1_data_sram_zalloc(size) -# define bfin_mac_free(dma_handle, ptr) l1_data_sram_free(ptr) +# define bfin_mac_alloc(dma_handle, size, num) l1_data_sram_zalloc(size*num) +# define bfin_mac_free(dma_handle, ptr, num) l1_data_sram_free(ptr) #else -# define bfin_mac_alloc(dma_handle, size) \ - dma_alloc_coherent(NULL, size, dma_handle, GFP_KERNEL) -# define bfin_mac_free(dma_handle, ptr) \ - dma_free_coherent(NULL, sizeof(*ptr), ptr, dma_handle) +# define bfin_mac_alloc(dma_handle, size, num) \ + dma_alloc_coherent(NULL, size*num, dma_handle, GFP_KERNEL) +# define bfin_mac_free(dma_handle, ptr, num) \ + dma_free_coherent(NULL, sizeof(*ptr)*num, ptr, dma_handle) #endif #define PKT_BUF_SZ 1580 @@ -95,7 +95,7 @@ static void desc_list_free(void) t = t->next; } } - bfin_mac_free(dma_handle, tx_desc); + bfin_mac_free(dma_handle, tx_desc, CONFIG_BFIN_TX_DESC_NUM); } if (rx_desc) { @@ -109,7 +109,7 @@ static void desc_list_free(void) r = r->next; } } - bfin_mac_free(dma_handle, rx_desc); + bfin_mac_free(dma_handle, rx_desc, CONFIG_BFIN_RX_DESC_NUM); } } @@ -126,13 +126,13 @@ static int desc_list_init(void) #endif tx_desc = bfin_mac_alloc(&dma_handle, - sizeof(struct net_dma_desc_tx) * + sizeof(struct net_dma_desc_tx), CONFIG_BFIN_TX_DESC_NUM); if (tx_desc == NULL) goto init_error; rx_desc = bfin_mac_alloc(&dma_handle, - sizeof(struct net_dma_desc_rx) * + sizeof(struct net_dma_desc_rx), CONFIG_BFIN_RX_DESC_NUM); if (rx_desc == NULL) goto init_error; From d8ad7d1123a960cc9f276bd499f9325c6f5e1bd1 Mon Sep 17 00:00:00 2001 From: Takao Indoh Date: Tue, 29 Mar 2011 12:35:04 -0400 Subject: [PATCH 226/327] generic-ipi: Fix kexec boot crash by initializing call_single_queue before enabling interrupts There is a problem that kdump(2nd kernel) sometimes hangs up due to a pending IPI from 1st kernel. Kernel panic occurs because IPI comes before call_single_queue is initialized. To fix the crash, rename init_call_single_data() to call_function_init() and call it in start_kernel() so that call_single_queue can be initialized before enabling interrupts. The details of the crash are: (1) 2nd kernel boots up (2) A pending IPI from 1st kernel comes when irqs are first enabled in start_kernel(). (3) Kernel tries to handle the interrupt, but call_single_queue is not initialized yet at this point. As a result, in the generic_smp_call_function_single_interrupt(), NULL pointer dereference occurs when list_replace_init() tries to access &q->list.next. Therefore this patch changes the name of init_call_single_data() to call_function_init() and calls it before local_irq_enable() in start_kernel(). Signed-off-by: Takao Indoh Reviewed-by: WANG Cong Acked-by: Neil Horman Acked-by: Vivek Goyal Acked-by: Peter Zijlstra Cc: Milton Miller Cc: Jens Axboe Cc: Paul E. McKenney Cc: kexec@lists.infradead.org Link: http://lkml.kernel.org/r/D6CBEE2F420741indou.takao@jp.fujitsu.com Signed-off-by: Ingo Molnar --- include/linux/smp.h | 5 ++++- init/main.c | 1 + kernel/smp.c | 5 +---- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/smp.h b/include/linux/smp.h index 7ad824d510a2..8cc38d3bab0c 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -85,12 +85,15 @@ int smp_call_function_any(const struct cpumask *mask, * Generic and arch helpers */ #ifdef CONFIG_USE_GENERIC_SMP_HELPERS +void __init call_function_init(void); void generic_smp_call_function_single_interrupt(void); void generic_smp_call_function_interrupt(void); void ipi_call_lock(void); void ipi_call_unlock(void); void ipi_call_lock_irq(void); void ipi_call_unlock_irq(void); +#else +static inline void call_function_init(void) { } #endif /* @@ -134,7 +137,7 @@ static inline void smp_send_reschedule(int cpu) { } #define smp_prepare_boot_cpu() do {} while (0) #define smp_call_function_many(mask, func, info, wait) \ (up_smp_call_function(func, info)) -static inline void init_call_single_data(void) { } +static inline void call_function_init(void) { } static inline int smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, diff --git a/init/main.c b/init/main.c index cafba67c13bf..d7211faed2ad 100644 --- a/init/main.c +++ b/init/main.c @@ -542,6 +542,7 @@ asmlinkage void __init start_kernel(void) timekeeping_init(); time_init(); profile_init(); + call_function_init(); if (!irqs_disabled()) printk(KERN_CRIT "start_kernel(): bug: interrupts were " "enabled early\n"); diff --git a/kernel/smp.c b/kernel/smp.c index 73a195193558..fb67dfa8394e 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -74,7 +74,7 @@ static struct notifier_block __cpuinitdata hotplug_cfd_notifier = { .notifier_call = hotplug_cfd, }; -static int __cpuinit init_call_single_data(void) +void __init call_function_init(void) { void *cpu = (void *)(long)smp_processor_id(); int i; @@ -88,10 +88,7 @@ static int __cpuinit init_call_single_data(void) hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu); register_cpu_notifier(&hotplug_cfd_notifier); - - return 0; } -early_initcall(init_call_single_data); /* * csd_lock/csd_unlock used to serialize access to per-cpu csd resources From cf6f1ff17f56c275424c5a341fc4d27ccbbfa71c Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Fri, 17 Jun 2011 08:18:35 +0200 Subject: [PATCH 227/327] ALSA: isight: adjust for new queueing API Since commit 13882a82ee16 (optimize iso queueing by setting wake only after the last packet), drivers are required to call fw_iso_context_queue_flush() after queueing a batch of packets. The missing call would have an effect only if the controller queue underruns, but then the DMA would stop completely. Signed-off-by: Clemens Ladisch Signed-off-by: Takashi Iwai --- sound/firewire/isight.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/firewire/isight.c b/sound/firewire/isight.c index 86ee16ca365e..440030818db7 100644 --- a/sound/firewire/isight.c +++ b/sound/firewire/isight.c @@ -209,6 +209,7 @@ static void isight_packet(struct fw_iso_context *context, u32 cycle, isight->packet_index = -1; return; } + fw_iso_context_queue_flush(isight->context); if (++index >= QUEUE_LENGTH) index = 0; From ad2409413d09fca763be1ac5161f2a9d82367903 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 17 Jun 2011 14:23:46 +0200 Subject: [PATCH 228/327] ALSA: hda - Fix no NID error with VIA codecs The via driver spews warnigs like hda-codec: no NID for mapping control Independent HP:0:0 with some codecs because snd_hda_add_nid() is called with nid=0. This patch fixes it by skipping the call when no corresponding widget is found. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_via.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index 605c99e1e520..c952582fb218 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -832,10 +832,13 @@ static int via_hp_build(struct hda_codec *codec) knew->subdevice = HDA_SUBDEV_NID_FLAG | nid; knew->private_value = nid; - knew = via_clone_control(spec, &via_hp_mixer[1]); - if (knew == NULL) - return -ENOMEM; - knew->subdevice = side_mute_channel(spec); + nid = side_mute_channel(spec); + if (nid) { + knew = via_clone_control(spec, &via_hp_mixer[1]); + if (knew == NULL) + return -ENOMEM; + knew->subdevice = nid; + } return 0; } From 5afa9133cfe67f1bfead6049a9640c9262a7101c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Jun 2011 10:14:59 -0400 Subject: [PATCH 229/327] SUNRPC: Ensure the RPC client only quits on fatal signals Fix a couple of instances where we were exiting the RPC client on arbitrary signals. We should only do so on fatal signals. Cc: stable@kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 4 ++-- net/sunrpc/clnt.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 339ba64cce1e..5daf6cc4faea 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -577,13 +577,13 @@ retry: } inode = &gss_msg->inode->vfs_inode; for (;;) { - prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_KILLABLE); spin_lock(&inode->i_lock); if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { break; } spin_unlock(&inode->i_lock); - if (signalled()) { + if (fatal_signal_pending(current)) { err = -ERESTARTSYS; goto out_intr; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 566bcfd067f6..8c9141583d6f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1061,7 +1061,7 @@ call_allocate(struct rpc_task *task) dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); - if (RPC_IS_ASYNC(task) || !signalled()) { + if (RPC_IS_ASYNC(task) || !fatal_signal_pending(current)) { task->tk_action = call_allocate; rpc_delay(task, HZ>>4); return; From e479c60456ef22b0869432887216186aabaed086 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Fri, 17 Jun 2011 04:35:37 -0400 Subject: [PATCH 230/327] spi/bfin_spi: fix handling of default bits per word setting The default bits per word setting should be 8 bits, but since most of our devices have been explicitly setting this up, we didn't notice when the default stopped working. At the moment, any default transfers without an explicit bit size setting error out with: bfin-spi bfin-spi.0: transfer: unsupported bits_per_word So in the transfer logic, have a bits_per_word setting of 0 fall into the 8 bit transfer logic. Signed-off-by: Mike Frysinger Signed-off-by: Grant Likely --- drivers/spi/spi_bfin5xx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi_bfin5xx.c b/drivers/spi/spi_bfin5xx.c index f706dba165cf..cc880c95e7de 100644 --- a/drivers/spi/spi_bfin5xx.c +++ b/drivers/spi/spi_bfin5xx.c @@ -681,13 +681,14 @@ static void bfin_spi_pump_transfers(unsigned long data) drv_data->cs_change = transfer->cs_change; /* Bits per word setup */ - bits_per_word = transfer->bits_per_word ? : message->spi->bits_per_word; - if ((bits_per_word > 0) && (bits_per_word % 16 == 0)) { + bits_per_word = transfer->bits_per_word ? : + message->spi->bits_per_word ? : 8; + if (bits_per_word % 16 == 0) { drv_data->n_bytes = bits_per_word/8; drv_data->len = (transfer->len) >> 1; cr_width = BIT_CTL_WORDSIZE; drv_data->ops = &bfin_bfin_spi_transfer_ops_u16; - } else if ((bits_per_word > 0) && (bits_per_word % 8 == 0)) { + } else if (bits_per_word % 8 == 0) { drv_data->n_bytes = bits_per_word/8; drv_data->len = transfer->len; cr_width = 0; From 879669961b11e7f40b518784863a259f735a72bf Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 17 Jun 2011 11:25:59 +0100 Subject: [PATCH 231/327] KEYS/DNS: Fix ____call_usermodehelper() to not lose the session keyring ____call_usermodehelper() now erases any credentials set by the subprocess_inf::init() function. The problem is that commit 17f60a7da150 ("capabilites: allow the application of capability limits to usermode helpers") creates and commits new credentials with prepare_kernel_cred() after the call to the init() function. This wipes all keyrings after umh_keys_init() is called. The best way to deal with this is to put the init() call just prior to the commit_creds() call, and pass the cred pointer to init(). That means that umh_keys_init() and suchlike can modify the credentials _before_ they are published and potentially in use by the rest of the system. This prevents request_key() from working as it is prevented from passing the session keyring it set up with the authorisation token to /sbin/request-key, and so the latter can't assume the authority to instantiate the key. This causes the in-kernel DNS resolver to fail with ENOKEY unconditionally. Signed-off-by: David Howells Acked-by: Eric Paris Tested-by: Jeff Layton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- include/linux/kmod.h | 8 ++++---- kernel/kmod.c | 16 +++++++++------- security/keys/request_key.c | 3 +-- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 97e0d52d72fd..6075a1e727ae 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1996,7 +1996,7 @@ static void wait_for_dump_helpers(struct file *file) * is a special value that we use to trap recursive * core dumps */ -static int umh_pipe_setup(struct subprocess_info *info) +static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) { struct file *rp, *wp; struct fdtable *fdt; diff --git a/include/linux/kmod.h b/include/linux/kmod.h index d4a5c84c503d..0da38cf7db7b 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -45,7 +45,7 @@ static inline int request_module_nowait(const char *name, ...) { return -ENOSYS; #endif -struct key; +struct cred; struct file; enum umh_wait { @@ -62,7 +62,7 @@ struct subprocess_info { char **envp; enum umh_wait wait; int retval; - int (*init)(struct subprocess_info *info); + int (*init)(struct subprocess_info *info, struct cred *new); void (*cleanup)(struct subprocess_info *info); void *data; }; @@ -73,7 +73,7 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, /* Set various pieces of state into the subprocess_info structure */ void call_usermodehelper_setfns(struct subprocess_info *info, - int (*init)(struct subprocess_info *info), + int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *info), void *data); @@ -87,7 +87,7 @@ void call_usermodehelper_freeinfo(struct subprocess_info *info); static inline int call_usermodehelper_fns(char *path, char **argv, char **envp, enum umh_wait wait, - int (*init)(struct subprocess_info *info), + int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *), void *data) { struct subprocess_info *info; diff --git a/kernel/kmod.c b/kernel/kmod.c index ad6a81c58b44..47613dfb7b28 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -156,12 +156,6 @@ static int ____call_usermodehelper(void *data) */ set_user_nice(current, 0); - if (sub_info->init) { - retval = sub_info->init(sub_info); - if (retval) - goto fail; - } - retval = -ENOMEM; new = prepare_kernel_cred(current); if (!new) @@ -173,6 +167,14 @@ static int ____call_usermodehelper(void *data) new->cap_inheritable); spin_unlock(&umh_sysctl_lock); + if (sub_info->init) { + retval = sub_info->init(sub_info, new); + if (retval) { + abort_creds(new); + goto fail; + } + } + commit_creds(new); retval = kernel_execve(sub_info->path, @@ -388,7 +390,7 @@ EXPORT_SYMBOL(call_usermodehelper_setup); * context in which call_usermodehelper_exec is called. */ void call_usermodehelper_setfns(struct subprocess_info *info, - int (*init)(struct subprocess_info *info), + int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *info), void *data) { diff --git a/security/keys/request_key.c b/security/keys/request_key.c index d31862e0aa1c..8e319a416eec 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -71,9 +71,8 @@ EXPORT_SYMBOL(complete_request_key); * This is called in context of freshly forked kthread before kernel_execve(), * so we can simply install the desired session_keyring at this point. */ -static int umh_keys_init(struct subprocess_info *info) +static int umh_keys_init(struct subprocess_info *info, struct cred *cred) { - struct cred *cred = (struct cred*)current_cred(); struct key *keyring = info->data; return install_session_keyring_to_cred(cred, keyring); From 7585717f304f5ed005cc4ad933a69aab3efbd136 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 13 Jun 2011 20:00:16 -0400 Subject: [PATCH 232/327] Btrfs: fix relocation races The recent commit to get rid of our trans_mutex introduced some races with block group relocation. The problem is that relocation needs to do some record keeping about each root, and it was relying on the transaction mutex to coordinate things in subtle ways. This fix adds a mutex just for the relocation code and makes sure it doesn't have a big impact on normal operations. The race is really fixed in btrfs_record_root_in_trans, which is where we step back and wait for the relocation code to finish accounting setup. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 14 ++++++++ fs/btrfs/disk-io.c | 1 + fs/btrfs/relocation.c | 30 +++++++++++------ fs/btrfs/transaction.c | 73 +++++++++++++++++++++++++++++++++++++++--- 4 files changed, 105 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8490ee063709..a2c91a102b72 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -967,6 +967,12 @@ struct btrfs_fs_info { struct srcu_struct subvol_srcu; spinlock_t trans_lock; + /* + * the reloc mutex goes with the trans lock, it is taken + * during commit to protect us from the relocation code + */ + struct mutex reloc_mutex; + struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; @@ -1172,6 +1178,14 @@ struct btrfs_root { u32 type; u64 highest_objectid; + + /* btrfs_record_root_in_trans is a multi-step process, + * and it can race with the balancing code. But the + * race is very small, and only the first time the root + * is added to each transaction. So in_trans_setup + * is used to tell us when more checks are required + */ + unsigned long in_trans_setup; int ref_cows; int track_dirty; int in_radix; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 20c111b3fa0d..0b2b4b759136 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1620,6 +1620,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, spin_lock_init(&fs_info->fs_roots_radix_lock); spin_lock_init(&fs_info->delayed_iput_lock); spin_lock_init(&fs_info->defrag_inodes_lock); + mutex_init(&fs_info->reloc_mutex); init_completion(&fs_info->kobj_unregister); fs_info->tree_root = tree_root; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f25b10a22a0a..086b1e6b8614 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1368,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, int ret; if (!root->reloc_root) - return 0; + goto out; reloc_root = root->reloc_root; root_item = &reloc_root->root_item; @@ -1390,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, ret = btrfs_update_root(trans, root->fs_info->tree_root, &reloc_root->root_key, root_item); BUG_ON(ret); + +out: return 0; } @@ -2142,10 +2144,11 @@ int prepare_to_merge(struct reloc_control *rc, int err) u64 num_bytes = 0; int ret; - spin_lock(&root->fs_info->trans_lock); + mutex_lock(&root->fs_info->reloc_mutex); rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; rc->merging_rsv_size += rc->nodes_relocated * 2; - spin_unlock(&root->fs_info->trans_lock); + mutex_unlock(&root->fs_info->reloc_mutex); + again: if (!err) { num_bytes = rc->merging_rsv_size; @@ -2214,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc) int ret; again: root = rc->extent_root; - spin_lock(&root->fs_info->trans_lock); + + /* + * this serializes us with btrfs_record_root_in_transaction, + * we have to make sure nobody is in the middle of + * adding their roots to the list while we are + * doing this splice + */ + mutex_lock(&root->fs_info->reloc_mutex); list_splice_init(&rc->reloc_roots, &reloc_roots); - spin_unlock(&root->fs_info->trans_lock); + mutex_unlock(&root->fs_info->reloc_mutex); while (!list_empty(&reloc_roots)) { found = 1; @@ -3590,17 +3600,19 @@ next: static void set_reloc_control(struct reloc_control *rc) { struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - spin_lock(&fs_info->trans_lock); + + mutex_lock(&fs_info->reloc_mutex); fs_info->reloc_ctl = rc; - spin_unlock(&fs_info->trans_lock); + mutex_unlock(&fs_info->reloc_mutex); } static void unset_reloc_control(struct reloc_control *rc) { struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - spin_lock(&fs_info->trans_lock); + + mutex_lock(&fs_info->reloc_mutex); fs_info->reloc_ctl = NULL; - spin_unlock(&fs_info->trans_lock); + mutex_unlock(&fs_info->reloc_mutex); } static int check_extent_flags(u64 flags) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2b3590b9fe98..833996a0c628 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -126,28 +126,85 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail) * to make sure the old root from before we joined the transaction is deleted * when the transaction commits */ -int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, +static int record_root_in_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root) { if (root->ref_cows && root->last_trans < trans->transid) { WARN_ON(root == root->fs_info->extent_root); WARN_ON(root->commit_root != root->node); + /* + * see below for in_trans_setup usage rules + * we have the reloc mutex held now, so there + * is only one writer in this function + */ + root->in_trans_setup = 1; + + /* make sure readers find in_trans_setup before + * they find our root->last_trans update + */ + smp_wmb(); + spin_lock(&root->fs_info->fs_roots_radix_lock); if (root->last_trans == trans->transid) { spin_unlock(&root->fs_info->fs_roots_radix_lock); return 0; } - root->last_trans = trans->transid; radix_tree_tag_set(&root->fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); spin_unlock(&root->fs_info->fs_roots_radix_lock); + root->last_trans = trans->transid; + + /* this is pretty tricky. We don't want to + * take the relocation lock in btrfs_record_root_in_trans + * unless we're really doing the first setup for this root in + * this transaction. + * + * Normally we'd use root->last_trans as a flag to decide + * if we want to take the expensive mutex. + * + * But, we have to set root->last_trans before we + * init the relocation root, otherwise, we trip over warnings + * in ctree.c. The solution used here is to flag ourselves + * with root->in_trans_setup. When this is 1, we're still + * fixing up the reloc trees and everyone must wait. + * + * When this is zero, they can trust root->last_trans and fly + * through btrfs_record_root_in_trans without having to take the + * lock. smp_wmb() makes sure that all the writes above are + * done before we pop in the zero below + */ btrfs_init_reloc_root(trans, root); + smp_wmb(); + root->in_trans_setup = 0; } return 0; } + +int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + if (!root->ref_cows) + return 0; + + /* + * see record_root_in_trans for comments about in_trans_setup usage + * and barriers + */ + smp_rmb(); + if (root->last_trans == trans->transid && + !root->in_trans_setup) + return 0; + + mutex_lock(&root->fs_info->reloc_mutex); + record_root_in_trans(trans, root); + mutex_unlock(&root->fs_info->reloc_mutex); + + return 0; +} + /* wait for commit against the current transaction to become unblocked * when this is done, it is safe to start a new transaction, but the current * transaction might not be fully on disk. @@ -882,7 +939,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, parent = dget_parent(dentry); parent_inode = parent->d_inode; parent_root = BTRFS_I(parent_inode)->root; - btrfs_record_root_in_trans(trans, parent_root); + record_root_in_trans(trans, parent_root); /* * insert the directory item @@ -900,7 +957,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ret = btrfs_update_inode(trans, parent_root, parent_inode); BUG_ON(ret); - btrfs_record_root_in_trans(trans, root); + record_root_in_trans(trans, root); btrfs_set_root_last_snapshot(&root->root_item, trans->transid); memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); btrfs_check_and_init_root_item(new_root_item); @@ -1247,6 +1304,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, } while (atomic_read(&cur_trans->num_writers) > 1 || (should_grow && cur_trans->num_joined != joined)); + /* + * the reloc mutex makes sure that we stop + * the balancing code from coming in and moving + * extents around in the middle of the commit + */ + mutex_lock(&root->fs_info->reloc_mutex); + ret = create_pending_snapshots(trans, root->fs_info); BUG_ON(ret); @@ -1312,6 +1376,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, root->fs_info->running_transaction = NULL; root->fs_info->trans_no_join = 0; spin_unlock(&root->fs_info->trans_lock); + mutex_unlock(&root->fs_info->reloc_mutex); wake_up(&root->fs_info->transaction_wait); From 2ff7d09a1b0f20f2d9c1bde0e003d4e384de2313 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 1 Jun 2011 17:49:14 +0000 Subject: [PATCH 233/327] RDMA/cxgb4: Don't exceed hw IQ depth limit for user CQs Memory allocated for user CQs gets rounded up to the next page boundary. And after rounding, we recalculate the resulting IQ depth and we need to make sure we don't exceed the HW limits. This bug can result a much smaller CQ allocated than was expected if the HW size field is exceeded, resulting in CQ overflow failures. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 8d8f8add6fcd..1720dc790d13 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -801,6 +801,10 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, if (ucontext) { memsize = roundup(memsize, PAGE_SIZE); hwentries = memsize / sizeof *chp->cq.queue; + while (hwentries > T4_MAX_IQ_SIZE) { + memsize -= PAGE_SIZE; + hwentries = memsize / sizeof *chp->cq.queue; + } } chp->cq.size = hwentries; chp->cq.memsize = memsize; From 3ed4498caf381a73d6259d3ffacc914b17a507ec Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 13 Jun 2011 17:54:22 +0000 Subject: [PATCH 234/327] btrfs: fix dereference of ERR_PTR value smatch reports: btrfs_recover_log_trees error: 'wc.replay_dest' dereferencing possible ERR_PTR() Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 592396c6dc47..4ce8a9f41d1e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3177,7 +3177,7 @@ again: tmp_key.offset = (u64)-1; wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); - BUG_ON(!wc.replay_dest); + BUG_ON(IS_ERR_OR_NULL(wc.replay_dest)); wc.replay_dest->log_root = log; btrfs_record_root_in_trans(trans, wc.replay_dest); From 9fe6a50fb764f508dd2de47a66e62e51388791fb Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 16 Jun 2011 09:04:57 +0000 Subject: [PATCH 235/327] btrfs: Remove unused sysfs code Removes code no longer used. The sysfs file itself is kept, because the btrfs developers expressed interest in putting new entries to sysfs. Signed-off-by: Maarten Lankhorst Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 - fs/btrfs/disk-io.c | 1 - fs/btrfs/sysfs.c | 146 --------------------------------------------- 3 files changed, 148 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a2c91a102b72..8e948ec1ee6b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1195,7 +1195,6 @@ struct btrfs_root { struct btrfs_key defrag_max; int defrag_running; char *name; - int in_sysfs; /* the dirty list is only used by non-reference counted roots */ struct list_head dirty_list; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0b2b4b759136..c25ef5a0ccd6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1044,7 +1044,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->last_trans = 0; root->highest_objectid = 0; root->name = NULL; - root->in_sysfs = 0; root->inode_tree = RB_ROOT; INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); root->block_rsv = NULL; diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index c3c223ae6691..daac9ae6d731 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -28,152 +28,6 @@ #include "disk-io.h" #include "transaction.h" -static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_root_used(&root->root_item)); -} - -static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_root_limit(&root->root_item)); -} - -static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf) -{ - - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_bytes_used(&fs->super_copy)); -} - -static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_total_bytes(&fs->super_copy)); -} - -static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_sectorsize(&fs->super_copy)); -} - -/* this is for root attrs (subvols/snapshots) */ -struct btrfs_root_attr { - struct attribute attr; - ssize_t (*show)(struct btrfs_root *, char *); - ssize_t (*store)(struct btrfs_root *, const char *, size_t); -}; - -#define ROOT_ATTR(name, mode, show, store) \ -static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \ - show, store) - -ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL); -ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL); - -static struct attribute *btrfs_root_attrs[] = { - &btrfs_root_attr_blocks_used.attr, - &btrfs_root_attr_block_limit.attr, - NULL, -}; - -/* this is for super attrs (actual full fs) */ -struct btrfs_super_attr { - struct attribute attr; - ssize_t (*show)(struct btrfs_fs_info *, char *); - ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t); -}; - -#define SUPER_ATTR(name, mode, show, store) \ -static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \ - show, store) - -SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL); -SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL); -SUPER_ATTR(blocksize, 0444, super_blocksize_show, NULL); - -static struct attribute *btrfs_super_attrs[] = { - &btrfs_super_attr_blocks_used.attr, - &btrfs_super_attr_total_blocks.attr, - &btrfs_super_attr_blocksize.attr, - NULL, -}; - -static ssize_t btrfs_super_attr_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, - super_kobj); - struct btrfs_super_attr *a = container_of(attr, - struct btrfs_super_attr, - attr); - - return a->show ? a->show(fs, buf) : 0; -} - -static ssize_t btrfs_super_attr_store(struct kobject *kobj, - struct attribute *attr, - const char *buf, size_t len) -{ - struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, - super_kobj); - struct btrfs_super_attr *a = container_of(attr, - struct btrfs_super_attr, - attr); - - return a->store ? a->store(fs, buf, len) : 0; -} - -static ssize_t btrfs_root_attr_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct btrfs_root *root = container_of(kobj, struct btrfs_root, - root_kobj); - struct btrfs_root_attr *a = container_of(attr, - struct btrfs_root_attr, - attr); - - return a->show ? a->show(root, buf) : 0; -} - -static ssize_t btrfs_root_attr_store(struct kobject *kobj, - struct attribute *attr, - const char *buf, size_t len) -{ - struct btrfs_root *root = container_of(kobj, struct btrfs_root, - root_kobj); - struct btrfs_root_attr *a = container_of(attr, - struct btrfs_root_attr, - attr); - return a->store ? a->store(root, buf, len) : 0; -} - -static void btrfs_super_release(struct kobject *kobj) -{ - struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, - super_kobj); - complete(&fs->kobj_unregister); -} - -static void btrfs_root_release(struct kobject *kobj) -{ - struct btrfs_root *root = container_of(kobj, struct btrfs_root, - root_kobj); - complete(&root->kobj_unregister); -} - -static const struct sysfs_ops btrfs_super_attr_ops = { - .show = btrfs_super_attr_show, - .store = btrfs_super_attr_store, -}; - -static const struct sysfs_ops btrfs_root_attr_ops = { - .show = btrfs_root_attr_show, - .store = btrfs_root_attr_store, -}; - /* /sys/fs/btrfs/ entry */ static struct kset *btrfs_kset; From 19fd294957e426bfdd8e19085096467ec18df5c4 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 15 Jun 2011 10:47:30 +0000 Subject: [PATCH 236/327] btrfs: fix wrong reservation when doing delayed inode operations We have migrated the space for the delayed inode items from trans_block_rsv to global_block_rsv, but we forgot to set trans->block_rsv to global_block_rsv when we doing delayed inode operations, and the following Oops happened: [ 9792.654889] ------------[ cut here ]------------ [ 9792.654898] WARNING: at fs/btrfs/extent-tree.c:5681 btrfs_alloc_free_block+0xca/0x27c [btrfs]() [ 9792.654899] Hardware name: To Be Filled By O.E.M. [ 9792.654900] Modules linked in: btrfs zlib_deflate libcrc32c ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables arc4 rt61pci rt2x00pci rt2x00lib snd_hda_codec_hdmi mac80211 snd_hda_codec_realtek cfg80211 snd_hda_intel edac_core snd_seq rfkill pcspkr serio_raw snd_hda_codec eeprom_93cx6 edac_mce_amd sp5100_tco i2c_piix4 k10temp snd_hwdep snd_seq_device snd_pcm floppy r8169 xhci_hcd mii snd_timer snd soundcore snd_page_alloc ipv6 firewire_ohci pata_acpi ata_generic firewire_core pata_via crc_itu_t radeon ttm drm_kms_helper drm i2c_algo_bit i2c_core [last unloaded: scsi_wait_scan] [ 9792.654919] Pid: 2762, comm: rm Tainted: G W 2.6.39+ #1 [ 9792.654920] Call Trace: [ 9792.654922] [] warn_slowpath_common+0x83/0x9b [ 9792.654925] [] warn_slowpath_null+0x1a/0x1c [ 9792.654933] [] btrfs_alloc_free_block+0xca/0x27c [btrfs] [ 9792.654945] [] ? map_extent_buffer+0x6e/0xa8 [btrfs] [ 9792.654953] [] __btrfs_cow_block+0xfc/0x30c [btrfs] [ 9792.654963] [] ? btrfs_buffer_uptodate+0x47/0x58 [btrfs] [ 9792.654970] [] ? read_block_for_search+0x94/0x368 [btrfs] [ 9792.654978] [] btrfs_cow_block+0xfe/0x146 [btrfs] [ 9792.654986] [] btrfs_search_slot+0x14d/0x4b6 [btrfs] [ 9792.654997] [] ? map_extent_buffer+0x6e/0xa8 [btrfs] [ 9792.655022] [] btrfs_lookup_inode+0x2f/0x8f [btrfs] [ 9792.655025] [] ? _cond_resched+0xe/0x22 [ 9792.655027] [] ? mutex_lock+0x29/0x50 [ 9792.655039] [] btrfs_update_delayed_inode+0x72/0x137 [btrfs] [ 9792.655051] [] btrfs_run_delayed_items+0x90/0xdb [btrfs] [ 9792.655062] [] btrfs_commit_transaction+0x228/0x654 [btrfs] [ 9792.655064] [] ? remove_wait_queue+0x3a/0x3a [ 9792.655075] [] btrfs_evict_inode+0x14d/0x202 [btrfs] [ 9792.655077] [] evict+0x71/0x111 [ 9792.655079] [] iput+0x12a/0x132 [ 9792.655081] [] do_unlinkat+0x106/0x155 [ 9792.655083] [] ? path_put+0x1f/0x23 [ 9792.655085] [] ? audit_syscall_entry+0x145/0x171 [ 9792.655087] [] ? putname+0x34/0x36 [ 9792.655090] [] sys_unlinkat+0x29/0x2b [ 9792.655092] [] system_call_fastpath+0x16/0x1b [ 9792.655093] ---[ end trace 02b696eb02b3f768 ]--- This patch fix it by setting the reservation of the transaction handle to the correct one. Reported-by: Josef Bacik Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/delayed-inode.c | 25 ++++++++++++++++++++----- fs/btrfs/delayed-inode.h | 1 - 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 6462c29d2d37..fc515b787e8c 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -297,7 +297,6 @@ struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len) item->data_len = data_len; item->ins_or_del = 0; item->bytes_reserved = 0; - item->block_rsv = NULL; item->delayed_node = NULL; atomic_set(&item->refs, 1); } @@ -593,10 +592,8 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, num_bytes = btrfs_calc_trans_metadata_size(root, 1); ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); - if (!ret) { + if (!ret) item->bytes_reserved = num_bytes; - item->block_rsv = dst_rsv; - } return ret; } @@ -604,10 +601,13 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, static void btrfs_delayed_item_release_metadata(struct btrfs_root *root, struct btrfs_delayed_item *item) { + struct btrfs_block_rsv *rsv; + if (!item->bytes_reserved) return; - btrfs_block_rsv_release(root, item->block_rsv, + rsv = &root->fs_info->global_block_rsv; + btrfs_block_rsv_release(root, rsv, item->bytes_reserved); } @@ -1014,6 +1014,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, struct btrfs_delayed_root *delayed_root; struct btrfs_delayed_node *curr_node, *prev_node; struct btrfs_path *path; + struct btrfs_block_rsv *block_rsv; int ret = 0; path = btrfs_alloc_path(); @@ -1021,6 +1022,9 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, return -ENOMEM; path->leave_spinning = 1; + block_rsv = trans->block_rsv; + trans->block_rsv = &root->fs_info->global_block_rsv; + delayed_root = btrfs_get_delayed_root(root); curr_node = btrfs_first_delayed_node(delayed_root); @@ -1045,6 +1049,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, } btrfs_free_path(path); + trans->block_rsv = block_rsv; return ret; } @@ -1052,6 +1057,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, struct btrfs_delayed_node *node) { struct btrfs_path *path; + struct btrfs_block_rsv *block_rsv; int ret; path = btrfs_alloc_path(); @@ -1059,6 +1065,9 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, return -ENOMEM; path->leave_spinning = 1; + block_rsv = trans->block_rsv; + trans->block_rsv = &node->root->fs_info->global_block_rsv; + ret = btrfs_insert_delayed_items(trans, path, node->root, node); if (!ret) ret = btrfs_delete_delayed_items(trans, path, node->root, node); @@ -1066,6 +1075,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, ret = btrfs_update_delayed_inode(trans, node->root, path, node); btrfs_free_path(path); + trans->block_rsv = block_rsv; return ret; } @@ -1116,6 +1126,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) struct btrfs_path *path; struct btrfs_delayed_node *delayed_node = NULL; struct btrfs_root *root; + struct btrfs_block_rsv *block_rsv; unsigned long nr = 0; int need_requeue = 0; int ret; @@ -1134,6 +1145,9 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) if (IS_ERR(trans)) goto free_path; + block_rsv = trans->block_rsv; + trans->block_rsv = &root->fs_info->global_block_rsv; + ret = btrfs_insert_delayed_items(trans, path, root, delayed_node); if (!ret) ret = btrfs_delete_delayed_items(trans, path, root, @@ -1176,6 +1190,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) nr = trans->blocks_used; + trans->block_rsv = block_rsv; btrfs_end_transaction_dmeta(trans, root); __btrfs_btree_balance_dirty(root, nr); free_path: diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index eb7d240aa648..cb79b6771e82 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -75,7 +75,6 @@ struct btrfs_delayed_item { struct list_head tree_list; /* used for batch insert/delete items */ struct list_head readdir_list; /* used for readdir items */ u64 bytes_reserved; - struct btrfs_block_rsv *block_rsv; struct btrfs_delayed_node *delayed_node; atomic_t refs; int ins_or_del; From 35a30d7ce54e087d8025a725d4e5a2fdee723a9f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 13 Jun 2011 15:18:23 +0000 Subject: [PATCH 237/327] btrfs: fix uninitialized return value When allocation fails in btrfs_read_fs_root_no_name, ret is not set although it is returned, holding a garbage value. Signed-off-by: David Sterba Reviewed-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c25ef5a0ccd6..1ac8db5dc0a3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1299,12 +1299,12 @@ again: return root; root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); - if (!root->free_ino_ctl) - goto fail; root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), GFP_NOFS); - if (!root->free_ino_pinned) + if (!root->free_ino_pinned || !root->free_ino_ctl) { + ret = -ENOMEM; goto fail; + } btrfs_init_free_ino_ctl(root); mutex_init(&root->fs_commit_mutex); From 301c2c3f039a1f9478f6cbef60f2ccd4da9bd4a1 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 14 Jun 2011 20:59:21 +0000 Subject: [PATCH 238/327] RDMA/cxgb4: Don't truncate MR lengths Remove left-over code from T3 that limited MR sizes to 32b. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 273ffe49525a..0347eed4a167 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -625,7 +625,7 @@ pbl_done: mhp->attr.perms = c4iw_ib_to_tpt_access(acc); mhp->attr.va_fbo = virt; mhp->attr.page_size = shift - 12; - mhp->attr.len = (u32) length; + mhp->attr.len = length; err = register_mem(rhp, php, mhp, shift); if (err) From 8da7e7a55231543b84ac84e93ad5ca9d340773d7 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 14 Jun 2011 20:59:27 +0000 Subject: [PATCH 239/327] RDMA/cxgb4: Couple of abort fixes - fix a race where the driver could end up sending a close_con_req after an abort_rpl. In c4iw_ep_disconnect(), send abort or close request with the ep mutex held. - fix a hang where driver fails to wake up when a connection is reset during a normal close. Wake up any waiters in the interrupt path, and correctly cleanup after rdma_fini() failures. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 46 +++++++++++++++++++++++++------- drivers/infiniband/hw/cxgb4/qp.c | 5 +--- 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index f660cd04ec2f..31fb44085c9b 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1463,9 +1463,9 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_qp_attributes attrs; int disconnect = 1; int release = 0; - int abort = 0; struct tid_info *t = dev->rdev.lldi.tids; unsigned int tid = GET_TID(hdr); + int ret; ep = lookup_tid(t, tid); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); @@ -1501,10 +1501,12 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) start_ep_timer(ep); __state_set(&ep->com, CLOSING); attrs.next_state = C4IW_QP_STATE_CLOSING; - abort = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); - peer_close_upcall(ep); - disconnect = 1; + if (ret != -ECONNRESET) { + peer_close_upcall(ep); + disconnect = 1; + } break; case ABORTING: disconnect = 0; @@ -2109,15 +2111,16 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) break; } - mutex_unlock(&ep->com.mutex); if (close) { - if (abrupt) - ret = abort_connection(ep, NULL, gfp); - else + if (abrupt) { + close_complete_upcall(ep); + ret = send_abort(ep, NULL, gfp); + } else ret = send_halfclose(ep, gfp); if (ret) fatal = 1; } + mutex_unlock(&ep->com.mutex); if (fatal) release_ep_resources(ep); return ret; @@ -2301,6 +2304,31 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } +static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_abort_req_rss *req = cplhdr(skb); + struct c4iw_ep *ep; + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int tid = GET_TID(req); + + ep = lookup_tid(t, tid); + if (is_neg_adv_abort(req->status)) { + PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep, + ep->hwtid); + kfree_skb(skb); + return 0; + } + PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, + ep->com.state); + + /* + * Wake up any threads in rdma_init() or rdma_fini(). + */ + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + sched(dev, skb); + return 0; +} + /* * Most upcalls from the T4 Core go to sched() to * schedule the processing on a work queue. @@ -2317,7 +2345,7 @@ c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = { [CPL_PASS_ESTABLISH] = sched, [CPL_PEER_CLOSE] = sched, [CPL_CLOSE_CON_RPL] = sched, - [CPL_ABORT_REQ_RSS] = sched, + [CPL_ABORT_REQ_RSS] = peer_abort_intr, [CPL_RDMA_TERMINATE] = sched, [CPL_FW4_ACK] = sched, [CPL_SET_TCB_RPL] = set_tcb_rpl, diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 3b773b05a898..a41578e48c7b 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1207,11 +1207,8 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, c4iw_get_ep(&qhp->ep->com); } ret = rdma_fini(rhp, qhp, ep); - if (ret) { - if (internal) - c4iw_get_ep(&qhp->ep->com); + if (ret) goto err; - } break; case C4IW_QP_STATE_TERMINATE: set_state(qhp, C4IW_QP_STATE_TERMINATE); From 3126448451105fae59de0058c68692aa09aa4c37 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Thu, 9 Jun 2011 20:27:26 +0000 Subject: [PATCH 240/327] IB/qib: Ensure that LOS and DFE are being turned off Due to timing, it is possible for the LOS and DFE to remain on. This is due to the link progressing to LinkUP prior to the driver getting the first Status Changed interrupt. By expanding the conditions under which LOS is turned off and DFE timeout is being set, timing is no longer an issue. Signed-off-by: Mitko Haralanov Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_iba7322.c | 25 ++++++++++++++++++------- drivers/infiniband/hw/qib/qib_intr.c | 6 +++++- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 9f53e68a096a..8ec5237031a0 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -469,6 +469,8 @@ static u8 ib_rate_to_delay[IB_RATE_120_GBPS + 1] = { #define IB_7322_LT_STATE_RECOVERIDLE 0x0f #define IB_7322_LT_STATE_CFGENH 0x10 #define IB_7322_LT_STATE_CFGTEST 0x11 +#define IB_7322_LT_STATE_CFGWAITRMTTEST 0x12 +#define IB_7322_LT_STATE_CFGWAITENH 0x13 /* link state machine states from IBC */ #define IB_7322_L_STATE_DOWN 0x0 @@ -498,8 +500,10 @@ static const u8 qib_7322_physportstate[0x20] = { IB_PHYSPORTSTATE_LINK_ERR_RECOVER, [IB_7322_LT_STATE_CFGENH] = IB_PHYSPORTSTATE_CFG_ENH, [IB_7322_LT_STATE_CFGTEST] = IB_PHYSPORTSTATE_CFG_TRAIN, - [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN, - [0x13] = IB_PHYSPORTSTATE_CFG_WAIT_ENH, + [IB_7322_LT_STATE_CFGWAITRMTTEST] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_7322_LT_STATE_CFGWAITENH] = + IB_PHYSPORTSTATE_CFG_WAIT_ENH, [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN, [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN, [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN, @@ -1692,7 +1696,9 @@ static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst) break; } - if (ibclt == IB_7322_LT_STATE_CFGTEST && + if (((ibclt >= IB_7322_LT_STATE_CFGTEST && + ibclt <= IB_7322_LT_STATE_CFGWAITENH) || + ibclt == IB_7322_LT_STATE_LINKUP) && (ibcst & SYM_MASK(IBCStatusA_0, LinkSpeedQDR))) { force_h1(ppd); ppd->cpspec->qdr_reforce = 1; @@ -7301,12 +7307,17 @@ static void ibsd_wr_allchans(struct qib_pportdata *ppd, int addr, unsigned data, static void serdes_7322_los_enable(struct qib_pportdata *ppd, int enable) { u64 data = qib_read_kreg_port(ppd, krp_serdesctrl); - printk(KERN_INFO QIB_DRV_NAME " IB%u:%u Turning LOS %s\n", - ppd->dd->unit, ppd->port, (enable ? "on" : "off")); - if (enable) + u8 state = SYM_FIELD(data, IBSerdesCtrl_0, RXLOSEN); + + if (enable && !state) { + printk(KERN_INFO QIB_DRV_NAME " IB%u:%u Turning LOS on\n", + ppd->dd->unit, ppd->port); data |= SYM_MASK(IBSerdesCtrl_0, RXLOSEN); - else + } else if (!enable && state) { + printk(KERN_INFO QIB_DRV_NAME " IB%u:%u Turning LOS off\n", + ppd->dd->unit, ppd->port); data &= ~SYM_MASK(IBSerdesCtrl_0, RXLOSEN); + } qib_write_kreg_port(ppd, krp_serdesctrl, data); } diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c index a693c56ec8a6..6ae57d23004a 100644 --- a/drivers/infiniband/hw/qib/qib_intr.c +++ b/drivers/infiniband/hw/qib/qib_intr.c @@ -96,8 +96,12 @@ void qib_handle_e_ibstatuschanged(struct qib_pportdata *ppd, u64 ibcs) * states, or if it transitions from any of the up (INIT or better) * states into any of the down states (except link recovery), then * call the chip-specific code to take appropriate actions. + * + * ppd->lflags could be 0 if this is the first time the interrupt + * handlers has been called but the link is already up. */ - if (lstate >= IB_PORT_INIT && (ppd->lflags & QIBL_LINKDOWN) && + if (lstate >= IB_PORT_INIT && + (!ppd->lflags || (ppd->lflags & QIBL_LINKDOWN)) && ltstate == IB_PHYSPORTSTATE_LINKUP) { /* transitioned to UP */ if (dd->f_ib_updown(ppd, 1, ibcs)) From cab758ef30e0e40f783627abc4b66d1b48fecd49 Mon Sep 17 00:00:00 2001 From: Clive Stubbings Date: Thu, 16 Jun 2011 22:30:39 +0000 Subject: [PATCH 241/327] fs_enet: fix freescale FCC ethernet dp buffer alignment The RIPTR and TIPTR (receive/transmit internal temporary data pointer), used by microcode as a temporary buffer for data, must be 32-byte aligned according to the RM for MPC8247. Tested on mgcoge. Signed-off-by: Clive Stubbings Signed-off-by: Holger Brunck cc: Pantelis Antoniou cc: Vitaly Bordug cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/fs_enet/mac-fcc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/fs_enet/mac-fcc.c b/drivers/net/fs_enet/mac-fcc.c index 7a84e45487e8..7583a9572bcc 100644 --- a/drivers/net/fs_enet/mac-fcc.c +++ b/drivers/net/fs_enet/mac-fcc.c @@ -105,7 +105,7 @@ static int do_pd_setup(struct fs_enet_private *fep) goto out_ep; fep->fcc.mem = (void __iomem *)cpm2_immr; - fpi->dpram_offset = cpm_dpalloc(128, 8); + fpi->dpram_offset = cpm_dpalloc(128, 32); if (IS_ERR_VALUE(fpi->dpram_offset)) { ret = fpi->dpram_offset; goto out_fcccp; From 1eddceadb0d6441cd39b2c38705a8f5fec86e770 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Jun 2011 03:45:15 +0000 Subject: [PATCH 242/327] net: rfs: enable RFS before first data packet is received MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Le jeudi 16 juin 2011 à 23:38 -0400, David Miller a écrit : > From: Ben Hutchings > Date: Fri, 17 Jun 2011 00:50:46 +0100 > > > On Wed, 2011-06-15 at 04:15 +0200, Eric Dumazet wrote: > >> @@ -1594,6 +1594,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) > >> goto discard; > >> > >> if (nsk != sk) { > >> + sock_rps_save_rxhash(nsk, skb->rxhash); > >> if (tcp_child_process(sk, nsk, skb)) { > >> rsk = nsk; > >> goto reset; > >> > > > > I haven't tried this, but it looks reasonable to me. > > > > What about IPv6? The logic in tcp_v6_do_rcv() looks very similar. > > Indeed ipv6 side needs the same fix. > > Eric please add that part and resubmit. And in fact I might stick > this into net-2.6 instead of net-next-2.6 > OK, here is the net-2.6 based one then, thanks ! [PATCH v2] net: rfs: enable RFS before first data packet is received First packet received on a passive tcp flow is not correctly RFS steered. One sock_rps_record_flow() call is missing in inet_accept() But before that, we also must record rxhash when child socket is setup. Signed-off-by: Eric Dumazet CC: Tom Herbert CC: Ben Hutchings CC: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 1 + net/ipv4/tcp_ipv4.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 3 files changed, 3 insertions(+) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9c1926027a26..eae1f676f870 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -676,6 +676,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags) lock_sock(sk2); + sock_rps_record_flow(sk2); WARN_ON(!((1 << sk2->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE))); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a7d6671e33b8..708dc203b034 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1589,6 +1589,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) goto discard; if (nsk != sk) { + sock_rps_save_rxhash(nsk, skb->rxhash); if (tcp_child_process(sk, nsk, skb)) { rsk = nsk; goto reset; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d1fd28711ba5..87551ca568cd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1644,6 +1644,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) * the new socket.. */ if(nsk != sk) { + sock_rps_save_rxhash(nsk, skb->rxhash); if (tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) From d0fd64c1de437bdf91c32d4f84a53fa4b2150348 Mon Sep 17 00:00:00 2001 From: Pavel Shved Date: Fri, 17 Jun 2011 06:25:10 +0000 Subject: [PATCH 243/327] farsync: add module_put to error path in fst_open() The fst_open() function, after a successful try_module_get() may return an error code if hdlc_open() returns it. However, it does not put the module on this error path. This patch adds the necessary module_put() call. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Pavel Shved Signed-off-by: David S. Miller --- drivers/net/wan/farsync.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index e050bd65e037..777d1a4e81b2 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -2203,8 +2203,10 @@ fst_open(struct net_device *dev) if (port->mode != FST_RAW) { err = hdlc_open(dev); - if (err) + if (err) { + module_put(THIS_MODULE); return err; + } } fst_openport(port); From 2f9381e98471837b631743270de988e78aad1f96 Mon Sep 17 00:00:00 2001 From: Pavel Shved Date: Fri, 17 Jun 2011 06:25:11 +0000 Subject: [PATCH 244/327] gigaset: call module_put before restart of if_open() if_open() calls try_module_get(), and after an attempt to lock a mutex the if_open() function may return -ERESTARTSYS without putting the module. Then, when if_open() is executed again, try_module_get() is called making the reference counter of THIS_MODULE greater than one at successful exit from if_open(). The if_close() function puts the module only once, and as a result it can't be unloaded. This patch adds module_put call before the return from if_open(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Pavel Shved Signed-off-by: David S. Miller --- drivers/isdn/gigaset/interface.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/isdn/gigaset/interface.c b/drivers/isdn/gigaset/interface.c index 59de638225fe..e35058bcd7b9 100644 --- a/drivers/isdn/gigaset/interface.c +++ b/drivers/isdn/gigaset/interface.c @@ -156,8 +156,10 @@ static int if_open(struct tty_struct *tty, struct file *filp) if (!cs || !try_module_get(cs->driver->owner)) return -ENODEV; - if (mutex_lock_interruptible(&cs->mutex)) + if (mutex_lock_interruptible(&cs->mutex)) { + module_put(cs->driver->owner); return -ERESTARTSYS; + } tty->driver_data = cs; ++cs->open_count; From eeb1497277d6b1a0a34ed36b97e18f2bd7d6de0d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Jun 2011 16:25:39 -0400 Subject: [PATCH 245/327] inet_diag: fix inet_diag_bc_audit() A malicious user or buggy application can inject code and trigger an infinite loop in inet_diag_bc_audit() Also make sure each instruction is aligned on 4 bytes boundary, to avoid unaligned accesses. Reported-by: Dan Rosenberg Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 6ffe94ca5bc9..3267d3898437 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -437,7 +437,7 @@ static int valid_cc(const void *bc, int len, int cc) return 0; if (cc == len) return 1; - if (op->yes < 4) + if (op->yes < 4 || op->yes & 3) return 0; len -= op->yes; bc += op->yes; @@ -447,11 +447,11 @@ static int valid_cc(const void *bc, int len, int cc) static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) { - const unsigned char *bc = bytecode; + const void *bc = bytecode; int len = bytecode_len; while (len > 0) { - struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)bc; + const struct inet_diag_bc_op *op = bc; //printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len); switch (op->code) { @@ -462,22 +462,20 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) case INET_DIAG_BC_S_LE: case INET_DIAG_BC_D_GE: case INET_DIAG_BC_D_LE: - if (op->yes < 4 || op->yes > len + 4) - return -EINVAL; case INET_DIAG_BC_JMP: - if (op->no < 4 || op->no > len + 4) + if (op->no < 4 || op->no > len + 4 || op->no & 3) return -EINVAL; if (op->no < len && !valid_cc(bytecode, bytecode_len, len - op->no)) return -EINVAL; break; case INET_DIAG_BC_NOP: - if (op->yes < 4 || op->yes > len + 4) - return -EINVAL; break; default: return -EINVAL; } + if (op->yes < 4 || op->yes > len + 4 || op->yes & 3) + return -EINVAL; bc += op->yes; len -= op->yes; } From e999376f094162aa425ae749aa1df95ab928d010 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 17 Jun 2011 16:14:09 -0400 Subject: [PATCH 246/327] Btrfs: avoid delayed metadata items during commits Snapshot creation has two phases. One is the initial snapshot setup, and the second is done during commit, while nobody is allowed to modify the root we are snapshotting. The delayed metadata insertion code can break that rule, it does a delayed inode update on the inode of the parent of the snapshot, and delayed directory item insertion. This makes sure to run the pending delayed operations before we record the snapshot root, which avoids corruptions. Signed-off-by: Chris Mason --- fs/btrfs/delayed-inode.c | 7 +++++++ fs/btrfs/delayed-inode.h | 4 ++++ fs/btrfs/transaction.c | 27 +++++++++++++++++---------- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index fc515b787e8c..f1cbd028f7b3 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1237,6 +1237,13 @@ again: return 0; } +void btrfs_assert_delayed_root_empty(struct btrfs_root *root) +{ + struct btrfs_delayed_root *delayed_root; + delayed_root = btrfs_get_delayed_root(root); + WARN_ON(btrfs_first_delayed_node(delayed_root)); +} + void btrfs_balance_delayed_items(struct btrfs_root *root) { struct btrfs_delayed_root *delayed_root; diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index cb79b6771e82..d1a6a2915c66 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -137,4 +137,8 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent, /* for init */ int __init btrfs_delayed_inode_init(void); void btrfs_delayed_inode_exit(void); + +/* for debugging */ +void btrfs_assert_delayed_root_empty(struct btrfs_root *root); + #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index c073d85e14f3..51dcec86757f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -957,6 +957,15 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ret = btrfs_update_inode(trans, parent_root, parent_inode); BUG_ON(ret); + /* + * pull in the delayed directory update + * and the delayed inode item + * otherwise we corrupt the FS during + * snapshot + */ + ret = btrfs_run_delayed_items(trans, root); + BUG_ON(ret); + record_root_in_trans(trans, root); btrfs_set_root_last_snapshot(&root->root_item, trans->transid); memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); @@ -1018,14 +1027,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, int ret; list_for_each_entry(pending, head, list) { - /* - * We must deal with the delayed items before creating - * snapshots, or we will create a snapthot with inconsistent - * information. - */ - ret = btrfs_run_delayed_items(trans, fs_info->fs_root); - BUG_ON(ret); - ret = create_pending_snapshot(trans, fs_info, pending); BUG_ON(ret); } @@ -1319,15 +1320,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, */ mutex_lock(&root->fs_info->reloc_mutex); - ret = create_pending_snapshots(trans, root->fs_info); + ret = btrfs_run_delayed_items(trans, root); BUG_ON(ret); - ret = btrfs_run_delayed_items(trans, root); + ret = create_pending_snapshots(trans, root->fs_info); BUG_ON(ret); ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); BUG_ON(ret); + /* + * make sure none of the code above managed to slip in a + * delayed item + */ + btrfs_assert_delayed_root_empty(root); + WARN_ON(cur_trans != trans->transaction); btrfs_scrub_pause(root); From 3744100e05c4e403ed21c99cd389c7e784664e4b Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Fri, 17 Jun 2011 22:58:54 +0200 Subject: [PATCH 247/327] r8169: fix static initializers. Signed-off-by: Francois Romieu --- drivers/net/r8169.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index ef1ce2ebeb4a..05d81780d1fd 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -1621,7 +1621,7 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp, * * (RTL_R32(TxConfig) & 0x700000) == 0x200000 ? 8101Eb : 8101Ec */ - static const struct { + static const struct rtl_mac_info { u32 mask; u32 val; int mac_version; @@ -1689,7 +1689,8 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp, /* Catch-all */ { 0x00000000, 0x00000000, RTL_GIGA_MAC_NONE } - }, *p = mac_info; + }; + const struct rtl_mac_info *p = mac_info; u32 reg; reg = RTL_R32(TxConfig); @@ -3681,7 +3682,7 @@ static void rtl_set_rx_max_size(void __iomem *ioaddr, unsigned int rx_buf_sz) static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version) { - static const struct { + static const struct rtl_cfg2_info { u32 mac_version; u32 clk; u32 val; @@ -3690,7 +3691,8 @@ static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version) { RTL_GIGA_MAC_VER_05, PCI_Clock_66MHz, 0x000fffff }, { RTL_GIGA_MAC_VER_06, PCI_Clock_33MHz, 0x00ffff00 }, // 8110SCe { RTL_GIGA_MAC_VER_06, PCI_Clock_66MHz, 0x00ffffff } - }, *p = cfg2_info; + }; + const struct rtl_cfg2_info *p = cfg2_info; unsigned int i; u32 clk; From 650f156775c2638cc02ed7df31186a09ba79666a Mon Sep 17 00:00:00 2001 From: Jeff Ohlstein Date: Fri, 17 Jun 2011 13:55:38 -0700 Subject: [PATCH 248/327] msm: timer: compensate for timer shift in msm_read_timer_count Some msm targets have timers whose lower bits are unreliable. So, we present our timers as lower frequency than they actually are, and ignore the bottom 5 bits on such targets. This compensation was erroneously removed from the msm_read_timer_count function, so restore it. This was broken by 94790ec25 "msm: timer: SMP timer support for msm". Signed-off-by: Jeff Ohlstein --- arch/arm/mach-msm/timer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c index 9bfdd5ad2441..2232032181be 100644 --- a/arch/arm/mach-msm/timer.c +++ b/arch/arm/mach-msm/timer.c @@ -102,7 +102,11 @@ static cycle_t msm_read_timer_count(struct clocksource *cs) { struct msm_clock *clk = container_of(cs, struct msm_clock, clocksource); - return readl(clk->global_counter); + /* + * Shift timer count down by a constant due to unreliable lower bits + * on some targets. + */ + return readl(clk->global_counter) >> clk->shift; } static struct msm_clock *clockevent_to_clock(struct clock_event_device *evt) From 498e720b96379d8ee9c294950a01534a73defcf3 Mon Sep 17 00:00:00 2001 From: Daniel J Blueman Date: Fri, 17 Jun 2011 11:32:19 -0700 Subject: [PATCH 249/327] drm/i915: Fix gen6 (SNB) missed BLT ring interrupts. The failure appeared in dmesg as: [drm:i915_hangcheck_ring_idle] *ERROR* Hangcheck timer elapsed... blt ring idle [waiting on 35064155, at 35064155], missed IRQ? This works around that problem on by making the blitter command streamer write interrupt state to the Hardware Status Page when a MI_USER_INTERRUPT command is decoded, which appears to force the seqno out to memory before the interrupt happens. v1->v2: Moved to prior interrupt handler installation and RMW flags as per feedback. v2->v3: Removed RMW of flags (by anholt) Cc: stable@kernel.org Signed-off-by: Daniel J Blueman Signed-off-by: Eric Anholt Tested-by: Chris Wilson [v1] Tested-by: Eric Anholt [v1,v3] (incidence of the bug with a testcase went from avg 2/1000 to 0/12651 in the latest test run (plus more for v1)) Tested-by: Kenneth Graunke [v1] Tested-by: Robert Hooker [v1] Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=33394 Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_irq.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b9fafe3b045b..9e34a1abeb61 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1740,6 +1740,16 @@ void ironlake_irq_preinstall(struct drm_device *dev) INIT_WORK(&dev_priv->rps_work, gen6_pm_rps_work); I915_WRITE(HWSTAM, 0xeffe); + if (IS_GEN6(dev)) { + /* Workaround stalls observed on Sandy Bridge GPUs by + * making the blitter command streamer generate a + * write to the Hardware Status Page for + * MI_USER_INTERRUPT. This appears to serialize the + * previous seqno write out before the interrupt + * happens. + */ + I915_WRITE(GEN6_BLITTER_HWSTAM, ~GEN6_BLITTER_USER_INTERRUPT); + } /* XXX hotplug from PCH */ From bb4aa39676f73b4657b3edd893ae83881c430c0c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 16 Jun 2011 20:44:51 -0700 Subject: [PATCH 250/327] mm: avoid repeated anon_vma lock/unlock sequences in anon_vma_clone() In anon_vma_clone() we traverse the vma->anon_vma_chain of the source vma, locking the anon_vma for each entry. But they are all going to have the same root entry, which means that we're locking and unlocking the same lock over and over again. Which is expensive in locked operations, but can get _really_ expensive when that root entry sees any kind of lock contention. In fact, Tim Chen reports a big performance regression due to this: when we switched to use a mutex instead of a spinlock, the contention case gets much worse. So to alleviate this all, this commit creates a small helper function (lock_anon_vma_root()) that can be used to take the lock just once rather than taking and releasing it over and over again. We still have the same "take the lock and release" it behavior in the exit path (in unlink_anon_vmas()), but that one is a bit harder to fix since we're actually freeing the anon_vma entries as we go, and that will touch the lock too. Reported-and-tested-by: Tim Chen Tested-by: Hugh Dickins Cc: Peter Zijlstra Cc: Andi Kleen Signed-off-by: Linus Torvalds --- mm/rmap.c | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index 0eb463ea88dd..f286697c61dc 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -200,6 +200,32 @@ int anon_vma_prepare(struct vm_area_struct *vma) return -ENOMEM; } +/* + * This is a useful helper function for locking the anon_vma root as + * we traverse the vma->anon_vma_chain, looping over anon_vma's that + * have the same vma. + * + * Such anon_vma's should have the same root, so you'd expect to see + * just a single mutex_lock for the whole traversal. + */ +static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma) +{ + struct anon_vma *new_root = anon_vma->root; + if (new_root != root) { + if (WARN_ON_ONCE(root)) + mutex_unlock(&root->mutex); + root = new_root; + mutex_lock(&root->mutex); + } + return root; +} + +static inline void unlock_anon_vma_root(struct anon_vma *root) +{ + if (root) + mutex_unlock(&root->mutex); +} + static void anon_vma_chain_link(struct vm_area_struct *vma, struct anon_vma_chain *avc, struct anon_vma *anon_vma) @@ -208,13 +234,11 @@ static void anon_vma_chain_link(struct vm_area_struct *vma, avc->anon_vma = anon_vma; list_add(&avc->same_vma, &vma->anon_vma_chain); - anon_vma_lock(anon_vma); /* * It's critical to add new vmas to the tail of the anon_vma, * see comment in huge_memory.c:__split_huge_page(). */ list_add_tail(&avc->same_anon_vma, &anon_vma->head); - anon_vma_unlock(anon_vma); } /* @@ -224,16 +248,23 @@ static void anon_vma_chain_link(struct vm_area_struct *vma, int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) { struct anon_vma_chain *avc, *pavc; + struct anon_vma *root = NULL; list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) { + struct anon_vma *anon_vma; + avc = anon_vma_chain_alloc(); if (!avc) goto enomem_failure; - anon_vma_chain_link(dst, avc, pavc->anon_vma); + anon_vma = pavc->anon_vma; + root = lock_anon_vma_root(root, anon_vma); + anon_vma_chain_link(dst, avc, anon_vma); } + unlock_anon_vma_root(root); return 0; enomem_failure: + unlock_anon_vma_root(root); unlink_anon_vmas(dst); return -ENOMEM; } @@ -280,7 +311,9 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) get_anon_vma(anon_vma->root); /* Mark this anon_vma as the one where our new (COWed) pages go. */ vma->anon_vma = anon_vma; + anon_vma_lock(anon_vma); anon_vma_chain_link(vma, avc, anon_vma); + anon_vma_unlock(anon_vma); return 0; From eee2acbae95555006307395d8a6c91452d62851d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Jun 2011 13:54:23 +0200 Subject: [PATCH 251/327] mm: avoid repeated anon_vma lock/unlock sequences in unlink_anon_vmas() This matches the anon_vma_clone() case, and uses the same lock helper functions. Because of the need to potentially release the anon_vma's, it's a bit more complex, though. We traverse the 'vma->anon_vma_chain' in two phases: the first loop gets the anon_vma lock (with the helper function that only takes the lock once for the whole loop), and removes any entries that don't need any more processing. The second phase just traverses the remaining list entries (without holding the anon_vma lock), and does any actual freeing of the anon_vma's that is required. Signed-off-by: Peter Zijlstra Tested-by: Hugh Dickins Tested-by: Tim Chen Cc: Andi Kleen Signed-off-by: Linus Torvalds --- mm/rmap.c | 49 ++++++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index f286697c61dc..68756a77ef87 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -324,36 +324,43 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) return -ENOMEM; } -static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain) -{ - struct anon_vma *anon_vma = anon_vma_chain->anon_vma; - int empty; - - /* If anon_vma_fork fails, we can get an empty anon_vma_chain. */ - if (!anon_vma) - return; - - anon_vma_lock(anon_vma); - list_del(&anon_vma_chain->same_anon_vma); - - /* We must garbage collect the anon_vma if it's empty */ - empty = list_empty(&anon_vma->head); - anon_vma_unlock(anon_vma); - - if (empty) - put_anon_vma(anon_vma); -} - void unlink_anon_vmas(struct vm_area_struct *vma) { struct anon_vma_chain *avc, *next; + struct anon_vma *root = NULL; /* * Unlink each anon_vma chained to the VMA. This list is ordered * from newest to oldest, ensuring the root anon_vma gets freed last. */ list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) { - anon_vma_unlink(avc); + struct anon_vma *anon_vma = avc->anon_vma; + + root = lock_anon_vma_root(root, anon_vma); + list_del(&avc->same_anon_vma); + + /* + * Leave empty anon_vmas on the list - we'll need + * to free them outside the lock. + */ + if (list_empty(&anon_vma->head)) + continue; + + list_del(&avc->same_vma); + anon_vma_chain_free(avc); + } + unlock_anon_vma_root(root); + + /* + * Iterate the list once more, it now only contains empty and unlinked + * anon_vmas, destroy them. Could not do before due to __put_anon_vma() + * needing to acquire the anon_vma->root->mutex. + */ + list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) { + struct anon_vma *anon_vma = avc->anon_vma; + + put_anon_vma(anon_vma); + list_del(&avc->same_vma); anon_vma_chain_free(avc); } From dd34739c03f2f9a79403d33419c2e61e11b4c403 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 17 Jun 2011 19:05:36 -0700 Subject: [PATCH 252/327] mm: avoid anon_vma_chain allocation under anon_vma lock Hugh Dickins points out that lockdep (correctly) spots a potential deadlock on the anon_vma lock, because we now do a GFP_KERNEL allocation of anon_vma_chain while doing anon_vma_clone(). The problem is that page reclaim will want to take the anon_vma lock of any anonymous pages that it will try to reclaim. So re-organize the code in anon_vma_clone() slightly: first do just a GFP_NOWAIT allocation, which will usually work fine. But if that fails, let's just drop the lock and re-do the allocation, now with GFP_KERNEL. End result: not only do we avoid the locking problem, this also ends up getting better concurrency in case the allocation does need to block. Tim Chen reports that with all these anon_vma locking tweaks, we're now almost back up to the spinlock performance. Reported-and-tested-by: Hugh Dickins Tested-by: Tim Chen Cc: Peter Zijlstra Cc: Andi Kleen Signed-off-by: Linus Torvalds --- mm/rmap.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index 68756a77ef87..27dfd3b82b0f 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -112,9 +112,9 @@ static inline void anon_vma_free(struct anon_vma *anon_vma) kmem_cache_free(anon_vma_cachep, anon_vma); } -static inline struct anon_vma_chain *anon_vma_chain_alloc(void) +static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp) { - return kmem_cache_alloc(anon_vma_chain_cachep, GFP_KERNEL); + return kmem_cache_alloc(anon_vma_chain_cachep, gfp); } static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain) @@ -159,7 +159,7 @@ int anon_vma_prepare(struct vm_area_struct *vma) struct mm_struct *mm = vma->vm_mm; struct anon_vma *allocated; - avc = anon_vma_chain_alloc(); + avc = anon_vma_chain_alloc(GFP_KERNEL); if (!avc) goto out_enomem; @@ -253,9 +253,14 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) { struct anon_vma *anon_vma; - avc = anon_vma_chain_alloc(); - if (!avc) - goto enomem_failure; + avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN); + if (unlikely(!avc)) { + unlock_anon_vma_root(root); + root = NULL; + avc = anon_vma_chain_alloc(GFP_KERNEL); + if (!avc) + goto enomem_failure; + } anon_vma = pavc->anon_vma; root = lock_anon_vma_root(root, anon_vma); anon_vma_chain_link(dst, avc, anon_vma); @@ -264,7 +269,6 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) return 0; enomem_failure: - unlock_anon_vma_root(root); unlink_anon_vmas(dst); return -ENOMEM; } @@ -294,7 +298,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) anon_vma = anon_vma_alloc(); if (!anon_vma) goto out_error; - avc = anon_vma_chain_alloc(); + avc = anon_vma_chain_alloc(GFP_KERNEL); if (!avc) goto out_error_free_anon_vma; From 0897554cdd9de8a9f6f93d9ba27c7ebfae286158 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 14 Jun 2011 10:16:17 +1000 Subject: [PATCH 253/327] drm/nouveau: fix big-endian switch Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c index 80218887e0a0..144f79a350ae 100644 --- a/drivers/gpu/drm/nouveau/nouveau_state.c +++ b/drivers/gpu/drm/nouveau/nouveau_state.c @@ -881,8 +881,8 @@ int nouveau_load(struct drm_device *dev, unsigned long flags) #ifdef __BIG_ENDIAN /* Put the card in BE mode if it's not */ - if (nv_rd32(dev, NV03_PMC_BOOT_1)) - nv_wr32(dev, NV03_PMC_BOOT_1, 0x00000001); + if (nv_rd32(dev, NV03_PMC_BOOT_1) != 0x01000001) + nv_wr32(dev, NV03_PMC_BOOT_1, 0x01000001); DRM_MEMORYBARRIER(); #endif From 2905544073f6ec235b44f624c66f52b61221a16c Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Sat, 11 Jun 2011 13:30:32 +0100 Subject: [PATCH 254/327] drm/nouveau/pm: Prevent overflow in nouveau_perf_init() While parsing the perf table, there is no check if the num of entries read from the vbios is less than the currently allocated number. In case of a buggy vbios this will cause overwriting of kernel memory, causing aditional problems. Add a simple check in order to prevent the case Signed-off-by: Emil Velikov Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_perf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_perf.c b/drivers/gpu/drm/nouveau/nouveau_perf.c index 922fb6b664ed..ef9dec0e6f8b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_perf.c +++ b/drivers/gpu/drm/nouveau/nouveau_perf.c @@ -182,6 +182,11 @@ nouveau_perf_init(struct drm_device *dev) entries = perf[2]; } + if (entries > NOUVEAU_PM_MAX_LEVEL) { + NV_DEBUG(dev, "perf table has too many entries - buggy vbios?\n"); + entries = NOUVEAU_PM_MAX_LEVEL; + } + entry = perf + headerlen; for (i = 0; i < entries; i++) { struct nouveau_pm_level *perflvl = &pm->perflvl[pm->nr_perflvl]; From f66b3d5540994cb92182312be24944864cec5a16 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 16 Jun 2011 14:40:27 +1000 Subject: [PATCH 255/327] drm/nv50/disp: fix gamma with page flipping overlay turned on Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nv50_display.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 74a3f6872701..08da478ba544 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -409,7 +409,7 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct nouveau_channel *evo = dispc->sync; int ret; - ret = RING_SPACE(evo, 24); + ret = RING_SPACE(evo, chan ? 25 : 27); if (unlikely(ret)) return ret; @@ -458,8 +458,19 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, /* queue the flip on the crtc's "display sync" channel */ BEGIN_RING(evo, 0, 0x0100, 1); OUT_RING (evo, 0xfffe0000); - BEGIN_RING(evo, 0, 0x0084, 5); - OUT_RING (evo, chan ? 0x00000100 : 0x00000010); + if (chan) { + BEGIN_RING(evo, 0, 0x0084, 1); + OUT_RING (evo, 0x00000100); + } else { + BEGIN_RING(evo, 0, 0x0084, 1); + OUT_RING (evo, 0x00000010); + /* allows gamma somehow, PDISP will bitch at you if + * you don't wait for vblank before changing this.. + */ + BEGIN_RING(evo, 0, 0x00e0, 1); + OUT_RING (evo, 0x40000000); + } + BEGIN_RING(evo, 0, 0x0088, 4); OUT_RING (evo, dispc->sem.offset); OUT_RING (evo, 0xf00d0000 | dispc->sem.value); OUT_RING (evo, 0x74b1e000); From b16a5a18ff994532120c1d18e678bbc5fb477b62 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 17 Jun 2011 23:41:54 +1000 Subject: [PATCH 256/327] drm/nouveau: fix assumption that semaphore dmaobj is valid in x-chan sync The DDX modifies DMA_SEMAPHORE on nv50 in order to implement sync-to-vblank, things will go very wrong for cross-channel sync after this. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_fence.c | 59 +++++++++++-------------- 1 file changed, 25 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 4b9f4493c9f9..7347075ca5b8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -339,11 +339,12 @@ semaphore_acquire(struct nouveau_channel *chan, struct nouveau_semaphore *sema) int ret; if (dev_priv->chipset < 0x84) { - ret = RING_SPACE(chan, 3); + ret = RING_SPACE(chan, 4); if (ret) return ret; - BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 2); + BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 3); + OUT_RING (chan, NvSema); OUT_RING (chan, sema->mem->start); OUT_RING (chan, 1); } else @@ -351,10 +352,12 @@ semaphore_acquire(struct nouveau_channel *chan, struct nouveau_semaphore *sema) struct nouveau_vma *vma = &dev_priv->fence.bo->vma; u64 offset = vma->offset + sema->mem->start; - ret = RING_SPACE(chan, 5); + ret = RING_SPACE(chan, 7); if (ret) return ret; + BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1); + OUT_RING (chan, chan->vram_handle); BEGIN_RING(chan, NvSubSw, 0x0010, 4); OUT_RING (chan, upper_32_bits(offset)); OUT_RING (chan, lower_32_bits(offset)); @@ -394,11 +397,12 @@ semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema) int ret; if (dev_priv->chipset < 0x84) { - ret = RING_SPACE(chan, 4); + ret = RING_SPACE(chan, 5); if (ret) return ret; - BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1); + BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 2); + OUT_RING (chan, NvSema); OUT_RING (chan, sema->mem->start); BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_RELEASE, 1); OUT_RING (chan, 1); @@ -407,10 +411,12 @@ semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema) struct nouveau_vma *vma = &dev_priv->fence.bo->vma; u64 offset = vma->offset + sema->mem->start; - ret = RING_SPACE(chan, 5); + ret = RING_SPACE(chan, 7); if (ret) return ret; + BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1); + OUT_RING (chan, chan->vram_handle); BEGIN_RING(chan, NvSubSw, 0x0010, 4); OUT_RING (chan, upper_32_bits(offset)); OUT_RING (chan, lower_32_bits(offset)); @@ -504,22 +510,22 @@ nouveau_fence_channel_init(struct nouveau_channel *chan) struct nouveau_gpuobj *obj = NULL; int ret; - if (dev_priv->card_type >= NV_C0) - goto out_initialised; + if (dev_priv->card_type < NV_C0) { + /* Create an NV_SW object for various sync purposes */ + ret = nouveau_gpuobj_gr_new(chan, NvSw, NV_SW); + if (ret) + return ret; - /* Create an NV_SW object for various sync purposes */ - ret = nouveau_gpuobj_gr_new(chan, NvSw, NV_SW); - if (ret) - return ret; + ret = RING_SPACE(chan, 2); + if (ret) + return ret; - /* we leave subchannel empty for nvc0 */ - ret = RING_SPACE(chan, 2); - if (ret) - return ret; - BEGIN_RING(chan, NvSubSw, 0, 1); - OUT_RING(chan, NvSw); + BEGIN_RING(chan, NvSubSw, 0, 1); + OUT_RING (chan, NvSw); + FIRE_RING (chan); + } - /* Create a DMA object for the shared cross-channel sync area. */ + /* Setup area of memory shared between all channels for x-chan sync */ if (USE_SEMA(dev) && dev_priv->chipset < 0x84) { struct ttm_mem_reg *mem = &dev_priv->fence.bo->bo.mem; @@ -534,23 +540,8 @@ nouveau_fence_channel_init(struct nouveau_channel *chan) nouveau_gpuobj_ref(NULL, &obj); if (ret) return ret; - - ret = RING_SPACE(chan, 2); - if (ret) - return ret; - BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1); - OUT_RING(chan, NvSema); - } else { - ret = RING_SPACE(chan, 2); - if (ret) - return ret; - BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1); - OUT_RING (chan, chan->vram_handle); /* whole VM */ } - FIRE_RING(chan); - -out_initialised: INIT_LIST_HEAD(&chan->fence.pending); spin_lock_init(&chan->fence.lock); atomic_set(&chan->fence.last_sequence_irq, 0); From 808b4e639eb00394de9989fabca23196c337ee75 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 7 Jun 2011 15:14:26 -0400 Subject: [PATCH 257/327] hwmon: (coretemp) Drop unused struct members pdev_entry.cpu and pdev_entry.cpu_core_id aren't used anywhere in the driver code so we can drop these struct members. Signed-off-by: Jean Delvare Cc: Fenghua Yu Cc: Guenter Roeck Cc: Durgadoss R Signed-off-by: Guenter Roeck --- drivers/hwmon/coretemp.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 85e937984ff7..0070d5476dd0 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -97,9 +97,7 @@ struct platform_data { struct pdev_entry { struct list_head list; struct platform_device *pdev; - unsigned int cpu; u16 phys_proc_id; - u16 cpu_core_id; }; static LIST_HEAD(pdev_list); @@ -653,9 +651,7 @@ static int __cpuinit coretemp_device_add(unsigned int cpu) } pdev_entry->pdev = pdev; - pdev_entry->cpu = cpu; pdev_entry->phys_proc_id = TO_PHYS_ID(cpu); - pdev_entry->cpu_core_id = TO_CORE_ID(cpu); list_add_tail(&pdev_entry->list, &pdev_list); mutex_unlock(&pdev_list_mutex); From 9a2d55be1168b56a5c65321dcce64c97798eaec7 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 24 May 2011 12:19:05 -0700 Subject: [PATCH 258/327] hwmon: (asus_atk0110) Consolidate sysfs attribute initialization Call sysfs_attr_init() from atk_init_attribute() to handle sysfs attribute initialization in a single function. Signed-off-by: Guenter Roeck Cc: Luca Tettamanti Acked-by: Jean Delvare --- drivers/hwmon/asus_atk0110.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c index b5e892017e0c..dcb78a7a8047 100644 --- a/drivers/hwmon/asus_atk0110.c +++ b/drivers/hwmon/asus_atk0110.c @@ -268,6 +268,7 @@ static struct device_attribute atk_name_attr = static void atk_init_attribute(struct device_attribute *attr, char *name, sysfs_show_func show) { + sysfs_attr_init(&attr->attr); attr->attr.name = name; attr->attr.mode = 0444; attr->show = show; @@ -1188,19 +1189,15 @@ static int atk_create_files(struct atk_data *data) int err; list_for_each_entry(s, &data->sensor_list, list) { - sysfs_attr_init(&s->input_attr.attr); err = device_create_file(data->hwmon_dev, &s->input_attr); if (err) return err; - sysfs_attr_init(&s->label_attr.attr); err = device_create_file(data->hwmon_dev, &s->label_attr); if (err) return err; - sysfs_attr_init(&s->limit1_attr.attr); err = device_create_file(data->hwmon_dev, &s->limit1_attr); if (err) return err; - sysfs_attr_init(&s->limit2_attr.attr); err = device_create_file(data->hwmon_dev, &s->limit2_attr); if (err) return err; From 3cdb2052a6e365ad56202874e6a8a05a2bb336fc Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 24 May 2011 12:33:26 -0700 Subject: [PATCH 259/327] hwmon: (ibmaem) Initialize sysfs attributes Initialize dynamically allocated sysfs attributes before device_create_file() call to suppress lockdep_init_map() warning if lockdep debugging is enabled. Signed-off-by: Guenter Roeck Acked-by: Jean Delvare Cc: stable@kernel.org # 2.6.34+ --- drivers/hwmon/ibmaem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwmon/ibmaem.c b/drivers/hwmon/ibmaem.c index 537409d07ee7..1a409c5bc9bc 100644 --- a/drivers/hwmon/ibmaem.c +++ b/drivers/hwmon/ibmaem.c @@ -947,6 +947,7 @@ static int aem_register_sensors(struct aem_data *data, /* Set up read-only sensors */ while (ro->label) { + sysfs_attr_init(&sensors->dev_attr.attr); sensors->dev_attr.attr.name = ro->label; sensors->dev_attr.attr.mode = S_IRUGO; sensors->dev_attr.show = ro->show; @@ -963,6 +964,7 @@ static int aem_register_sensors(struct aem_data *data, /* Set up read-write sensors */ while (rw->label) { + sysfs_attr_init(&sensors->dev_attr.attr); sensors->dev_attr.attr.name = rw->label; sensors->dev_attr.attr.mode = S_IRUGO | S_IWUSR; sensors->dev_attr.show = rw->show; From fb794e0f7153918c33f2300986d995524ab711cf Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 24 May 2011 12:34:12 -0700 Subject: [PATCH 260/327] hwmon: (ibmpex) Initialize sysfs attributes Initialize dynamically allocated sysfs attributes before device_create_file() call to suppress lockdep_init_map() warning if lockdep debugging is enabled. Signed-off-by: Guenter Roeck Acked-by: Jean Delvare Cc: stable@kernel.org # 2.6.34+ --- drivers/hwmon/ibmpex.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/ibmpex.c b/drivers/hwmon/ibmpex.c index 06d4eafcf76b..41dbf8161ed7 100644 --- a/drivers/hwmon/ibmpex.c +++ b/drivers/hwmon/ibmpex.c @@ -358,6 +358,7 @@ static int create_sensor(struct ibmpex_bmc_data *data, int type, else if (type == POWER_SENSOR) sprintf(n, power_sensor_name_templates[func], "power", counter); + sysfs_attr_init(&data->sensors[sensor].attr[func].dev_attr.attr); data->sensors[sensor].attr[func].dev_attr.attr.name = n; data->sensors[sensor].attr[func].dev_attr.attr.mode = S_IRUGO; data->sensors[sensor].attr[func].dev_attr.show = ibmpex_show_sensor; From b1e698db0939b04602ded2a2196ff69c92b49378 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 24 May 2011 12:34:55 -0700 Subject: [PATCH 261/327] hwmon: (s3c) Initialize sysfs attributes Initialize dynamically allocated sysfs attributes before device_create_file() call to suppress lockdep_init_map() warning if lockdep debugging is enabled. Signed-off-by: Guenter Roeck Acked-by: Jean Delvare Cc: stable@kernel.org # 2.6.34+ --- drivers/hwmon/s3c-hwmon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwmon/s3c-hwmon.c b/drivers/hwmon/s3c-hwmon.c index 92b42db43bcf..b39f52e2752a 100644 --- a/drivers/hwmon/s3c-hwmon.c +++ b/drivers/hwmon/s3c-hwmon.c @@ -232,6 +232,7 @@ static int s3c_hwmon_create_attr(struct device *dev, attr = &attrs->in; attr->index = channel; + sysfs_attr_init(&attr->dev_attr.attr); attr->dev_attr.attr.name = attrs->in_name; attr->dev_attr.attr.mode = S_IRUGO; attr->dev_attr.show = s3c_hwmon_ch_show; @@ -249,6 +250,7 @@ static int s3c_hwmon_create_attr(struct device *dev, attr = &attrs->label; attr->index = channel; + sysfs_attr_init(&attr->dev_attr.attr); attr->dev_attr.attr.name = attrs->label_name; attr->dev_attr.attr.mode = S_IRUGO; attr->dev_attr.show = s3c_hwmon_label_show; From da40b0b6b4d3a81c5051fe6ae0544c48c13261c4 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 18 Jun 2011 02:50:11 -0700 Subject: [PATCH 262/327] Input: evdev - try to wake up readers only if we have full packet We should only wake waiters on the event device when we actually post an EV_SYN/SYN_REPORT to the queue. Otherwise we end up making waiting threads runnable only to go right back to sleep because the device still isn't readable. Reported-by: Jeffrey Brown Signed-off-by: Dmitry Torokhov --- drivers/input/evdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index be0921ef6b52..4cf25347b015 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -111,7 +111,8 @@ static void evdev_event(struct input_handle *handle, rcu_read_unlock(); - wake_up_interruptible(&evdev->wait); + if (type == EV_SYN && code == SYN_REPORT) + wake_up_interruptible(&evdev->wait); } static int evdev_fasync(int fd, struct file *file, int on) From b27af563befa95686c8d1abc491408aa6b9a31dc Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Wed, 15 Jun 2011 21:13:55 -0700 Subject: [PATCH 263/327] Input: omap-keypad - add missing input_sync() Otherwise the updated evdev driver (commit cdda911c34006f1089f3c87b1a1f, "Input: evdev - only signal polls on full packets") no longer works on top of omap-keypad. Tested on Amstrad Delta. Signed-off-by: Janusz Krzysztofik Reviewed-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/omap-keypad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/keyboard/omap-keypad.c b/drivers/input/keyboard/omap-keypad.c index f23a743817db..33d0bdc837c0 100644 --- a/drivers/input/keyboard/omap-keypad.c +++ b/drivers/input/keyboard/omap-keypad.c @@ -209,6 +209,7 @@ static void omap_kp_tasklet(unsigned long data) #endif } } + input_sync(omap_kp_data->input); memcpy(keypad_state, new_state, sizeof(keypad_state)); if (key_down) { From cca23d0b5350c9ca0473625c3f5879422ba534a6 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Sat, 18 Jun 2011 02:51:52 -0700 Subject: [PATCH 264/327] Input: sh_keysc - 8x8 MODE_6 fix According to the data sheet for G4, AP4 and AG5 KEYSC MODE_6 is 8x8 keys. Bump up MAXKEYS to 64 too. Signed-off-by: Magnus Damm Reviewed-by: Simon Horman Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/sh_keysc.c | 2 +- include/linux/input/sh_keysc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/keyboard/sh_keysc.c b/drivers/input/keyboard/sh_keysc.c index 834cf98e7efb..6876700a4469 100644 --- a/drivers/input/keyboard/sh_keysc.c +++ b/drivers/input/keyboard/sh_keysc.c @@ -32,7 +32,7 @@ static const struct { [SH_KEYSC_MODE_3] = { 2, 4, 7 }, [SH_KEYSC_MODE_4] = { 3, 6, 6 }, [SH_KEYSC_MODE_5] = { 4, 6, 7 }, - [SH_KEYSC_MODE_6] = { 5, 7, 7 }, + [SH_KEYSC_MODE_6] = { 5, 8, 8 }, }; struct sh_keysc_priv { diff --git a/include/linux/input/sh_keysc.h b/include/linux/input/sh_keysc.h index 649dc7f12925..5d253cd93691 100644 --- a/include/linux/input/sh_keysc.h +++ b/include/linux/input/sh_keysc.h @@ -1,7 +1,7 @@ #ifndef __SH_KEYSC_H__ #define __SH_KEYSC_H__ -#define SH_KEYSC_MAXKEYS 49 +#define SH_KEYSC_MAXKEYS 64 struct sh_keysc_info { enum { SH_KEYSC_MODE_1, SH_KEYSC_MODE_2, SH_KEYSC_MODE_3, From c11760c6d80ab6aa20e383cf378a7287305f591c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Jun 2011 10:30:03 -0700 Subject: [PATCH 265/327] isofs: fix bh leak in isofs_fill_super() error case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In isofs_fill_super(), when an iso_primary_descriptor is found, it is kept in pri_bh. The error cases don't properly release it. Fix it. Reported-and-tested-by: 김원석 Cc: Andrew Morton Signed-off-by: Linus Torvalds --- fs/isofs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 3db5ba4568fc..b3cc8586984e 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -974,7 +974,7 @@ out_no_inode: out_no_read: printk(KERN_WARNING "%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n", __func__, s->s_id, iso_blknum, block); - goto out_freesbi; + goto out_freebh; out_bad_zone_size: printk(KERN_WARNING "ISOFS: Bad logical zone size %ld\n", sbi->s_log_zone_size); @@ -989,6 +989,7 @@ out_unknown_format: out_freebh: brelse(bh); + brelse(pri_bh); out_freesbi: kfree(opt.iocharset); kfree(sbi); From 9aa3c94ce59066f545521033007abb6441706068 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Jun 2011 11:59:18 -0700 Subject: [PATCH 266/327] ipv4: fix multicast losses Knut Tidemann found that first packet of a multicast flow was not correctly received, and bisected the regression to commit b23dd4fe42b4 (Make output route lookup return rtable directly.) Special thanks to Knut, who provided a very nice bug report, including sample programs to demonstrate the bug. Reported-and-bisectedby: Knut Tidemann Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 045f0ec6a4a0..aa13ef105110 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1902,9 +1902,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); rth = rt_intern_hash(hash, rth, skb, dev->ifindex); - err = 0; - if (IS_ERR(rth)) - err = PTR_ERR(rth); + return IS_ERR(rth) ? PTR_ERR(rth) : 0; e_nobufs: return -ENOBUFS; From 7d68dc3f1003a38948c55c803c32d1989dd49198 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 14 Jun 2011 19:53:09 +0200 Subject: [PATCH 267/327] x86, efi: Do not reserve boot services regions within reserved areas Commit 916f676f8dc started reserving boot service code since some systems require you to keep that code around until SetVirtualAddressMap is called. However, in some cases those areas will overlap with reserved regions. The proper medium-term fix is to fix the bootloader to prevent the conflicts from occurring by moving the kernel to a better position, but the kernel should check for this possibility, and only reserve regions which can be reserved. Signed-off-by: Maarten Lankhorst Link: http://lkml.kernel.org/r/4DF7A005.1050407@gmail.com Acked-by: Matthew Garrett Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/include/asm/memblock.h | 2 +- arch/x86/mm/memblock.c | 4 ++-- arch/x86/platform/efi/efi.c | 29 +++++++++++++++++++++++++---- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h index 19ae14ba6978..0cd3800f33b9 100644 --- a/arch/x86/include/asm/memblock.h +++ b/arch/x86/include/asm/memblock.h @@ -4,7 +4,6 @@ #define ARCH_DISCARD_MEMBLOCK u64 memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align); -void memblock_x86_to_bootmem(u64 start, u64 end); void memblock_x86_reserve_range(u64 start, u64 end, char *name); void memblock_x86_free_range(u64 start, u64 end); @@ -19,5 +18,6 @@ u64 memblock_x86_hole_size(u64 start, u64 end); u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align); u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit); u64 memblock_x86_memory_in_range(u64 addr, u64 limit); +bool memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align); #endif diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c index aa1169392b83..992da5ec5a64 100644 --- a/arch/x86/mm/memblock.c +++ b/arch/x86/mm/memblock.c @@ -8,7 +8,7 @@ #include /* Check for already reserved areas */ -static bool __init check_with_memblock_reserved_size(u64 *addrp, u64 *sizep, u64 align) +bool __init memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align) { struct memblock_region *r; u64 addr = *addrp, last; @@ -59,7 +59,7 @@ u64 __init memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align) if (addr >= ei_last) continue; *sizep = ei_last - addr; - while (check_with_memblock_reserved_size(&addr, sizep, align)) + while (memblock_x86_check_reserved_size(&addr, sizep, align)) ; if (*sizep) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 0d3a4fa34560..474356b98ede 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -310,14 +310,31 @@ void __init efi_reserve_boot_services(void) for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { efi_memory_desc_t *md = p; - unsigned long long start = md->phys_addr; - unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + u64 start = md->phys_addr; + u64 size = md->num_pages << EFI_PAGE_SHIFT; if (md->type != EFI_BOOT_SERVICES_CODE && md->type != EFI_BOOT_SERVICES_DATA) continue; - - memblock_x86_reserve_range(start, start + size, "EFI Boot"); + /* Only reserve where possible: + * - Not within any already allocated areas + * - Not over any memory area (really needed, if above?) + * - Not within any part of the kernel + * - Not the bios reserved area + */ + if ((start+size >= virt_to_phys(_text) + && start <= virt_to_phys(_end)) || + !e820_all_mapped(start, start+size, E820_RAM) || + memblock_x86_check_reserved_size(&start, &size, + 1<num_pages = 0; + memblock_dbg(PFX "Could not reserve boot range " + "[0x%010llx-0x%010llx]\n", + start, start+size-1); + } else + memblock_x86_reserve_range(start, start+size, + "EFI Boot"); } } @@ -334,6 +351,10 @@ static void __init efi_free_boot_services(void) md->type != EFI_BOOT_SERVICES_DATA) continue; + /* Could not reserve boot area */ + if (!size) + continue; + free_bootmem_late(start, size); } } From c1f5c54b57341e872a9d375dccef7257f86033ef Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 18 Jun 2011 22:51:13 +0200 Subject: [PATCH 268/327] x86, MAINTAINERS: Add x86 MCE people Announce the new x86 MCE infrastructure maintainers. Acked-by: Borislav Petkov Acked-by: Tony Luck Acked-by: H. Peter Anvin Acked-by: Thomas Gleixner Link: http://lkml.kernel.org/n/tip-8hs7yob6wib4vblmrmbpbav4@git.kernel.org Signed-off-by: Ingo Molnar --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 502f2dd761eb..b12b8c13f53a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7007,6 +7007,13 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mjg59/platform-drivers-x86. S: Maintained F: drivers/platform/x86 +X86 MCE INFRASTRUCTURE +M: Tony Luck +M: Borislav Petkov +L: linux-edac@vger.kernel.org +S: Maintained +F: arch/x86/kernel/cpu/mcheck/* + XEN HYPERVISOR INTERFACE M: Jeremy Fitzhardinge M: Konrad Rzeszutek Wilk From b72336355bb4c92d4a2be3f975dbea47089c83c1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 30 May 2011 22:11:17 +0200 Subject: [PATCH 269/327] KVM: MMU: Fix build warnings in walk_addr_generic() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 3.0-rc1 I get In file included from arch/x86/kvm/mmu.c:2856: arch/x86/kvm/paging_tmpl.h: In function ‘paging32_walk_addr_generic’: arch/x86/kvm/paging_tmpl.h:124: warning: ‘ptep_user’ may be used uninitialized in this function In file included from arch/x86/kvm/mmu.c:2852: arch/x86/kvm/paging_tmpl.h: In function ‘paging64_walk_addr_generic’: arch/x86/kvm/paging_tmpl.h:124: warning: ‘ptep_user’ may be used uninitialized in this function caused by 6e2ca7d1802bf8ed9908435e34daa116662e7790. According to Takuya Yoshikawa, ptep_user won't be used uninitialized so shut up gcc. Cc: Takuya Yoshikawa Link: http://lkml.kernel.org/r/20110530094604.GC21833@liondog.tnic Signed-off-by: Borislav Petkov Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6c4dc010c4cb..9d03ad4dd5ec 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -121,7 +121,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, gva_t addr, u32 access) { pt_element_t pte; - pt_element_t __user *ptep_user; + pt_element_t __user *uninitialized_var(ptep_user); gfn_t table_gfn; unsigned index, pt_access, uninitialized_var(pte_access); gpa_t pte_gpa; From 5233dd51ece1615d54ab96c4cbe9ac3cc595e955 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 6 Jun 2011 14:27:47 -0300 Subject: [PATCH 270/327] KVM: VMX: do not overwrite uptodate vcpu->arch.cr3 on KVM_SET_SREGS Only decache guest CR3 value if vcpu->arch.cr3 is stale. Fixes loadvm with live guest. Signed-off-by: Marcelo Tosatti Tested-by: Markus Schade Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4c3fa0f67469..d48ec60ea421 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2047,7 +2047,8 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, unsigned long cr0, struct kvm_vcpu *vcpu) { - vmx_decache_cr3(vcpu); + if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) + vmx_decache_cr3(vcpu); if (!(cr0 & X86_CR0_PG)) { /* From paging/starting to nonpaging */ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, From a0a8eaba1661232f094654422bdabe2df4e26863 Mon Sep 17 00:00:00 2001 From: Steve Date: Fri, 17 Jun 2011 10:25:39 +0800 Subject: [PATCH 271/327] KVM: MMU: fix opposite condition in mapping_level_dirty_bitmap The condition is opposite, it always maps huge page for the dirty tracked page Reported-by: Steve Signed-off-by: Steve Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index bd14bb4c8594..aee38623b768 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -565,7 +565,7 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn) { - return gfn_to_memslot_dirty_bitmap(vcpu, large_gfn, true); + return !gfn_to_memslot_dirty_bitmap(vcpu, large_gfn, true); } static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) From de2d1a524e94a79078d9fe22c57c0c6009237547 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Wed, 15 Jun 2011 20:50:04 -0700 Subject: [PATCH 272/327] KVM: Fix register corruption in pvclock_scale_delta The 128-bit multiply in pvclock.h was missing an output constraint for EDX which caused a register corruption to appear. Thanks to Ulrich for diagnosing the EDX corruption and Avi for providing this fix. Signed-off-by: Zachary Amsden Signed-off-by: Avi Kivity --- arch/x86/include/asm/pvclock.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 31d84acc1512..a518c0a45044 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -22,6 +22,8 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) u64 product; #ifdef __i386__ u32 tmp1, tmp2; +#else + ulong tmp; #endif if (shift < 0) @@ -42,8 +44,11 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); #elif defined(__x86_64__) __asm__ ( - "mul %%rdx ; shrd $32,%%rdx,%%rax" - : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); + "mul %[mul_frac] ; shrd $32, %[hi], %[lo]" + : [lo]"=a"(product), + [hi]"=d"(tmp) + : "0"(delta), + [mul_frac]"rm"((u64)mul_frac)); #else #error implement me! #endif From df18d127f4fed7a0284bcfa8d2843800cdb63b72 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Fri, 17 Jun 2011 16:25:51 -0400 Subject: [PATCH 273/327] pnfs-obj: No longer needed to take an extra ref at add_device Andy's last device_cache patches, already take an extra reference on the newly inserted device_id. So we can remove it from obj-io. Without this patch the device_ids are leaked. Andy's patches are not in Linus tree yet. So I'm not sure if they are scheduled for this Kernel or the next. This patch should be added as part of these. CC: Andy Adamson Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objio_osd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 9cf208df1f25..eb4aafa9f521 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -108,7 +108,6 @@ _dev_list_add(const struct nfs_server *nfss, de = n; } - atomic_inc(&de->id_node.ref); return de; } From cefa9993f161c1c2b6b91b7ea2e84a9bfbd43d2e Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 19 Jun 2011 16:13:01 -0700 Subject: [PATCH 274/327] netpoll: copy dev name of slaves to struct netpoll Otherwise we will not see the name of the slave dev in error message: [ 388.469446] (null): doesn't support polling, aborting. Signed-off-by: WANG Cong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 1 + net/bridge/br_device.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 652b30e525d0..eafe44a528ac 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1297,6 +1297,7 @@ static inline int slave_enable_netpoll(struct slave *slave) goto out; np->dev = slave->dev; + strlcpy(np->dev_name, slave->dev->name, IFNAMSIZ); err = __netpoll_setup(np); if (err) { kfree(np); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index a6b2f86378c7..c188c803c09c 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -243,6 +243,7 @@ int br_netpoll_enable(struct net_bridge_port *p) goto out; np->dev = p->dev; + strlcpy(np->dev_name, p->dev->name, IFNAMSIZ); err = __netpoll_setup(np); if (err) { From 658924dc9ae2ca8e3c46f36306f5dbd501cf4688 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 19 Jun 2011 12:43:33 +0000 Subject: [PATCH 275/327] hp100: fix an skb->len race As soon as skb is given to hardware and spinlock released, TX completion can free skb under us. Therefore, we should update netdev stats before spinlock release. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/hp100.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hp100.c b/drivers/net/hp100.c index 8e10d2f6a5ad..c3ecb118c1df 100644 --- a/drivers/net/hp100.c +++ b/drivers/net/hp100.c @@ -1580,12 +1580,12 @@ static netdev_tx_t hp100_start_xmit_bm(struct sk_buff *skb, hp100_outl(ringptr->pdl_paddr, TX_PDA_L); /* Low Prio. Queue */ lp->txrcommit++; - spin_unlock_irqrestore(&lp->lock, flags); - /* Update statistics */ dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; + spin_unlock_irqrestore(&lp->lock, flags); + return NETDEV_TX_OK; drop: From 44da29d26bb8df3b0411ba902f2bc9b973ea38e4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 19 Jun 2011 12:52:36 +0000 Subject: [PATCH 276/327] sgi-xp: fix a use after free Its illegal to dereference skb after dev_kfree_skb(skb) Signed-off-by: Eric Dumazet CC: Robin Holt Signed-off-by: David S. Miller --- drivers/misc/sgi-xp/xpnet.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c index ee5109a3cd98..42f067347bc7 100644 --- a/drivers/misc/sgi-xp/xpnet.c +++ b/drivers/misc/sgi-xp/xpnet.c @@ -495,14 +495,14 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) } } + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + if (atomic_dec_return(&queued_msg->use_count) == 0) { dev_kfree_skb(skb); kfree(queued_msg); } - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; - return NETDEV_TX_OK; } From 8323fa6ba313ae2664420ec34d56a7fb0bbbe525 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 17 Jun 2011 13:13:52 -0400 Subject: [PATCH 277/327] drm/radeon/kms/atom: fix duallink on some early DCE3.2 cards Certain revisions of the vbios on DCE3.2 cards have a bug in the transmitter control table which prevents duallink from being enabled properly on some cards. The action switch statement jumps to the wrong offset for the OUTPUT_ENABLE action. The fix is to use the ENABLE action rather than the OUTPUT_ENABLE action on the affected cards. In fixed version of the vbios, both actions jump to the same offset, so the change should be safe. Reported-and-tested-by: Dave Airlie Signed-off-by: Alex Deucher Cc: stable@kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_encoders.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index f55b64cb59d1..012f80251e50 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c @@ -1431,7 +1431,11 @@ radeon_atom_encoder_dpms(struct drm_encoder *encoder, int mode) if (is_dig) { switch (mode) { case DRM_MODE_DPMS_ON: - atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT, 0, 0); + /* some early dce3.2 boards have a bug in their transmitter control table */ + if ((rdev->family == CHIP_RV710) || (rdev->family == CHIP_RV730)) + atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0); + else + atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT, 0, 0); if (atombios_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_DP) { struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); From 74d074eecbb4778e5f5ee7d59399da971682c532 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 17 Jun 2011 06:11:30 +0000 Subject: [PATCH 278/327] drm/radeon/kms: add missing param for dce3.2 DP transmitter setup This is used during phy init to set up the phy for DP. This may fix DP problems on DCE3.2 cards. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_encoders.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index 012f80251e50..b293487e5aa3 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c @@ -1090,9 +1090,10 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t break; } - if (is_dp) + if (is_dp) { args.v2.acConfig.fCoherentMode = 1; - else if (radeon_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) { + args.v2.acConfig.fDPConnector = 1; + } else if (radeon_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) { if (dig->coherent_mode) args.v2.acConfig.fCoherentMode = 1; if (radeon_encoder->pixel_clock > 165000) From 682f1a54a03513fd6bcede56845f1ba21f48c182 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 18 Jun 2011 03:59:51 +0000 Subject: [PATCH 279/327] drm/radeon: avoid warnings from r600/eg irq handlers on powered off card. Since we were calling the wptr function before checking if the IH was even enabled, or the GPU wasn't shutdown, we'd get spam in the logs when the GPU readback 0xffffffff. This reorders things so we return early in the no IH and GPU shutdown cases. Reported-and-tested-by: ManDay on #radeon Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/evergreen.c | 17 +++++++---------- drivers/gpu/drm/radeon/r600.c | 15 +++++++-------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 7e3d96e7ac04..7162b7bacac0 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2695,28 +2695,25 @@ static inline u32 evergreen_get_ih_wptr(struct radeon_device *rdev) int evergreen_irq_process(struct radeon_device *rdev) { - u32 wptr = evergreen_get_ih_wptr(rdev); - u32 rptr = rdev->ih.rptr; + u32 wptr; + u32 rptr; u32 src_id, src_data; u32 ring_index; unsigned long flags; bool queue_hotplug = false; - DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); - if (!rdev->ih.enabled) + if (!rdev->ih.enabled || rdev->shutdown) return IRQ_NONE; - spin_lock_irqsave(&rdev->ih.lock, flags); + wptr = evergreen_get_ih_wptr(rdev); + rptr = rdev->ih.rptr; + DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); + spin_lock_irqsave(&rdev->ih.lock, flags); if (rptr == wptr) { spin_unlock_irqrestore(&rdev->ih.lock, flags); return IRQ_NONE; } - if (rdev->shutdown) { - spin_unlock_irqrestore(&rdev->ih.lock, flags); - return IRQ_NONE; - } - restart_ih: /* display interrupts */ evergreen_irq_ack(rdev); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 7dd45ca64e29..dc9fde38ef49 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3294,27 +3294,26 @@ static inline u32 r600_get_ih_wptr(struct radeon_device *rdev) int r600_irq_process(struct radeon_device *rdev) { - u32 wptr = r600_get_ih_wptr(rdev); - u32 rptr = rdev->ih.rptr; + u32 wptr; + u32 rptr; u32 src_id, src_data; u32 ring_index; unsigned long flags; bool queue_hotplug = false; - DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); - if (!rdev->ih.enabled) + if (!rdev->ih.enabled || rdev->shutdown) return IRQ_NONE; + wptr = r600_get_ih_wptr(rdev); + rptr = rdev->ih.rptr; + DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); + spin_lock_irqsave(&rdev->ih.lock, flags); if (rptr == wptr) { spin_unlock_irqrestore(&rdev->ih.lock, flags); return IRQ_NONE; } - if (rdev->shutdown) { - spin_unlock_irqrestore(&rdev->ih.lock, flags); - return IRQ_NONE; - } restart_ih: /* display interrupts */ From 4ee1c57fcadfb79a6515698d8001081b5b980e32 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 20 Jun 2011 15:25:35 +1000 Subject: [PATCH 280/327] drm/nouveau: drop leftover debugging this printk isn't really useful, just drop it for now. Signed-off-by: Dave Airlie --- drivers/gpu/drm/nouveau/nouveau_acpi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index f0d459bb46e4..525744d593c1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -262,7 +262,6 @@ static bool nouveau_dsm_detect(void) vga_count++; retval = nouveau_dsm_pci_probe(pdev); - printk("ret val is %d\n", retval); if (retval & NOUVEAU_DSM_HAS_MUX) has_dsm |= 1; if (retval & NOUVEAU_DSM_HAS_OPT) From 105f4622104848ff1ee1f644d661bef9dec3eb27 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 7 Jun 2011 11:50:23 -0400 Subject: [PATCH 281/327] nfsd4: fix break_lease flags on nfsd open Thanks to Casey Bodley for pointing out that on a read open we pass 0, instead of O_RDONLY, to break_lease, with the result that a read open is treated like a write open for the purposes of lease breaking! Reported-by: Casey Bodley Cc: stable@kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f3fb61b69a1e..fd0acca5370a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -696,7 +696,15 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor } #endif /* CONFIG_NFSD_V3 */ +static int nfsd_open_break_lease(struct inode *inode, int access) +{ + unsigned int mode; + if (access & NFSD_MAY_NOT_BREAK_LEASE) + return 0; + mode = (access & NFSD_MAY_WRITE) ? O_WRONLY : O_RDONLY; + return break_lease(inode, mode | O_NONBLOCK); +} /* * Open an existing file or directory. @@ -744,12 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (!inode->i_fop) goto out; - /* - * Check to see if there are any leases on this file. - * This may block while leases are broken. - */ - if (!(access & NFSD_MAY_NOT_BREAK_LEASE)) - host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0)); + host_err = nfsd_open_break_lease(inode, access); if (host_err) /* NOMEM or WOULDBLOCK */ goto out_nfserr; From 185bf87393afe6b966881e36c459949d90930a7a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 20 Jun 2011 10:10:24 +0300 Subject: [PATCH 282/327] ubifs: dereferencing an ERR_PTR in ubifs_mount() d251ed271d5 "ubifs: fix sget races" left out the goto from this error path so the static checkers complain that we're dereferencing "sb" when it's an ERR_PTR. Signed-off-by: Dan Carpenter Signed-off-by: Al Viro --- fs/ubifs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 8c892c2d5300..529be0582029 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2146,6 +2146,7 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, if (IS_ERR(sb)) { err = PTR_ERR(sb); kfree(c); + goto out_close; } if (sb->s_root) { From 1712c20dae7b770b62b2e3272100b3b40af0157c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Jun 2011 19:59:04 -0400 Subject: [PATCH 283/327] bad_inode_permission() is safe from RCU mode return -EIO; is *not* a blocking operation, thank you very much. Nick, what the hell have you been smoking? Signed-off-by: Al Viro --- fs/bad_inode.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 9ad2369d9e35..bfcb18feb1df 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -231,9 +231,6 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer, static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags) { - if (flags & IPERM_FLAG_RCU) - return -ECHILD; - return -EIO; } From ec12781f192568f7ea860f440f890389ba393df7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Jun 2011 20:03:36 -0400 Subject: [PATCH 284/327] cifs_permission() doesn't need to bail out in RCU mode nothing potentially blocking except generic_permission(), which will DTRT Signed-off-by: Al Viro --- fs/cifs/cifsfs.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index e9def996e383..2f0c58646c10 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -257,9 +257,6 @@ static int cifs_permission(struct inode *inode, int mask, unsigned int flags) { struct cifs_sb_info *cifs_sb; - if (flags & IPERM_FLAG_RCU) - return -ECHILD; - cifs_sb = CIFS_SB(inode->i_sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { From 6b419951f1e44c8a46fccfc6551eca9a9980acd6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Jun 2011 20:11:43 -0400 Subject: [PATCH 285/327] coda_ioctl_permission() is safe in RCU mode return (mask & MAY_EXEC) ? -EACCES : 0; is non-blocking... Signed-off-by: Al Viro --- fs/coda/pioctl.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index 6cbb3afb36dc..cb140ef293e4 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -43,8 +43,6 @@ const struct file_operations coda_ioctl_operations = { /* the coda pioctl inode ops */ static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags) { - if (flags & IPERM_FLAG_RCU) - return -ECHILD; return (mask & MAY_EXEC) ? -EACCES : 0; } From a63ab94d67879bc0630ea9821c582ddf58ba5527 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Jun 2011 20:17:22 -0400 Subject: [PATCH 286/327] logfs doesn't need ->permission() at all ... and never did, what with its ->permission() being what we do by default when ->permission is NULL... Signed-off-by: Al Viro --- fs/logfs/dir.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 9ed89d1663f8..1afae26cf236 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -555,13 +555,6 @@ static int logfs_symlink(struct inode *dir, struct dentry *dentry, return __logfs_create(dir, dentry, inode, target, destlen); } -static int logfs_permission(struct inode *inode, int mask, unsigned int flags) -{ - if (flags & IPERM_FLAG_RCU) - return -ECHILD; - return generic_permission(inode, mask, flags, NULL); -} - static int logfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { @@ -820,7 +813,6 @@ const struct inode_operations logfs_dir_iops = { .mknod = logfs_mknod, .rename = logfs_rename, .rmdir = logfs_rmdir, - .permission = logfs_permission, .symlink = logfs_symlink, .unlink = logfs_unlink, }; From 730e908f3539066d4aa01f4720ebfc750ce4d045 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Jun 2011 20:21:44 -0400 Subject: [PATCH 287/327] nilfs2_permission() doesn't need to bail out in RCU mode Nothing blocking except for generic_permission(). Which will DTRT. Signed-off-by: Al Viro --- fs/nilfs2/inode.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index b954878ad6ce..b9b45fc2903e 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -801,12 +801,7 @@ out_err: int nilfs_permission(struct inode *inode, int mask, unsigned int flags) { - struct nilfs_root *root; - - if (flags & IPERM_FLAG_RCU) - return -ECHILD; - - root = NILFS_I(inode)->i_root; + struct nilfs_root *root = NILFS_I(inode)->i_root; if ((mask & MAY_WRITE) && root && root->cno != NILFS_CPTREE_CURRENT_CNO) return -EROFS; /* snapshot is not writable */ From cf1279111686d9742cbc4145bc9d526c83f59fea Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Jun 2011 20:35:23 -0400 Subject: [PATCH 288/327] proc_fd_permission() is doesn't need to bail out in RCU mode nothing blocking except generic_permission() Signed-off-by: Al Viro --- fs/proc/base.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 14def991d9dd..8a84210ca080 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2169,11 +2169,7 @@ static const struct file_operations proc_fd_operations = { */ static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags) { - int rv; - - if (flags & IPERM_FLAG_RCU) - return -ECHILD; - rv = generic_permission(inode, mask, flags, NULL); + int rv = generic_permission(inode, mask, flags, NULL); if (rv == 0) return 0; if (task_pid(current) == proc_pid(inode)) From 1d29b5a2ed7eb8862c9b66daf475f3e4c1a40299 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Jun 2011 20:37:33 -0400 Subject: [PATCH 289/327] reiserfs_permission() doesn't need to bail out in RCU mode nothing blocking other than generic_permission() (and check_acl callback does bail out in RCU mode). Signed-off-by: Al Viro --- fs/reiserfs/xattr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index e8a62f41b458..d78089690965 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -954,8 +954,6 @@ static int xattr_mount_check(struct super_block *s) int reiserfs_permission(struct inode *inode, int mask, unsigned int flags) { - if (flags & IPERM_FLAG_RCU) - return -ECHILD; /* * We don't do permission checks on the internal objects. * Permissions are determined by the "owning" object. From 1aec7036d0c2996c86ce483ca0a28f3b20807b43 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Jun 2011 20:42:00 -0400 Subject: [PATCH 290/327] proc_sys_permission() is OK in RCU mode nothing blocking there, since all instances of sysctl ->permissions() method are non-blocking - both of them, that is. Signed-off-by: Al Viro --- fs/proc/proc_sysctl.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index f50133c11c24..d167de365a8d 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -304,9 +304,6 @@ static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags) struct ctl_table *table; int error; - if (flags & IPERM_FLAG_RCU) - return -ECHILD; - /* Executable files are not allowed under /proc/sys/ */ if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) return -EACCES; From 6291176bcd71a2766a19a10cbd9bab07d289e1d7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Jun 2011 19:20:48 -0400 Subject: [PATCH 291/327] kill obsolete comment for follow_down() Signed-off-by: Al Viro --- fs/namei.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 9e425e7e6c8f..975c40620fe9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1011,9 +1011,6 @@ failed: * Follow down to the covering mount currently visible to userspace. At each * point, the filesystem owning that dentry may be queried as to whether the * caller is permitted to proceed or not. - * - * Care must be taken as namespace_sem may be held (indicated by mounting_here - * being true). */ int follow_down(struct path *path) { From 8e833fd2e1f0107ee7a4b6bc4de3c9f0e9b0ed41 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 19 Jun 2011 01:56:53 -0400 Subject: [PATCH 292/327] fix comment in generic_permission() CAP_DAC_OVERRIDE is enough for MAY_EXEC on directory, even if no exec bits are set. Signed-off-by: Al Viro --- fs/namei.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index 975c40620fe9..0223c41fb114 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -238,7 +238,8 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags, /* * Read/write DACs are always overridable. - * Executable DACs are overridable if at least one exec bit is set. + * Executable DACs are overridable for all directories and + * for non-directories that have least one exec bit set. */ if (!(mask & MAY_EXEC) || execute_ok(inode)) if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE)) From 482e0cd3dbaa70f2a2bead4b5f2c0d203ef654ba Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 19 Jun 2011 13:01:04 -0400 Subject: [PATCH 293/327] devcgroup_inode_permission: take "is it a device node" checks to inlined wrapper inode_permission() calls devcgroup_inode_permission() and almost all such calls are _not_ for device nodes; let's at least keep the common path straight... Signed-off-by: Al Viro --- include/linux/device_cgroup.h | 10 +++++++++- security/device_cgroup.c | 8 +------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index 0b0d9c39ed67..7aad1f440867 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -2,8 +2,16 @@ #include #ifdef CONFIG_CGROUP_DEVICE -extern int devcgroup_inode_permission(struct inode *inode, int mask); +extern int __devcgroup_inode_permission(struct inode *inode, int mask); extern int devcgroup_inode_mknod(int mode, dev_t dev); +static inline int devcgroup_inode_permission(struct inode *inode, int mask) +{ + if (likely(!inode->i_rdev)) + return 0; + if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) + return 0; + return __devcgroup_inode_permission(inode, mask); +} #else static inline int devcgroup_inode_permission(struct inode *inode, int mask) { return 0; } diff --git a/security/device_cgroup.c b/security/device_cgroup.c index cd1f779fa51d..1be68269e1c2 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -474,17 +474,11 @@ struct cgroup_subsys devices_subsys = { .subsys_id = devices_subsys_id, }; -int devcgroup_inode_permission(struct inode *inode, int mask) +int __devcgroup_inode_permission(struct inode *inode, int mask) { struct dev_cgroup *dev_cgroup; struct dev_whitelist_item *wh; - dev_t device = inode->i_rdev; - if (!device) - return 0; - if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) - return 0; - rcu_read_lock(); dev_cgroup = task_devcgroup(current); From 206b6310fd0268a6ca50cf36f03b0f4eee5602ec Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Mon, 20 Jun 2011 10:30:04 -0500 Subject: [PATCH 294/327] jfs: old_agsize should be 64 bits in jfs_extendfs Signed-off-by: Dave Kleikamp --- fs/jfs/resize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index 8ea5efb5a34e..8d0c1c7c0820 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c @@ -80,7 +80,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) int log_formatted = 0; struct inode *iplist[1]; struct jfs_superblock *j_sb, *j_sb2; - uint old_agsize; + s64 old_agsize; int agsizechanged = 0; struct buffer_head *bh, *bh2; From 28e0fa894cd5996d3007ce82f07226f79beb7286 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Mon, 20 Jun 2011 10:32:46 -0500 Subject: [PATCH 295/327] jfs: Update agstart when resizing volume A comment indicates that the IAG's agstart does not need to be updated since it will always point to a block in the same aggregate group, but jfs_fsck isn't so forgiving and reports it as an error. I'm fixing this in jfsutils as well, so either a new kernel or new utilities will be sufficient to fix the problem. Signed-off-by: Dave Kleikamp --- fs/jfs/jfs_imap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index ed53a4740168..0533e8f3d19e 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -2921,10 +2921,9 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) continue; } - /* agstart that computes to the same ag is treated as same; */ agstart = le64_to_cpu(iagp->agstart); - /* iagp->agstart = agstart & ~(mp->db_agsize - 1); */ n = agstart >> mp->db_agl2size; + iagp->agstart = cpu_to_le64((s64)n << mp->db_agl2size); /* compute backed inodes */ numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts)) From d31b53e3cd069e02290ed8a648aa8c7618d6fe77 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Mon, 20 Jun 2011 10:53:46 -0500 Subject: [PATCH 296/327] JFS: Don't save agno in the inode Resizing the file system can result in an in-memory inode being remapped to a different aggregate group (AG). A cached AG number can cause problems when trying to free or allocate inodes. Instead, save the IAG's agstart address and calculate the agno when we need it. Signed-off-by: Dave Kleikamp --- fs/jfs/file.c | 6 +++--- fs/jfs/jfs_imap.c | 9 ++++----- fs/jfs/jfs_incore.h | 3 ++- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/jfs/file.c b/fs/jfs/file.c index c5ce6c1d1ff4..2f3f531f3606 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -66,9 +66,9 @@ static int jfs_open(struct inode *inode, struct file *file) struct jfs_inode_info *ji = JFS_IP(inode); spin_lock_irq(&ji->ag_lock); if (ji->active_ag == -1) { - ji->active_ag = ji->agno; - atomic_inc( - &JFS_SBI(inode->i_sb)->bmap->db_active[ji->agno]); + struct jfs_sb_info *jfs_sb = JFS_SBI(inode->i_sb); + ji->active_ag = BLKTOAG(addressPXD(&ji->ixpxd), jfs_sb); + atomic_inc( &jfs_sb->bmap->db_active[ji->active_ag]); } spin_unlock_irq(&ji->ag_lock); } diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 0533e8f3d19e..b78b2f978f04 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -397,7 +397,7 @@ int diRead(struct inode *ip) release_metapage(mp); /* set the ag for the inode */ - JFS_IP(ip)->agno = BLKTOAG(agstart, sbi); + JFS_IP(ip)->agstart = agstart; JFS_IP(ip)->active_ag = -1; return (rc); @@ -901,7 +901,7 @@ int diFree(struct inode *ip) /* get the allocation group for this ino. */ - agno = JFS_IP(ip)->agno; + agno = BLKTOAG(JFS_IP(ip)->agstart, JFS_SBI(ip->i_sb)); /* Lock the AG specific inode map information */ @@ -1315,12 +1315,11 @@ int diFree(struct inode *ip) static inline void diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp) { - struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); struct jfs_inode_info *jfs_ip = JFS_IP(ip); ip->i_ino = (iagno << L2INOSPERIAG) + ino; jfs_ip->ixpxd = iagp->inoext[extno]; - jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); + jfs_ip->agstart = le64_to_cpu(iagp->agstart); jfs_ip->active_ag = -1; } @@ -1379,7 +1378,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) */ /* get the ag number of this iag */ - agno = JFS_IP(pip)->agno; + agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb)); if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) { /* diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 1439f119ec83..5097839b7709 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -50,8 +50,9 @@ struct jfs_inode_info { short btindex; /* btpage entry index*/ struct inode *ipimap; /* inode map */ unsigned long cflag; /* commit flags */ + long agstart; /* agstart of the containing IAG */ u16 bxflag; /* xflag of pseudo buffer? */ - unchar agno; /* ag number */ + unchar pad; signed char active_ag; /* ag currently allocating from */ lid_t blid; /* lid of pseudo buffer? */ lid_t atlhead; /* anonymous tlock list head */ From c82b9d7fe7464aec78210544948564ffe3bb2d2b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 19 Jun 2011 20:26:15 +0000 Subject: [PATCH 297/327] netxen: fix race in skb->len access As soon as skb is given to hardware, TX completion can free skb under us. Therefore, we should update dev stats before kicking the device. Signed-off-by: Eric Dumazet CC: Amit Kumar Salecha Signed-off-by: David S. Miller --- drivers/net/netxen/netxen_nic_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index b644383017f9..c0788a31ff0f 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -1965,11 +1965,11 @@ netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) netxen_tso_check(netdev, tx_ring, first_desc, skb); - netxen_nic_update_cmd_producer(adapter, tx_ring); - adapter->stats.txbytes += skb->len; adapter->stats.xmitcalled++; + netxen_nic_update_cmd_producer(adapter, tx_ring); + return NETDEV_TX_OK; drop_packet: From 8ad2475e3555346fbd738e77da12578b97d10505 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sun, 19 Jun 2011 22:31:20 +0000 Subject: [PATCH 298/327] ipv4, ping: Remove duplicate icmp.h include Remove the duplicate inclusion of net/icmp.h from net/ipv4/ping.c Signed-off-by: Jesper Juhl Signed-off-by: David S. Miller --- net/ipv4/ping.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 9aaa67165f42..39b403f854c6 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include From 19345cb299e8234006c5125151ab723e851a1d24 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 19 Jun 2011 18:33:46 -0400 Subject: [PATCH 299/327] NFSv4.1: file layout must consider pg_bsize for coalescing Otherwise we end up overflowing the rpc buffer size on the receive end. Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 6 ++++-- fs/nfs/pagelist.c | 3 ++- include/linux/nfs_page.h | 3 +++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 5d6f369b15d0..0bafcc91c27f 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -30,6 +30,7 @@ */ #include +#include #include "internal.h" #include "nfs4filelayout.h" @@ -666,8 +667,9 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, u64 p_stripe, r_stripe; u32 stripe_unit; - if (!pnfs_generic_pg_test(pgio, prev, req)) - return 0; + if (!pnfs_generic_pg_test(pgio, prev, req) || + !nfs_generic_pg_test(pgio, prev, req)) + return false; if (!pgio->pg_lseg) return 1; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 7913961aff22..009855716286 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -204,7 +204,7 @@ nfs_wait_on_request(struct nfs_page *req) TASK_UNINTERRUPTIBLE); } -static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) +bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) { /* * FIXME: ideally we should be able to coalesce all requests @@ -218,6 +218,7 @@ static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_p return desc->pg_count + req->wb_bytes <= desc->pg_bsize; } +EXPORT_SYMBOL_GPL(nfs_generic_pg_test); /** * nfs_pageio_init - initialise a page io descriptor diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 3a34e80ae92f..25311b3bedf8 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -92,6 +92,9 @@ extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, struct nfs_page *); extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc); extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); +extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, + struct nfs_page *prev, + struct nfs_page *req); extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern int nfs_set_page_tag_locked(struct nfs_page *req); From 384420409d9b5d4443940abace49363d26135412 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Sun, 19 Jun 2011 21:48:06 +0000 Subject: [PATCH 300/327] pxa168_eth: fix race in transmit path. Because the socket buffer is freed in the completion interrupt, it is not safe to access it after submitting it to the hardware. Cc: stable@kernel.org Cc: Sachin Sanap Cc: Zhangfei Gao Cc: Philip Rakity Signed-off-by: Richard Cochran Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/pxa168_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/pxa168_eth.c b/drivers/net/pxa168_eth.c index 89f7540d90f9..5f597ca592bb 100644 --- a/drivers/net/pxa168_eth.c +++ b/drivers/net/pxa168_eth.c @@ -1273,7 +1273,7 @@ static int pxa168_eth_start_xmit(struct sk_buff *skb, struct net_device *dev) wmb(); wrl(pep, SDMA_CMD, SDMA_CMD_TXDH | SDMA_CMD_ERD); - stats->tx_bytes += skb->len; + stats->tx_bytes += length; stats->tx_packets++; dev->trans_start = jiffies; if (pep->tx_ring_size - pep->tx_desc_count <= 1) { From ecc90462b428db2ad2ee5081c45496ed10f3a633 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Mon, 20 Jun 2011 17:53:24 -0500 Subject: [PATCH 301/327] jfs: agstart field must be 64 bits The previous patch added the agstart field to jfs_ip, but declared it a long. We need to make sure its 64 bits on every platform. Signed-off-by: Dave Kleikamp --- fs/jfs/jfs_incore.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 5097839b7709..584a4a1a6e81 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -50,7 +50,7 @@ struct jfs_inode_info { short btindex; /* btpage entry index*/ struct inode *ipimap; /* inode map */ unsigned long cflag; /* commit flags */ - long agstart; /* agstart of the containing IAG */ + u64 agstart; /* agstart of the containing IAG */ u16 bxflag; /* xflag of pseudo buffer? */ unchar pad; signed char active_ag; /* ag currently allocating from */ From fdb9c3cd5124c9a6e4c824ed2bca5b4602e84a1a Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 21 Apr 2011 23:09:11 +0000 Subject: [PATCH 302/327] msm: timer: Fix DGT rate on 8960 and 8660 The DGT runs at 27 MHz divided by 4 on 8660 and 8960. Signed-off-by: Stephen Boyd Signed-off-by: David Brown --- arch/arm/mach-msm/timer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c index 2232032181be..63621f152c98 100644 --- a/arch/arm/mach-msm/timer.c +++ b/arch/arm/mach-msm/timer.c @@ -57,10 +57,12 @@ enum timer_location { #if defined(CONFIG_ARCH_QSD8X50) #define DGT_HZ (19200000 / 4) /* 19.2 MHz / 4 by default */ #define MSM_DGT_SHIFT (0) -#elif defined(CONFIG_ARCH_MSM7X30) || defined(CONFIG_ARCH_MSM8X60) || \ - defined(CONFIG_ARCH_MSM8960) +#elif defined(CONFIG_ARCH_MSM7X30) #define DGT_HZ (24576000 / 4) /* 24.576 MHz (LPXO) / 4 by default */ #define MSM_DGT_SHIFT (0) +#elif defined(CONFIG_ARCH_MSM8X60) || defined(CONFIG_ARCH_MSM8960) +#define DGT_HZ (27000000 / 4) /* 27 MHz (PXO) / 4 by default */ +#define MSM_DGT_SHIFT (0) #else #define DGT_HZ 19200000 /* 19.2 MHz or 600 KHz after shift */ #define MSM_DGT_SHIFT (5) From a377e187df725fe7e62d2cec59ec290c5a605d93 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 20 Jun 2011 13:00:31 -0400 Subject: [PATCH 303/327] drm/radeon/kms/r6xx+: voltage fixes 0xff01 is not an actual voltage value, but a flag for the driver. If the power state as that value, skip setting the voltage. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/evergreen.c | 6 ++++++ drivers/gpu/drm/radeon/r600.c | 3 +++ drivers/gpu/drm/radeon/radeon_atombios.c | 4 ++++ drivers/gpu/drm/radeon/rv770.c | 3 +++ 4 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 7162b7bacac0..445af7981637 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -140,11 +140,17 @@ void evergreen_pm_misc(struct radeon_device *rdev) struct radeon_voltage *voltage = &ps->clock_info[req_cm_idx].voltage; if (voltage->type == VOLTAGE_SW) { + /* 0xff01 is a flag rather then an actual voltage */ + if (voltage->voltage == 0xff01) + return; if (voltage->voltage && (voltage->voltage != rdev->pm.current_vddc)) { radeon_atom_set_voltage(rdev, voltage->voltage, SET_VOLTAGE_TYPE_ASIC_VDDC); rdev->pm.current_vddc = voltage->voltage; DRM_DEBUG("Setting: vddc: %d\n", voltage->voltage); } + /* 0xff01 is a flag rather then an actual voltage */ + if (voltage->vddci == 0xff01) + return; if (voltage->vddci && (voltage->vddci != rdev->pm.current_vddci)) { radeon_atom_set_voltage(rdev, voltage->vddci, SET_VOLTAGE_TYPE_ASIC_VDDCI); rdev->pm.current_vddci = voltage->vddci; diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index dc9fde38ef49..f79d2ccb6755 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -590,6 +590,9 @@ void r600_pm_misc(struct radeon_device *rdev) struct radeon_voltage *voltage = &ps->clock_info[req_cm_idx].voltage; if ((voltage->type == VOLTAGE_SW) && voltage->voltage) { + /* 0xff01 is a flag rather then an actual voltage */ + if (voltage->voltage == 0xff01) + return; if (voltage->voltage != rdev->pm.current_vddc) { radeon_atom_set_voltage(rdev, voltage->voltage, SET_VOLTAGE_TYPE_ASIC_VDDC); rdev->pm.current_vddc = voltage->voltage; diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index fa62a503ae70..1e725d9f767f 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -2607,6 +2607,10 @@ void radeon_atom_set_voltage(struct radeon_device *rdev, u16 voltage_level, u8 v if (!atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, &crev)) return; + /* 0xff01 is a flag rather then an actual voltage */ + if (voltage_level == 0xff01) + return; + switch (crev) { case 1: args.v1.ucVoltageType = voltage_type; diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index ef8a5babe9f7..6f508ffd1035 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -105,6 +105,9 @@ void rv770_pm_misc(struct radeon_device *rdev) struct radeon_voltage *voltage = &ps->clock_info[req_cm_idx].voltage; if ((voltage->type == VOLTAGE_SW) && voltage->voltage) { + /* 0xff01 is a flag rather then an actual voltage */ + if (voltage->voltage == 0xff01) + return; if (voltage->voltage != rdev->pm.current_vddc) { radeon_atom_set_voltage(rdev, voltage->voltage, SET_VOLTAGE_TYPE_ASIC_VDDC); rdev->pm.current_vddc = voltage->voltage; From 79568f5be06c91071697c065f01f3ebfbeb25a61 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 20 Jun 2011 20:13:49 -0700 Subject: [PATCH 304/327] vfs: i_state needs to be 'unsigned long' for now Commit 13e12d14e2dc ("vfs: reorganize 'struct inode' layout a bit") moved things around a bit changed i_state to be unsigned int instead of unsigned long. That was to help structure layout for the 64-bit case, and shrink 'struct inode' a bit (admittedly that only happened when spinlock debugging was on and i_flags didn't pack with i_lock). However, Meelis Roos reports that this results in unaligned exceptions on sprc, and it turns out that the bit-locking primitives that we use for the I_NEW bit want to use the bitops. Which want 'unsigned long', not 'unsigned int'. We really should fix the bit locking code to not have that kind of requirement, but that's a much bigger change. So for now, revert that field back to 'unsigned long' (but keep the other re-ordering changes from the commit that caused this). Andi points out that we have played games with this in 'struct page', so it's solvable with other hacks too, but since right now the struct inode size advantage only happens with some rare config options, it's not worth fighting. It _would_ be worth fixing the bitlocking code, though. Especially since there is no type safety in the bitlocking code (this never caused any warnings, and worked fine on x86-64, because the bitlocks take a 'void *' and x86-64 doesn't care that deeply about alignment). So it's currently a very easy problem to trigger by mistake and never notice. Reported-by: Meelis Roos Cc: Andi Kleen Cc: David Miller Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 1c777878f1ea..6e73e2e9ae33 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -744,7 +744,7 @@ struct inode { spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned int i_flags; - unsigned int i_state; + unsigned long i_state; #ifdef CONFIG_SECURITY void *i_security; #endif From 56299378726d5f2ba8d3c8cbbd13cb280ba45e4f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 20 Jun 2011 20:25:46 -0700 Subject: [PATCH 305/327] Linux 3.0-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0499c2ee8541..41330a06e4ec 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Sneaky Weasel # *DOCUMENTATION* From 8f7d5efbef8718a774ac5e347b4ec069f17fd9b4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 10 Jun 2011 13:30:22 -0400 Subject: [PATCH 306/327] NFSv4.1: Fix some issues with pnfs_generic_pg_test 1. If the intention is to coalesce requests 'prev' and 'req' then we have to ensure at least that we have a layout starting at req_offset(prev). 2. If we're only requesting a minimal layout of length desc->pg_count, we need to test the length actually returned by the server before we allow the coalescing to occur. 3. We need to deal correctly with (pgio->lseg == NULL) 4. Fixup the test guarding the pnfs_update_layout. Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objio_osd.c | 3 +++ fs/nfs/pnfs.c | 12 +++++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index eb4aafa9f521..8ff2ea3f10ef 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -1000,6 +1000,9 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, if (!pnfs_generic_pg_test(pgio, prev, req)) return false; + if (pgio->pg_lseg == NULL) + return true; + return pgio->pg_count + req->wb_bytes <= OBJIO_LSEG(pgio->pg_lseg)->max_io_size; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 539b94cb6c0f..0f42e02436ee 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1064,19 +1064,21 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, gfp_flags = GFP_NOFS; } - if (pgio->pg_count == prev->wb_bytes) { + if (pgio->pg_lseg == NULL) { + if (pgio->pg_count != prev->wb_bytes) + return true; /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, - req_offset(req), + req_offset(prev), pgio->pg_count, access_type, gfp_flags); - return true; + if (pgio->pg_lseg == NULL) + return true; } - if (pgio->pg_lseg && - req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, + if (req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, pgio->pg_lseg->pls_range.length)) return false; From 19982ba8562e33083cb5bbb59a74855d8a9624ea Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 10 Jun 2011 13:30:23 -0400 Subject: [PATCH 307/327] NFSv4.1: Fix an off-by-one error in pnfs_generic_pg_test And document what is going on there... Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0f42e02436ee..29c0ca7fc347 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1078,11 +1078,22 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return true; } - if (req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, - pgio->pg_lseg->pls_range.length)) - return false; - - return true; + /* + * Test if a nfs_page is fully contained in the pnfs_layout_range. + * Note that this test makes several assumptions: + * - that the previous nfs_page in the struct nfs_pageio_descriptor + * is known to lie within the range. + * - that the nfs_page being tested is known to be contiguous with the + * previous nfs_page. + * - Layout ranges are page aligned, so we only have to test the + * start offset of the request. + * + * Please also note that 'end_offset' is actually the offset of the + * first byte that lies outside the pnfs_layout_range. FIXME? + * + */ + return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset, + pgio->pg_lseg->pls_range.length); } EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); From 1650add23578b5ca35c1f1e863987180a8c03779 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 2 Jun 2011 15:07:35 -0400 Subject: [PATCH 308/327] NFS: Fix decode_secinfo_maxsz I initially did the calculation in bytes, and not words Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 5db44a8dddf9..6870bc61ceec 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -255,7 +255,7 @@ static int nfs4_stat_to_errno(int); #define decode_fs_locations_maxsz \ (0) #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz) -#define decode_secinfo_maxsz (op_decode_hdr_maxsz + 4 + (NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN))) +#define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4)) #if defined(CONFIG_NFS_V4_1) #define NFS4_MAX_MACHINE_NAME_LEN (64) From 129b656a0de9a229a72fe4bb6bacd134a1477b44 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Fri, 10 Jun 2011 16:05:51 -0700 Subject: [PATCH 309/327] PM / Runtime: Update doc: usage count no longer incremented across system PM Commit e8665002477f0278f84f898145b1f141ba26ee26 (PM: Allow pm_runtime_suspend() to succeed during system suspend) removed usage count increment across system PM. Update doc to reflect this. Signed-off-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- Documentation/power/runtime_pm.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 654097b130b4..22accb3eb40e 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -566,11 +566,6 @@ to do this is: pm_runtime_set_active(dev); pm_runtime_enable(dev); -The PM core always increments the run-time usage counter before calling the -->prepare() callback and decrements it after calling the ->complete() callback. -Hence disabling run-time PM temporarily like this will not cause any run-time -suspend callbacks to be lost. - 7. Generic subsystem callbacks Subsystems may wish to conserve code space by using the set of generic power From 78420884e680da8fbc3240de2d3106437042381e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 18 Jun 2011 19:53:57 +0200 Subject: [PATCH 310/327] PM: Update documentation regarding sysdevs The part of Documentation/power/devices.txt regarding sysdevs is not valid any more after commit 2e711c04dbbf7a7732a3f7073b1fc285d12b369d (PM: Remove sysdev suspend, resume and shutdown operations), so remove it. Signed-off-by: Rafael J. Wysocki --- Documentation/power/devices.txt | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index 88880839ece4..ff923fe67d98 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt @@ -549,32 +549,6 @@ callbacks. The other platforms need not implement it or take it into account in any way. -System Devices --------------- -System devices (sysdevs) follow a slightly different API, which can be found in - - include/linux/sysdev.h - drivers/base/sys.c - -System devices will be suspended with interrupts disabled, and after all other -devices have been suspended. On resume, they will be resumed before any other -devices, and also with interrupts disabled. These things occur in special -"sysdev_driver" phases, which affect only system devices. - -Thus, after the suspend_noirq (or freeze_noirq or poweroff_noirq) phase, when -the non-boot CPUs are all offline and IRQs are disabled on the remaining online -CPU, then a sysdev_driver.suspend phase is carried out, and the system enters a -sleep state (or a system image is created). During resume (or after the image -has been created or loaded) a sysdev_driver.resume phase is carried out, IRQs -are enabled on the only online CPU, the non-boot CPUs are enabled, and the -resume_noirq (or thaw_noirq or restore_noirq) phase begins. - -Code to actually enter and exit the system-wide low power state sometimes -involves hardware details that are only known to the boot firmware, and -may leave a CPU running software (from SRAM or flash memory) that monitors -the system and manages its wakeup sequence. - - Device Low Power (suspend) States --------------------------------- Device low-power states aren't standard. One device might only handle From f76b168b6f117a49d36307053e1acbe30580ea5b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 18 Jun 2011 20:22:23 +0200 Subject: [PATCH 311/327] PM: Rename dev_pm_info.in_suspend to is_prepared This patch (as1473) renames the "in_suspend" field in struct dev_pm_info to "is_prepared", in preparation for an upcoming change. The new name is more descriptive of what the field really means. Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki Cc: stable@kernel.org --- drivers/base/power/main.c | 14 +++++++++----- drivers/usb/core/driver.c | 6 +++--- include/linux/device.h | 4 ++-- include/linux/pm.h | 2 +- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index aa6320207745..bf5a59ac1957 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -57,7 +57,7 @@ static int async_error; */ void device_pm_init(struct device *dev) { - dev->power.in_suspend = false; + dev->power.is_prepared = false; init_completion(&dev->power.completion); complete_all(&dev->power.completion); dev->power.wakeup = NULL; @@ -91,7 +91,7 @@ void device_pm_add(struct device *dev) pr_debug("PM: Adding info for %s:%s\n", dev->bus ? dev->bus->name : "No Bus", dev_name(dev)); mutex_lock(&dpm_list_mtx); - if (dev->parent && dev->parent->power.in_suspend) + if (dev->parent && dev->parent->power.is_prepared) dev_warn(dev, "parent %s should not be sleeping\n", dev_name(dev->parent)); list_add_tail(&dev->power.entry, &dpm_list); @@ -511,7 +511,11 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) dpm_wait(dev->parent, async); device_lock(dev); - dev->power.in_suspend = false; + /* + * This is a fib. But we'll allow new children to be added below + * a resumed device, even if the device hasn't been completed yet. + */ + dev->power.is_prepared = false; if (dev->pwr_domain) { pm_dev_dbg(dev, state, "power domain "); @@ -670,7 +674,7 @@ void dpm_complete(pm_message_t state) struct device *dev = to_device(dpm_prepared_list.prev); get_device(dev); - dev->power.in_suspend = false; + dev->power.is_prepared = false; list_move(&dev->power.entry, &list); mutex_unlock(&dpm_list_mtx); @@ -1042,7 +1046,7 @@ int dpm_prepare(pm_message_t state) put_device(dev); break; } - dev->power.in_suspend = true; + dev->power.is_prepared = true; if (!list_empty(&dev->power.entry)) list_move_tail(&dev->power.entry, &dpm_prepared_list); put_device(dev); diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index e35a17687c05..aa3cc465a601 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -375,7 +375,7 @@ static int usb_unbind_interface(struct device *dev) * Just re-enable it without affecting the endpoint toggles. */ usb_enable_interface(udev, intf, false); - } else if (!error && !intf->dev.power.in_suspend) { + } else if (!error && !intf->dev.power.is_prepared) { r = usb_set_interface(udev, intf->altsetting[0]. desc.bInterfaceNumber, 0); if (r < 0) @@ -960,7 +960,7 @@ void usb_rebind_intf(struct usb_interface *intf) } /* Try to rebind the interface */ - if (!intf->dev.power.in_suspend) { + if (!intf->dev.power.is_prepared) { intf->needs_binding = 0; rc = device_attach(&intf->dev); if (rc < 0) @@ -1107,7 +1107,7 @@ static int usb_resume_interface(struct usb_device *udev, if (intf->condition == USB_INTERFACE_UNBOUND) { /* Carry out a deferred switch to altsetting 0 */ - if (intf->needs_altsetting0 && !intf->dev.power.in_suspend) { + if (intf->needs_altsetting0 && !intf->dev.power.is_prepared) { usb_set_interface(udev, intf->altsetting[0]. desc.bInterfaceNumber, 0); intf->needs_altsetting0 = 0; diff --git a/include/linux/device.h b/include/linux/device.h index c66111affca9..553fd37b173b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -654,13 +654,13 @@ static inline int device_is_registered(struct device *dev) static inline void device_enable_async_suspend(struct device *dev) { - if (!dev->power.in_suspend) + if (!dev->power.is_prepared) dev->power.async_suspend = true; } static inline void device_disable_async_suspend(struct device *dev) { - if (!dev->power.in_suspend) + if (!dev->power.is_prepared) dev->power.async_suspend = false; } diff --git a/include/linux/pm.h b/include/linux/pm.h index 3160648ccdda..cc536bd80984 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -425,7 +425,7 @@ struct dev_pm_info { pm_message_t power_state; unsigned int can_wakeup:1; unsigned int async_suspend:1; - unsigned int in_suspend:1; /* Owned by the PM core */ + bool is_prepared:1; /* Owned by the PM core */ spinlock_t lock; #ifdef CONFIG_PM_SLEEP struct list_head entry; From 8440f4b19494467883f8541b7aa28c7bbf6ac92b Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Sat, 18 Jun 2011 20:34:01 +0200 Subject: [PATCH 312/327] PM: Free memory bitmaps if opening /dev/snapshot fails When opening /dev/snapshot device, snapshot_open() creates memory bitmaps which are freed in snapshot_release(). But if any of the callbacks called by pm_notifier_call_chain() returns NOTIFY_BAD, open() fails, snapshot_release() is never called and bitmaps are not freed. Next attempt to open /dev/snapshot then triggers BUG_ON() check in create_basic_memory_bitmaps(). This happens e.g. when vmwatchdog module is active on s390x. Signed-off-by: Michal Kubecek Signed-off-by: Rafael J. Wysocki Cc: stable@kernel.org --- kernel/power/user.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/power/user.c b/kernel/power/user.c index 7d02d33be699..42ddbc6f0de6 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -113,8 +113,10 @@ static int snapshot_open(struct inode *inode, struct file *filp) if (error) pm_notifier_call_chain(PM_POST_RESTORE); } - if (error) + if (error) { + free_basic_memory_bitmaps(); atomic_inc(&snapshot_device_available); + } data->frozen = 0; data->ready = 0; data->platform_support = 0; From 6d0e0e84f66d32c33511984dd3badd32364b863c Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 18 Jun 2011 22:42:09 +0200 Subject: [PATCH 313/327] PM: Fix async resume following suspend failure The PM core doesn't handle suspend failures correctly when it comes to asynchronously suspended devices. These devices are moved onto the dpm_suspended_list as soon as the corresponding async thread is started up, and they remain on the list even if they fail to suspend or the sleep transition is cancelled before they get suspended. As a result, when the PM core unwinds the transition, it tries to resume the devices even though they were never suspended. This patch (as1474) fixes the problem by adding a new "is_suspended" flag to dev_pm_info. Devices are resumed only if the flag is set. [rjw: * Moved the dev->power.is_suspended check into device_resume(), because we need to complete dev->power.completion and clear dev->power.is_prepared too for devices whose dev->power.is_suspended flags are unset. * Fixed __device_suspend() to avoid setting dev->power.is_suspended if async_error is different from zero.] Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki Cc: stable@kernel.org --- drivers/base/power/main.c | 14 ++++++++++++-- include/linux/pm.h | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index bf5a59ac1957..06f09bf89cb2 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -58,6 +58,7 @@ static int async_error; void device_pm_init(struct device *dev) { dev->power.is_prepared = false; + dev->power.is_suspended = false; init_completion(&dev->power.completion); complete_all(&dev->power.completion); dev->power.wakeup = NULL; @@ -517,6 +518,9 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) */ dev->power.is_prepared = false; + if (!dev->power.is_suspended) + goto Unlock; + if (dev->pwr_domain) { pm_dev_dbg(dev, state, "power domain "); error = pm_op(dev, &dev->pwr_domain->ops, state); @@ -552,6 +556,9 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) } End: + dev->power.is_suspended = false; + + Unlock: device_unlock(dev); complete_all(&dev->power.completion); @@ -839,11 +846,11 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) device_lock(dev); if (async_error) - goto End; + goto Unlock; if (pm_wakeup_pending()) { async_error = -EBUSY; - goto End; + goto Unlock; } if (dev->pwr_domain) { @@ -881,6 +888,9 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) } End: + dev->power.is_suspended = !error; + + Unlock: device_unlock(dev); complete_all(&dev->power.completion); diff --git a/include/linux/pm.h b/include/linux/pm.h index cc536bd80984..411e4f4be52b 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -426,6 +426,7 @@ struct dev_pm_info { unsigned int can_wakeup:1; unsigned int async_suspend:1; bool is_prepared:1; /* Owned by the PM core */ + bool is_suspended:1; /* Ditto */ spinlock_t lock; #ifdef CONFIG_PM_SLEEP struct list_head entry; From 4d1518f5668ef1b3dff6c3b30fa761fe5573cdaa Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 21 Jun 2011 23:24:33 +0200 Subject: [PATCH 314/327] PM / Runtime: Handle clocks correctly if CONFIG_PM_RUNTIME is unset Commit 85eb8c8d0b0900c073b0e6f89979ac9c439ade1a (PM / Runtime: Generic clock manipulation rountines for runtime PM (v6)) converted the shmobile platform to using generic code for runtime PM clock management, but it changed the behavior for CONFIG_PM_RUNTIME unset incorrectly. Specifically, for CONFIG_PM_RUNTIME unset pm_runtime_clk_notify() should enable clocks for action equal to BUS_NOTIFY_BIND_DRIVER and it should disable them for action equal to BUS_NOTIFY_UNBOUND_DRIVER (instead of BUS_NOTIFY_ADD_DEVICE and BUS_NOTIFY_DEL_DEVICE, respectively). Make this function behave as appropriate. Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm --- drivers/base/power/clock_ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index eaa8a854af03..ad367c4139b1 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -387,7 +387,7 @@ static int pm_runtime_clk_notify(struct notifier_block *nb, clknb = container_of(nb, struct pm_clk_notifier_block, nb); switch (action) { - case BUS_NOTIFY_ADD_DEVICE: + case BUS_NOTIFY_BIND_DRIVER: if (clknb->con_ids[0]) { for (con_id = clknb->con_ids; *con_id; con_id++) enable_clock(dev, *con_id); @@ -395,7 +395,7 @@ static int pm_runtime_clk_notify(struct notifier_block *nb, enable_clock(dev, NULL); } break; - case BUS_NOTIFY_DEL_DEVICE: + case BUS_NOTIFY_UNBOUND_DRIVER: if (clknb->con_ids[0]) { for (con_id = clknb->con_ids; *con_id; con_id++) disable_clock(dev, *con_id); From ca9c6890b598997165a7c85c001f382c910f12b0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 21 Jun 2011 23:25:32 +0200 Subject: [PATCH 315/327] PM / Domains: Update documentation Commit 4d27e9dcff00a6425d779b065ec8892e4f391661 (PM: Make power domain callbacks take precedence over subsystem ones) forgot to update the device power management documentation to take changes made by it into account. Correct that mistake. Signed-off-by: Rafael J. Wysocki --- Documentation/power/devices.txt | 39 +++++++++++---------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index ff923fe67d98..64565aac6e40 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt @@ -520,33 +520,20 @@ Support for power domains is provided through the pwr_domain field of struct device. This field is a pointer to an object of type struct dev_power_domain, defined in include/linux/pm.h, providing a set of power management callbacks analogous to the subsystem-level and device driver callbacks that are executed -for the given device during all power transitions, in addition to the respective -subsystem-level callbacks. Specifically, the power domain "suspend" callbacks -(i.e. ->runtime_suspend(), ->suspend(), ->freeze(), ->poweroff(), etc.) are -executed after the analogous subsystem-level callbacks, while the power domain -"resume" callbacks (i.e. ->runtime_resume(), ->resume(), ->thaw(), ->restore, -etc.) are executed before the analogous subsystem-level callbacks. Error codes -returned by the "suspend" and "resume" power domain callbacks are ignored. +for the given device during all power transitions, instead of the respective +subsystem-level callbacks. Specifically, if a device's pm_domain pointer is +not NULL, the ->suspend() callback from the object pointed to by it will be +executed instead of its subsystem's (e.g. bus type's) ->suspend() callback and +anlogously for all of the remaining callbacks. In other words, power management +domain callbacks, if defined for the given device, always take precedence over +the callbacks provided by the device's subsystem (e.g. bus type). -Power domain ->runtime_idle() callback is executed before the subsystem-level -->runtime_idle() callback and the result returned by it is not ignored. Namely, -if it returns error code, the subsystem-level ->runtime_idle() callback will not -be called and the helper function rpm_idle() executing it will return error -code. This mechanism is intended to help platforms where saving device state -is a time consuming operation and should only be carried out if all devices -in the power domain are idle, before turning off the shared power resource(s). -Namely, the power domain ->runtime_idle() callback may return error code until -the pm_runtime_idle() helper (or its asychronous version) has been called for -all devices in the power domain (it is recommended that the returned error code -be -EBUSY in those cases), preventing the subsystem-level ->runtime_idle() -callback from being run prematurely. - -The support for device power domains is only relevant to platforms needing to -use the same subsystem-level (e.g. platform bus type) and device driver power -management callbacks in many different power domain configurations and wanting -to avoid incorporating the support for power domains into the subsystem-level -callbacks. The other platforms need not implement it or take it into account -in any way. +The support for device power management domains is only relevant to platforms +needing to use the same device driver power management callbacks in many +different power domain configurations and wanting to avoid incorporating the +support for power domains into subsystem-level callbacks, for example by +modifying the platform bus type. Other platforms need not implement it or take +it into account in any way. Device Low Power (suspend) States From a5f76d5eba157bf637beb2dd18026db2917c512e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 21 Jun 2011 23:47:15 +0200 Subject: [PATCH 316/327] PCI / PM: Block races between runtime PM and system sleep After commit e8665002477f0278f84f898145b1f141ba26ee26 (PM: Allow pm_runtime_suspend() to succeed during system suspend) it is possible that a device resumed by the pm_runtime_resume(dev) in pci_pm_prepare() will be suspended immediately from a work item, timer function or otherwise, defeating the very purpose of calling pm_runtime_resume(dev) from there. To prevent that from happening it is necessary to increment the runtime PM usage counter of the device by replacing pm_runtime_resume() with pm_runtime_get_sync(). Moreover, the incremented runtime PM usage counter has to be decremented by the corresponding pci_pm_complete(), via pm_runtime_put_sync(). Signed-off-by: Rafael J. Wysocki Cc: stable@kernel.org Acked-by: Jesse Barnes --- drivers/pci/pci-driver.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 135df164a4c1..46767c53917a 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -624,7 +624,7 @@ static int pci_pm_prepare(struct device *dev) * system from the sleep state, we'll have to prevent it from signaling * wake-up. */ - pm_runtime_resume(dev); + pm_runtime_get_sync(dev); if (drv && drv->pm && drv->pm->prepare) error = drv->pm->prepare(dev); @@ -638,6 +638,8 @@ static void pci_pm_complete(struct device *dev) if (drv && drv->pm && drv->pm->complete) drv->pm->complete(dev); + + pm_runtime_put_sync(dev); } #else /* !CONFIG_PM_SLEEP */ From 35052cffe0081904f3362c05818db900dd9dc7de Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 21 Jun 2011 10:29:51 +0100 Subject: [PATCH 317/327] MN10300: asm/uaccess.h needs to #include linux/kernel.h for might_sleep() MN10300's asm/uaccess.h needs to #include linux/kernel.h to get might_sleep() otherwise it fails to build on MN10300 allyesconfig. This fails in a few places with messages like the following: In file included from security/keys/trusted.c:14: include/linux/uaccess.h: In function '__copy_from_user_nocache': include/linux/uaccess.h:52: error: implicit declaration of function 'might_sleep' Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- arch/mn10300/include/asm/uaccess.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h index 3d6e60dad9d9..780560b330d9 100644 --- a/arch/mn10300/include/asm/uaccess.h +++ b/arch/mn10300/include/asm/uaccess.h @@ -15,6 +15,7 @@ * User space memory access functions */ #include +#include #include #include From b1d7dd80aadb9042e83f9778b484a2f92e0b04d4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 21 Jun 2011 14:32:05 +0100 Subject: [PATCH 318/327] KEYS: Fix error handling in construct_key_and_link() Fix error handling in construct_key_and_link(). If construct_alloc_key() returns an error, it shouldn't pass out through the normal path as the key_serial() called by the kleave() statement will oops when it gets an error code in the pointer: BUG: unable to handle kernel paging request at ffffffffffffff84 IP: [] request_key_and_link+0x4d7/0x52f .. Call Trace: [] request_key+0x41/0x75 [] cifs_get_spnego_key+0x206/0x226 [cifs] [] CIFS_SessSetup+0x511/0x1234 [cifs] [] cifs_setup_session+0x90/0x1ae [cifs] [] cifs_get_smb_ses+0x34b/0x40f [cifs] [] cifs_mount+0x13f/0x504 [cifs] [] cifs_do_mount+0xc4/0x672 [cifs] [] mount_fs+0x69/0x155 [] vfs_kern_mount+0x63/0xa0 [] do_kern_mount+0x4d/0xdf [] do_mount+0x63c/0x69f [] sys_mount+0x88/0xc2 [] system_call_fastpath+0x16/0x1b Signed-off-by: David Howells Acked-by: Jeff Layton Signed-off-by: Linus Torvalds --- security/keys/request_key.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 8e319a416eec..82465328c39b 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -469,7 +469,7 @@ static struct key *construct_key_and_link(struct key_type *type, } else if (ret == -EINPROGRESS) { ret = 0; } else { - key = ERR_PTR(ret); + goto couldnt_alloc_key; } key_put(dest_keyring); @@ -479,6 +479,7 @@ static struct key *construct_key_and_link(struct key_type *type, construction_failed: key_negate_and_link(key, key_negative_timeout, NULL, NULL); key_put(key); +couldnt_alloc_key: key_put(dest_keyring); kleave(" = %d", ret); return ERR_PTR(ret); From 7553e8f2d5161a2b7a9b7a9f37be1b77e735552f Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 22 Jun 2011 18:13:01 -0700 Subject: [PATCH 319/327] mm, hotplug: fix error handling in mem_online_node() The error handling in mem_online_node() is incorrect: hotadd_new_pgdat() returns NULL if the new pgdat could not have been allocated and a pointer to it otherwise. mem_online_node() should fail if hotadd_new_pgdat() fails, not the inverse. This fixes an issue when memoryless nodes are not onlined and their sysfs interface is not registered when their first cpu is brought up. The bug was introduced by commit cf23422b9d76 ("cpu/mem hotplug: enable CPUs online before local memory online") iow v2.6.35. Signed-off-by: David Rientjes Reviewed-by: KOSAKI Motohiro Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 02159c755136..78dba9f04b5b 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -521,7 +521,7 @@ int mem_online_node(int nid) lock_memory_hotplug(); pgdat = hotadd_new_pgdat(nid, 0); - if (pgdat) { + if (!pgdat) { ret = -ENOMEM; goto out; } From f957db4fcdd8f03e186aa8f041f4049e76ab741c Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 22 Jun 2011 18:13:04 -0700 Subject: [PATCH 320/327] mm, hotplug: protect zonelist building with zonelists_mutex Commit 959ecc48fc75 ("mm/memory_hotplug.c: fix building of node hotplug zonelist") does not protect the build_all_zonelists() call with zonelists_mutex as needed. This can lead to races in constructing zonelist ordering if a concurrent build is underway. Protecting this with lock_memory_hotplug() is insufficient since zonelists can be rebuild though sysfs as well. Signed-off-by: David Rientjes Reviewed-by: KOSAKI Motohiro Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 78dba9f04b5b..c46887b5a11e 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -498,7 +498,9 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) * The node we allocated has no zone fallback lists. For avoiding * to access not-initialized zonelist, build here. */ + mutex_lock(&zonelists_mutex); build_all_zonelists(NULL); + mutex_unlock(&zonelists_mutex); return pgdat; } From 06c8a6a3e40265ff580428bbf51991617477f65b Mon Sep 17 00:00:00 2001 From: Damian Hobson-Garcia Date: Wed, 22 Jun 2011 07:46:25 +0000 Subject: [PATCH 321/327] fbdev: sh_mobile_meram: Correct pointer check for YCbCr chroma plane The check was intended to test if we have a valid pointer to write into, but it mistakenly checks the pointer contents instead. Since a valid pointer is mandatory for the chroma data if a YCbCr format is used, the pointer check has been removed. Signed-off-by: Damian Hobson-Garcia Signed-off-by: Paul Mundt --- drivers/video/sh_mobile_meram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/sh_mobile_meram.c b/drivers/video/sh_mobile_meram.c index 9170c82b495c..cc7d7329dc15 100644 --- a/drivers/video/sh_mobile_meram.c +++ b/drivers/video/sh_mobile_meram.c @@ -218,7 +218,7 @@ static inline void meram_get_next_icb_addr(struct sh_mobile_meram_info *pdata, icb_offset = 0xc0000000 | (cfg->current_reg << 23); *icb_addr_y = icb_offset | (cfg->icb[0].marker_icb << 24); - if ((*icb_addr_c) && is_nvcolor(cfg->pixelformat)) + if (is_nvcolor(cfg->pixelformat)) *icb_addr_c = icb_offset | (cfg->icb[1].marker_icb << 24); } From 3535ed3fa7a87244410696880000e03bc224315d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 16 Jun 2011 19:31:19 +0000 Subject: [PATCH 322/327] gx1fb: Fix section mismatch warnings Fix a chain of section mismatches in geode driver, beginning with: WARNING: drivers/video/geode/gx1fb.o(.data+0x70): Section mismatch in reference from the variable gx1fb_driver to the function .init.text:gx1fb_probe() The variable gx1fb_driver references the function __init gx1fb_probe() If the reference is valid then annotate the variable with __init* or __refdata (see linux/init.h) or name the variable: *_template, *_timer, *_sht, *_ops, *_probe, *_probe_one, *_console Making the changes that Paul pointed out resulted in a few more changes being needed, so they are all included here. Signed-off-by: Randy Dunlap Signed-off-by: Paul Mundt --- drivers/video/geode/gx1fb_core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/video/geode/gx1fb_core.c b/drivers/video/geode/gx1fb_core.c index c6b554f72c6d..5a5d0928df33 100644 --- a/drivers/video/geode/gx1fb_core.c +++ b/drivers/video/geode/gx1fb_core.c @@ -29,7 +29,7 @@ static int crt_option = 1; static char panel_option[32] = ""; /* Modes relevant to the GX1 (taken from modedb.c) */ -static const struct fb_videomode __initdata gx1_modedb[] = { +static const struct fb_videomode __devinitdata gx1_modedb[] = { /* 640x480-60 VESA */ { NULL, 60, 640, 480, 39682, 48, 16, 33, 10, 96, 2, 0, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA }, @@ -195,7 +195,7 @@ static int gx1fb_blank(int blank_mode, struct fb_info *info) return par->vid_ops->blank_display(info, blank_mode); } -static int __init gx1fb_map_video_memory(struct fb_info *info, struct pci_dev *dev) +static int __devinit gx1fb_map_video_memory(struct fb_info *info, struct pci_dev *dev) { struct geodefb_par *par = info->par; unsigned gx_base; @@ -268,7 +268,7 @@ static struct fb_ops gx1fb_ops = { .fb_imageblit = cfb_imageblit, }; -static struct fb_info * __init gx1fb_init_fbinfo(struct device *dev) +static struct fb_info * __devinit gx1fb_init_fbinfo(struct device *dev) { struct geodefb_par *par; struct fb_info *info; @@ -318,7 +318,7 @@ static struct fb_info * __init gx1fb_init_fbinfo(struct device *dev) return info; } -static int __init gx1fb_probe(struct pci_dev *pdev, const struct pci_device_id *id) +static int __devinit gx1fb_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct geodefb_par *par; struct fb_info *info; @@ -382,7 +382,7 @@ static int __init gx1fb_probe(struct pci_dev *pdev, const struct pci_device_id * return ret; } -static void gx1fb_remove(struct pci_dev *pdev) +static void __devexit gx1fb_remove(struct pci_dev *pdev) { struct fb_info *info = pci_get_drvdata(pdev); struct geodefb_par *par = info->par; @@ -441,7 +441,7 @@ static struct pci_driver gx1fb_driver = { .name = "gx1fb", .id_table = gx1fb_id_table, .probe = gx1fb_probe, - .remove = gx1fb_remove, + .remove = __devexit_p(gx1fb_remove), }; static int __init gx1fb_init(void) @@ -456,7 +456,7 @@ static int __init gx1fb_init(void) return pci_register_driver(&gx1fb_driver); } -static void __exit gx1fb_cleanup(void) +static void __devexit gx1fb_cleanup(void) { pci_unregister_driver(&gx1fb_driver); } From 9845afc8fa32de145d56c8e69b7900e10371255d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 16 Jun 2011 19:32:40 +0000 Subject: [PATCH 323/327] sm501fb: fix section mismatch warning Fix section mismatch warning in sm501fb: WARNING: drivers/video/sm501fb.o(.text+0x21d6): Section mismatch in reference from the function sm501fb_init_fb() to the variable .devinit.data:sm501_default_mode The function sm501fb_init_fb() references the variable __devinitdata sm501_default_mode. This is often because sm501fb_init_fb lacks a __devinitdata annotation or the annotation of sm501_default_mode is wrong. Signed-off-by: Randy Dunlap Signed-off-by: Paul Mundt --- drivers/video/sm501fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c index 87f0be1e78b5..6294dca95500 100644 --- a/drivers/video/sm501fb.c +++ b/drivers/video/sm501fb.c @@ -1664,7 +1664,7 @@ static void sm501fb_stop(struct sm501fb_info *info) resource_size(info->regs_res)); } -static int sm501fb_init_fb(struct fb_info *fb, +static int __devinit sm501fb_init_fb(struct fb_info *fb, enum sm501_controller head, const char *fbname) { From 291600193e5c0c3f0a9af1f23a8076dd7417c02a Mon Sep 17 00:00:00 2001 From: Pavel Shved Date: Fri, 17 Jun 2011 16:25:12 +0000 Subject: [PATCH 324/327] hecubafb: add module_put on error path in hecubafb_probe() In hecubafb_probe(), after a successful try_module_get, vzalloc may fail and make the hecubafb_probe return, but the module is not put on this error path. This patch adds an exit point that calls module_put in such situation. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Pavel Shved Signed-off-by: Paul Mundt --- drivers/video/hecubafb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/video/hecubafb.c b/drivers/video/hecubafb.c index fbef15f7a218..614251a9af91 100644 --- a/drivers/video/hecubafb.c +++ b/drivers/video/hecubafb.c @@ -233,7 +233,7 @@ static int __devinit hecubafb_probe(struct platform_device *dev) videomemory = vzalloc(videomemorysize); if (!videomemory) - return retval; + goto err_videomem_alloc; info = framebuffer_alloc(sizeof(struct hecubafb_par), &dev->dev); if (!info) @@ -275,6 +275,7 @@ err_fbreg: framebuffer_release(info); err_fballoc: vfree(videomemory); +err_videomem_alloc: module_put(board->owner); return retval; } From 9377c51752970c305fae29ac634501fde44378cb Mon Sep 17 00:00:00 2001 From: William Katsak Date: Thu, 23 Jun 2011 13:16:29 +0000 Subject: [PATCH 325/327] udlfb: Correct sub-optimal resolution selection. The situation in which the problem occurred was with a Plugable UGA-2K-A connected to a Samsung EX2220X display. The driver indicates that 1920x1080 is a valid mode (the first mode available, in fact), but proceeds to set the framebuffer size to 1600x1200. The patch corrects what seems to be a logic error, regarding unsetting the FB_MISC_1ST_DETAIL flag, if the first (top/best) mode is invalid. The existing code unset the flag if ANY mode was invalid. Signed-off-by: William Katsak Signed-off-by: Paul Mundt --- drivers/video/udlfb.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/video/udlfb.c b/drivers/video/udlfb.c index 52b0f3e8ccac..816a4fda04f5 100644 --- a/drivers/video/udlfb.c +++ b/drivers/video/udlfb.c @@ -1233,8 +1233,12 @@ static int dlfb_setup_modes(struct dlfb_data *dev, if (dlfb_is_valid_mode(&info->monspecs.modedb[i], info)) fb_add_videomode(&info->monspecs.modedb[i], &info->modelist); - else /* if we've removed top/best mode */ - info->monspecs.misc &= ~FB_MISC_1ST_DETAIL; + else { + if (i == 0) + /* if we've removed top/best mode */ + info->monspecs.misc + &= ~FB_MISC_1ST_DETAIL; + } } default_vmode = fb_find_best_display(&info->monspecs, From 39785eb1d3e6c58cc8bf8f6990956a58037ecc75 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Thu, 23 Jun 2011 20:20:26 +0000 Subject: [PATCH 326/327] fsl-diu-fb: remove check for pixel clock ranges The Freescale DIU framebuffer driver defines two constants, MIN_PIX_CLK and MAX_PIX_CLK, that are supposed to represent the lower and upper limits of the pixel clock. These values, however, are true only for one platform clock rate (533MHz) and only for the MPC8610. So the actual range for the pixel clock is chip-specific, which means the current values are almost always wrong. The chance of an out-of-range pixel clock being used are also remote. Rather than try to detect an out-of-range clock in the DIU driver, we depend on the board-specific pixel clock function (e.g. p1022ds_set_pixel_clock) to clamp the pixel clock to a supported value. Signed-off-by: Timur Tabi Signed-off-by: Paul Mundt --- drivers/video/fsl-diu-fb.c | 16 ---------------- include/linux/fsl-diu-fb.h | 6 ------ 2 files changed, 22 deletions(-) diff --git a/drivers/video/fsl-diu-fb.c b/drivers/video/fsl-diu-fb.c index bedf5be27f05..0acc7d65aeaa 100644 --- a/drivers/video/fsl-diu-fb.c +++ b/drivers/video/fsl-diu-fb.c @@ -555,8 +555,6 @@ static void adjust_aoi_size_position(struct fb_var_screeninfo *var, static int fsl_diu_check_var(struct fb_var_screeninfo *var, struct fb_info *info) { - unsigned long htotal, vtotal; - pr_debug("check_var xres: %d\n", var->xres); pr_debug("check_var yres: %d\n", var->yres); @@ -635,20 +633,6 @@ static int fsl_diu_check_var(struct fb_var_screeninfo *var, break; } - /* If the pixclock is below the minimum spec'd value then set to - * refresh rate for 60Hz since this is supported by most monitors. - * Refer to Documentation/fb/ for calculations. - */ - if ((var->pixclock < MIN_PIX_CLK) || (var->pixclock > MAX_PIX_CLK)) { - htotal = var->xres + var->right_margin + var->hsync_len + - var->left_margin; - vtotal = var->yres + var->lower_margin + var->vsync_len + - var->upper_margin; - var->pixclock = (vtotal * htotal * 6UL) / 100UL; - var->pixclock = KHZ2PICOS(var->pixclock); - pr_debug("pixclock set for 60Hz refresh = %u ps\n", - var->pixclock); - } var->height = -1; var->width = -1; diff --git a/include/linux/fsl-diu-fb.h b/include/linux/fsl-diu-fb.h index 781d4671415f..daa9952d2174 100644 --- a/include/linux/fsl-diu-fb.h +++ b/include/linux/fsl-diu-fb.h @@ -24,12 +24,6 @@ * See mpc8610fb_set_par(), map_video_memory(), and unmap_video_memory() */ #define MEM_ALLOC_THRESHOLD (1024*768*4+32) -/* Minimum value that the pixel clock can be set to in pico seconds - * This is determined by platform clock/3 where the minimum platform - * clock is 533MHz. This gives 5629 pico seconds. - */ -#define MIN_PIX_CLK 5629 -#define MAX_PIX_CLK 96096 #include From 17e8c4e1ebf139743e3830439fa65fd906af4a43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Minier?= Date: Mon, 20 Jun 2011 20:44:17 +0000 Subject: [PATCH 327/327] fbdev: amba: Link fb device to its parent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some pieces of userspace like debian-installer expect to find the fb0 driver name by readlink-ing /sys/class/graphics/fb0/device/driver but this was broken with amba-clcd as it sets up fb_info manually and missed the .device parent pointer. Signed-off-by: Loïc Minier Cc: Russell King Signed-off-by: Paul Mundt --- drivers/video/amba-clcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c index 5fc983c5b92c..cf03ad067147 100644 --- a/drivers/video/amba-clcd.c +++ b/drivers/video/amba-clcd.c @@ -447,6 +447,8 @@ static int clcdfb_register(struct clcd_fb *fb) goto out; } + fb->fb.device = &fb->dev->dev; + fb->fb.fix.mmio_start = fb->dev->res.start; fb->fb.fix.mmio_len = resource_size(&fb->dev->res);