From 3a45c9e4b192c9e96f65a8473c7a28e09cf2ac2a Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Sat, 13 Jun 2009 10:43:19 +0100 Subject: [PATCH 001/741] MAINTAINERS: Update file list for ARM/S3C2410 and ARM/S3C2440 Add F: entries for ARM/S3C2410 and ARM/S3C2440 to update the entries. Signed-off-by: Ben Dooks --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 90f81283b722..fa9377df160d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -830,6 +830,7 @@ M: ben-linux@fluff.org L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) W: http://www.fluff.org/ben/linux/ S: Maintained +F: arch/arm/mach-s3c2410/ ARM/S3C2440 ARM ARCHITECTURE P: Ben Dooks @@ -837,6 +838,7 @@ M: ben-linux@fluff.org L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) W: http://www.fluff.org/ben/linux/ S: Maintained +F: arch/arm/mach-s3c2440/ ARM/TECHNOLOGIC SYSTEMS TS7250 MACHINE SUPPORT P: Lennert Buytenhek From b21477f9d257cd8d45f7df32a9ebed45dbdfa4b5 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Sat, 13 Jun 2009 10:46:34 +0100 Subject: [PATCH 002/741] MAINTAINERS: Add ARM S3C2442, S3C2443, S3C6400, S3C6410 and ARM/SAMSUNG Add entries for the ARM architectures and platform support that are currently being maintained by myself. 
Signed-off-by: Ben Dooks --- MAINTAINERS | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index fa9377df160d..293cba0dc121 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -824,6 +824,15 @@ M: alex@shark-linux.de W: http://www.shark-linux.de/shark.html S: Maintained +ARM/SAMSUNG ARM ARCHITECTURES +P: Ben Dooks +M: ben-linux@fluff.org +L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) +W: http://www.fluff.org/ben/linux/ +S: Maintained +F: arch/arm/plat-s3c/ +F: arch/arm/plat-s3c24xx/ + ARM/S3C2410 ARM ARCHITECTURE P: Ben Dooks M: ben-linux@fluff.org @@ -840,6 +849,38 @@ W: http://www.fluff.org/ben/linux/ S: Maintained F: arch/arm/mach-s3c2440/ +ARM/S3C2442 ARM ARCHITECTURE +P: Ben Dooks +M: ben-linux@fluff.org +L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) +W: http://www.fluff.org/ben/linux/ +S: Maintained +F: arch/arm/mach-s3c2442/ + +ARM/S3C2443 ARM ARCHITECTURE +P: Ben Dooks +M: ben-linux@fluff.org +L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) +W: http://www.fluff.org/ben/linux/ +S: Maintained +F: arch/arm/mach-s3c2443/ + +ARM/S3C6400 ARM ARCHITECTURE +P: Ben Dooks +M: ben-linux@fluff.org +L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) +W: http://www.fluff.org/ben/linux/ +S: Maintained +F: arch/arm/mach-s3c6400/ + +ARM/S3C6410 ARM ARCHITECTURE +P: Ben Dooks +M: ben-linux@fluff.org +L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) +W: http://www.fluff.org/ben/linux/ +S: Maintained +F: arch/arm/mach-s3c6410/ + ARM/TECHNOLOGIC SYSTEMS TS7250 MACHINE SUPPORT P: Lennert Buytenhek M: kernel@wantstofly.org From c79ee4e466dd12347f112e2af306dca35198458f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 16 Jun 2009 12:23:58 +0200 Subject: [PATCH 003/741] dma-debug: fix off-by-one error in overlap function This patch fixes a bug in the overlap function which returned true if one region ends exactly before the second region begins. 
This is no overlap but the function returned true in that case. Cc: stable@kernel.org Reported-by: Andrew Randrianasulu Signed-off-by: Joerg Roedel --- lib/dma-debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 3b93129a968c..a9b6b5c9e091 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -862,7 +862,7 @@ static inline bool overlap(void *addr, u64 size, void *start, void *end) return ((addr >= start && addr < end) || (addr2 >= start && addr2 < end) || - ((addr < start) && (addr2 >= end))); + ((addr < start) && (addr2 > end))); } static void check_for_illegal_area(struct device *dev, void *addr, u64 size) From b0a5b83ee0fce9dbf8ff5fe1f8c9ae7dfafe458c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 16 Jun 2009 16:11:14 +0200 Subject: [PATCH 004/741] dma-debug: Put all hash-chain locks into the same lock class Alan Cox reported that lockdep runs out of its stack-trace entries with certain configs: BUG: MAX_STACK_TRACE_ENTRIES too low This happens because there are 1024 hash buckets, each with a separate lock. Lockdep puts each lock into a separate lock class and tracks them independently. But in reality we never take more than one of the buckets, so they really belong into a single lock-class. Annotate the hash bucket lock init accordingly. 
[ Impact: reduce the lockdep footprint of dma-debug ] Reported-by: Alan Cox Signed-off-by: Ingo Molnar Signed-off-by: Joerg Roedel --- lib/dma-debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/dma-debug.c b/lib/dma-debug.c index a9b6b5c9e091..c9187fed0b93 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -716,7 +716,7 @@ void dma_debug_init(u32 num_entries) for (i = 0; i < HASH_SIZE; ++i) { INIT_LIST_HEAD(&dma_entry_hash[i].list); - dma_entry_hash[i].lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&dma_entry_hash[i].lock); } if (dma_debug_fs_init() != 0) { From d9f2a5ecb2846d0fd368fb4c45182e43f38e4471 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 20 Jun 2009 13:19:25 +0530 Subject: [PATCH 005/741] perf_counter, x8: Fix L1-data-Cache-Store-Referencees for AMD Fix AMD's Data Cache Refills from System event. After this patch : ./tools/perf/perf stat -e l1d -e l1d-misses -e l1d-write -e l1d-prefetch -e l1d-prefetch-miss -e l1i -e l1i-misses -e l1i-prefetch -e l2 -e l2-misses -e l2-write -e dtlb -e dtlb-misses -e itlb -e itlb-misses -e bpu -e bpu-misses ls /dev/ > /dev/null Performance counter stats for 'ls /dev/': 2499484 L1-data-Cache-Load-Referencees (scaled from 3.97%) 70347 L1-data-Cache-Load-Misses (scaled from 7.30%) 9360 L1-data-Cache-Store-Referencees (scaled from 8.64%) 32804 L1-data-Cache-Prefetch-Referencees (scaled from 17.72%) 7693 L1-data-Cache-Prefetch-Misses (scaled from 22.97%) 2180945 L1-instruction-Cache-Load-Referencees (scaled from 28.48%) 14518 L1-instruction-Cache-Load-Misses (scaled from 35.00%) 2405 L1-instruction-Cache-Prefetch-Referencees (scaled from 34.89%) 71387 L2-Cache-Load-Referencees (scaled from 34.94%) 18732 L2-Cache-Load-Misses (scaled from 34.92%) 79918 L2-Cache-Store-Referencees (scaled from 36.02%) 1295294 Data-TLB-Cache-Load-Referencees (scaled from 35.99%) 30896 Data-TLB-Cache-Load-Misses (scaled from 33.36%) 1222030 Instruction-TLB-Cache-Load-Referencees (scaled from 29.46%) 357 
Instruction-TLB-Cache-Load-Misses (scaled from 20.46%) 530888 Branch-Cache-Load-Referencees (scaled from 11.48%) 8638 Branch-Cache-Load-Misses (scaled from 5.09%) 0.011295149 seconds time elapsed. Earlier it always shows value 0. Signed-off-by: Jaswinder Singh Rajput LKML-Reference: <1245484165.3102.6.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 76dfef23f789..22eb3a1d4f9c 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -401,7 +401,7 @@ static const u64 amd_hw_cache_event_ids [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ }, [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */ + [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ [ C(RESULT_MISS) ] = 0, }, [ C(OP_PREFETCH) ] = { From c1f47b454ce759d7b13604137a233cad4617e1e8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 21 Jun 2009 13:58:51 +0200 Subject: [PATCH 006/741] perf_counter tools: Fix vmlinux fallback when running on a different kernel Lucas De Marchi reported that perf report and perf annotate displays mismatching profile if a perf.data is analyzed on an older kernel - even if the correct vmlinux is specified via the -k option. The reason is the fallback path in util/symbol.c:dso__load_kernel(): int dso__load_kernel(struct dso *self, const char *vmlinux, symbol_filter_t filter, int verbose) { int err = -1; if (vmlinux) err = dso__load_vmlinux(self, vmlinux, filter, verbose); if (err) err = dso__load_kallsyms(self, filter, verbose); return err; } dso__load_vmlinux() returns negative on error, but on success it returns the number of symbols loaded - which confuses the function to load the kallsyms. 
This is normally harmless, as reporting is usually performed on the same kernel that is analyzed - but if there's a mismatch then we load the wrong kallsyms and create a non-sensical symbol tree. The fix is to only fall back to kallsyms on errors. Reported-by: Lucas De Marchi Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 86e14375e74e..01b62fa03996 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -629,7 +629,7 @@ int dso__load_kernel(struct dso *self, const char *vmlinux, if (vmlinux) err = dso__load_vmlinux(self, vmlinux, filter, verbose); - if (err) + if (err < 0) err = dso__load_kallsyms(self, filter, verbose); return err; From b76a3f93d01fc93a87cb6eba4e854ffe378b4bac Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 8 Jun 2009 19:28:41 +0300 Subject: [PATCH 007/741] exofs: Fix bio leak in error handling path (sync read) When failing a read request in the sync path, called from write_begin, I forgot to free the allocated bio, fix it. Signed-off-by: Boaz Harrosh --- fs/exofs/inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 77d0a295eb1c..bb5d6ed0f7a8 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -295,6 +295,9 @@ static int read_exec(struct page_collect *pcol, bool is_sync) err: if (!is_sync) _unlock_pcol_pages(pcol, ret, READ); + else /* Pages unlocked by caller in sync mode only free bio */ + pcol_free(pcol); + kfree(pcol_copy); if (or) osd_end_request(or); From 27d2e1491985e95c486d991302e399f5c584b4eb Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 14 Jun 2009 17:23:09 +0300 Subject: [PATCH 008/741] exofs: Remove IBM copyrights Boaz, Congrats on getting all the OSD stuff into 2.6.30! 
I just pulled the git, and saw that the IBM copyrights are still there. Please remove them from all files: * Copyright (C) 2005, 2006 * International Business Machines IBM has revoked all rights on the code - they gave it to me. Thanks! Avishay Signed-off-by: Avishay Traeger Signed-off-by: Boaz Harrosh --- fs/exofs/common.h | 4 +--- fs/exofs/dir.c | 4 +--- fs/exofs/exofs.h | 4 +--- fs/exofs/file.c | 4 +--- fs/exofs/inode.c | 4 +--- fs/exofs/namei.c | 4 +--- fs/exofs/osd.c | 4 +--- fs/exofs/super.c | 4 +--- fs/exofs/symlink.c | 4 +--- 9 files changed, 9 insertions(+), 27 deletions(-) diff --git a/fs/exofs/common.h b/fs/exofs/common.h index 24667eedc023..c6718e4817fe 100644 --- a/fs/exofs/common.h +++ b/fs/exofs/common.h @@ -2,9 +2,7 @@ * common.h - Common definitions for both Kernel and user-mode utilities * * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh * diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 65b0c8c776a1..4cfab1cc75c0 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh * diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 0fd4c7859679..c413b74ecf31 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh * diff --git a/fs/exofs/file.c b/fs/exofs/file.c index 6ed7fe484752..c6810038d637 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c @@ -1,8 +1,6 @@ /* * Copyright (C) 
2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh * diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index bb5d6ed0f7a8..6c10f7476699 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh * diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index 77fdd765e76d..b7dd0c236863 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh * diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c index b3d2ccb87aaa..4372542df284 100644 --- a/fs/exofs/osd.c +++ b/fs/exofs/osd.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh * diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 8216c5b77b53..e47b38e55a26 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh * diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c index 36e2d7bc7f7b..4dd687c3e747 100644 --- a/fs/exofs/symlink.c +++ b/fs/exofs/symlink.c @@ -1,8 +1,6 @@ /* * Copyright (C) 2005, 2006 - * Avishay Traeger 
(avishay@gmail.com) (avishay@il.ibm.com) - * Copyright (C) 2005, 2006 - * International Business Machines + * Avishay Traeger (avishay@gmail.com) * Copyright (C) 2008, 2009 * Boaz Harrosh * From baaf94cdc7fe1c61e3c660a3b055724fd9d0a034 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 14 Jun 2009 16:52:10 +0300 Subject: [PATCH 009/741] exofs: Avoid using file_fsync() The use of file_fsync() in exofs_file_sync() is not necessary since it does some extra stuff not used by exofs. Open code just the parts that are currently needed. TODO: Farther optimization can be done to sync the sb only on inode update of new files, Usually the sb update is not needed in exofs. Signed-off-by: Boaz Harrosh --- fs/exofs/exofs.h | 3 +++ fs/exofs/file.c | 17 ++++++++++++----- fs/exofs/super.c | 2 +- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index c413b74ecf31..5ec72e020b22 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -154,6 +154,9 @@ ino_t exofs_parent_ino(struct dentry *child); int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *, struct inode *); +/* super.c */ +int exofs_sync_fs(struct super_block *sb, int wait); + /********************* * operation vectors * *********************/ diff --git a/fs/exofs/file.c b/fs/exofs/file.c index c6810038d637..839b9dc1e70f 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c @@ -45,16 +45,23 @@ static int exofs_file_fsync(struct file *filp, struct dentry *dentry, { int ret; struct address_space *mapping = filp->f_mapping; + struct inode *inode = dentry->d_inode; + struct super_block *sb; ret = filemap_write_and_wait(mapping); if (ret) return ret; - /*Note: file_fsync below also calles sync_blockdev, which is a no-op - * for exofs, but other then that it does sync_inode and - * sync_superblock which is what we need here. 
- */ - return file_fsync(filp, dentry, datasync); + /* sync the inode attributes */ + ret = write_inode_now(inode, 1); + + /* This is a good place to write the sb */ + /* TODO: Sechedule an sb-sync on create */ + sb = inode->i_sb; + if (sb->s_dirt) + exofs_sync_fs(sb, 1); + + return ret; } static int exofs_flush(struct file *file, fl_owner_t id) diff --git a/fs/exofs/super.c b/fs/exofs/super.c index e47b38e55a26..a343b4ea62f6 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -198,7 +198,7 @@ static const struct export_operations exofs_export_ops; /* * Write the superblock to the OSD */ -static int exofs_sync_fs(struct super_block *sb, int wait) +int exofs_sync_fs(struct super_block *sb, int wait) { struct exofs_sb_info *sbi; struct exofs_fscb *fscb; From 42c55aa838bbd274a7ad2be1fd81d423ca63da4e Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 17 Jun 2009 16:54:34 +0300 Subject: [PATCH 010/741] MAINTAINERS: Add osd maintained files (F:) OSD files are found in three places: drivers/scsi/osd/ include/scsi/osd_* fs/exofs/ Signed-off-by: Boaz Harrosh --- MAINTAINERS | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index dc226e78612c..b9b208955098 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4321,7 +4321,7 @@ W: http://www.nongnu.org/orinoco/ S: Maintained F: drivers/net/wireless/orinoco/ -OSD LIBRARY +OSD LIBRARY and FILESYSTEM P: Boaz Harrosh M: bharrosh@panasas.com P: Benny Halevy @@ -4330,6 +4330,9 @@ L: osd-dev@open-osd.org W: http://open-osd.org T: git git://git.open-osd.org/open-osd.git S: Maintained +F: drivers/scsi/osd/ +F: include/scsi/osd_* +F: fs/exofs/ P54 WIRELESS DRIVER P: Michael Wu From 51e268423151fc7bb41945bde7843160b6a14c32 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 22 Jun 2009 16:43:14 +0530 Subject: [PATCH 011/741] perf_counter tools: Define separate declarations for H/W and S/W events Define separate declarations for H/W and S/W events to: 1. 
Shorten name to save some space so that we can add more members 2. Fix alignment 3. Avoid declaring HARDWARE/SOFTWARE again and again. Removed unused CR(x, y) Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245669194.17153.6.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 42 +++++++++++++++++----------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 35d04da38d6a..12abab3a0d63 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -18,30 +18,30 @@ struct event_symbol { char *symbol; }; -#define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y -#define CR(x, y) .type = PERF_TYPE_##x, .config = y +#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x +#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x static struct event_symbol event_symbols[] = { - { C(HARDWARE, HW_CPU_CYCLES), "cpu-cycles", }, - { C(HARDWARE, HW_CPU_CYCLES), "cycles", }, - { C(HARDWARE, HW_INSTRUCTIONS), "instructions", }, - { C(HARDWARE, HW_CACHE_REFERENCES), "cache-references", }, - { C(HARDWARE, HW_CACHE_MISSES), "cache-misses", }, - { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branch-instructions", }, - { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branches", }, - { C(HARDWARE, HW_BRANCH_MISSES), "branch-misses", }, - { C(HARDWARE, HW_BUS_CYCLES), "bus-cycles", }, + { CHW(CPU_CYCLES), "cpu-cycles", }, + { CHW(CPU_CYCLES), "cycles", }, + { CHW(INSTRUCTIONS), "instructions", }, + { CHW(CACHE_REFERENCES), "cache-references", }, + { CHW(CACHE_MISSES), "cache-misses", }, + { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", }, + { CHW(BRANCH_INSTRUCTIONS), "branches", }, + { CHW(BRANCH_MISSES), "branch-misses", }, + { CHW(BUS_CYCLES), "bus-cycles", }, - { C(SOFTWARE, SW_CPU_CLOCK), "cpu-clock", }, - { C(SOFTWARE, SW_TASK_CLOCK), "task-clock", }, - { C(SOFTWARE, SW_PAGE_FAULTS), 
"page-faults", }, - { C(SOFTWARE, SW_PAGE_FAULTS), "faults", }, - { C(SOFTWARE, SW_PAGE_FAULTS_MIN), "minor-faults", }, - { C(SOFTWARE, SW_PAGE_FAULTS_MAJ), "major-faults", }, - { C(SOFTWARE, SW_CONTEXT_SWITCHES), "context-switches", }, - { C(SOFTWARE, SW_CONTEXT_SWITCHES), "cs", }, - { C(SOFTWARE, SW_CPU_MIGRATIONS), "cpu-migrations", }, - { C(SOFTWARE, SW_CPU_MIGRATIONS), "migrations", }, + { CSW(CPU_CLOCK), "cpu-clock", }, + { CSW(TASK_CLOCK), "task-clock", }, + { CSW(PAGE_FAULTS), "page-faults", }, + { CSW(PAGE_FAULTS), "faults", }, + { CSW(PAGE_FAULTS_MIN), "minor-faults", }, + { CSW(PAGE_FAULTS_MAJ), "major-faults", }, + { CSW(CONTEXT_SWITCHES), "context-switches", }, + { CSW(CONTEXT_SWITCHES), "cs", }, + { CSW(CPU_MIGRATIONS), "cpu-migrations", }, + { CSW(CPU_MIGRATIONS), "migrations", }, }; #define __PERF_COUNTER_FIELD(config, name) \ From 74d5b5889ea71a95d8924c08f8a7c6e2bdcbc0ba Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 22 Jun 2009 16:44:28 +0530 Subject: [PATCH 012/741] perf_counter tools: Introduce alias member in event_symbol By introducing alias member in event_symbol : 1. duplicate lines are removed, like: cpu-cycles and cycles branch-instructions and branches context-switches and cs cpu-migrations and migrations 2. We can also add alias for another events. 
Now ./perf list looks like : List of pre-defined events (to be used in -e): cpu-cycles OR cycles [Hardware event] instructions [Hardware event] cache-references [Hardware event] cache-misses [Hardware event] branch-instructions OR branches [Hardware event] branch-misses [Hardware event] bus-cycles [Hardware event] cpu-clock [Software event] task-clock [Software event] page-faults [Software event] faults [Software event] minor-faults [Software event] major-faults [Software event] context-switches OR cs [Software event] cpu-migrations OR migrations [Software event] rNNN [raw hardware event descriptor] Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245669268.17153.8.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 61 +++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 12abab3a0d63..f5695486ad3f 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -16,32 +16,29 @@ struct event_symbol { u8 type; u64 config; char *symbol; + char *alias; }; #define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x static struct event_symbol event_symbols[] = { - { CHW(CPU_CYCLES), "cpu-cycles", }, - { CHW(CPU_CYCLES), "cycles", }, - { CHW(INSTRUCTIONS), "instructions", }, - { CHW(CACHE_REFERENCES), "cache-references", }, - { CHW(CACHE_MISSES), "cache-misses", }, - { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", }, - { CHW(BRANCH_INSTRUCTIONS), "branches", }, - { CHW(BRANCH_MISSES), "branch-misses", }, - { CHW(BUS_CYCLES), "bus-cycles", }, + { CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, + { CHW(INSTRUCTIONS), "instructions", "" }, + { CHW(CACHE_REFERENCES), "cache-references", "" }, + { CHW(CACHE_MISSES), "cache-misses", "" }, + { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" 
}, + { CHW(BRANCH_MISSES), "branch-misses", "" }, + { CHW(BUS_CYCLES), "bus-cycles", "" }, - { CSW(CPU_CLOCK), "cpu-clock", }, - { CSW(TASK_CLOCK), "task-clock", }, - { CSW(PAGE_FAULTS), "page-faults", }, - { CSW(PAGE_FAULTS), "faults", }, - { CSW(PAGE_FAULTS_MIN), "minor-faults", }, - { CSW(PAGE_FAULTS_MAJ), "major-faults", }, - { CSW(CONTEXT_SWITCHES), "context-switches", }, - { CSW(CONTEXT_SWITCHES), "cs", }, - { CSW(CPU_MIGRATIONS), "cpu-migrations", }, - { CSW(CPU_MIGRATIONS), "migrations", }, + { CSW(CPU_CLOCK), "cpu-clock", "" }, + { CSW(TASK_CLOCK), "task-clock", "" }, + { CSW(PAGE_FAULTS), "page-faults", "" }, + { CSW(PAGE_FAULTS), "faults", "" }, + { CSW(PAGE_FAULTS_MIN), "minor-faults", "" }, + { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, + { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, + { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, }; #define __PERF_COUNTER_FIELD(config, name) \ @@ -196,6 +193,19 @@ static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *a return 0; } +static int check_events(const char *str, unsigned int i) +{ + if (!strncmp(str, event_symbols[i].symbol, + strlen(event_symbols[i].symbol))) + return 1; + + if (strlen(event_symbols[i].alias)) + if (!strncmp(str, event_symbols[i].alias, + strlen(event_symbols[i].alias))) + return 1; + return 0; +} + /* * Each event can have multiple symbolic names. * Symbolic names are (almost) exactly matched. 
@@ -235,9 +245,7 @@ static int parse_event_symbols(const char *str, struct perf_counter_attr *attr) } for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { - if (!strncmp(str, event_symbols[i].symbol, - strlen(event_symbols[i].symbol))) { - + if (check_events(str, i)) { attr->type = event_symbols[i].type; attr->config = event_symbols[i].config; @@ -289,6 +297,7 @@ void print_events(void) { struct event_symbol *syms = event_symbols; unsigned int i, type, prev_type = -1; + char name[40]; fprintf(stderr, "\n"); fprintf(stderr, "List of pre-defined events (to be used in -e):\n"); @@ -301,14 +310,18 @@ void print_events(void) if (type != prev_type) fprintf(stderr, "\n"); - fprintf(stderr, " %-30s [%s]\n", syms->symbol, + if (strlen(syms->alias)) + sprintf(name, "%s OR %s", syms->symbol, syms->alias); + else + strcpy(name, syms->symbol); + fprintf(stderr, " %-40s [%s]\n", name, event_type_descriptors[type]); prev_type = type; } fprintf(stderr, "\n"); - fprintf(stderr, " %-30s [raw hardware event descriptor]\n", + fprintf(stderr, " %-40s [raw hardware event descriptor]\n", "rNNN"); fprintf(stderr, "\n"); From 520f2c346af463fa00924b236e092da482b344cc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 Jun 2009 16:52:51 +0200 Subject: [PATCH 013/741] perf report: Output more symbol related debug data Print more symbol relocation related info under -vv. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 5 +++-- tools/perf/util/symbol.c | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5eb5566f0c95..ec230a0146e9 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -797,7 +797,7 @@ resolve_symbol(struct thread *thread, struct map **mapp, { struct dso *dso = dsop ? *dsop : NULL; struct map *map = mapp ? 
*mapp : NULL; - uint64_t ip = *ipp; + u64 ip = *ipp; if (!thread) return NULL; @@ -814,7 +814,6 @@ resolve_symbol(struct thread *thread, struct map **mapp, *mapp = map; got_map: ip = map->map_ip(map, ip); - *ipp = ip; dso = map->dso; } else { @@ -828,6 +827,8 @@ got_map: dso = kernel_dso; } dprintf(" ...... dso: %s\n", dso ? dso->name : ""); + dprintf(" ...... map: %Lx -> %Lx\n", *ipp, ip); + *ipp = ip; if (dsop) *dsop = dso; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 01b62fa03996..9c659ef6aec2 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -535,6 +535,10 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, gelf_getshdr(sec, &shdr); obj_start = sym.st_value; + if (verbose >= 2) + printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n", + (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); + sym.st_value -= shdr.sh_addr - shdr.sh_offset; f = symbol__new(sym.st_value, sym.st_size, From c0c22dbfa8ba3c5045eeb9c76d2822ffc44fefc3 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 22 Jun 2009 20:47:26 +0530 Subject: [PATCH 014/741] perf_counter tools: Set alias for page-faults "faults" should be alias for "page-faults" Also fixed alignment and 80 characters issue Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245683846.12092.1.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 36 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f5695486ad3f..06af2fadcd87 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -33,8 +33,7 @@ static struct event_symbol event_symbols[] = { { CSW(CPU_CLOCK), "cpu-clock", "" }, { CSW(TASK_CLOCK), "task-clock", "" }, - { CSW(PAGE_FAULTS), "page-faults", "" }, - { CSW(PAGE_FAULTS), "faults", "" }, + { CSW(PAGE_FAULTS), 
"page-faults", "faults" }, { CSW(PAGE_FAULTS_MIN), "minor-faults", "" }, { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, @@ -71,24 +70,24 @@ static char *sw_event_names[] = { #define MAX_ALIASES 8 -static char *hw_cache [][MAX_ALIASES] = { - { "L1-data" , "l1-d", "l1d" }, - { "L1-instruction" , "l1-i", "l1i" }, - { "L2" , "l2" }, - { "Data-TLB" , "dtlb", "d-tlb" }, - { "Instruction-TLB" , "itlb", "i-tlb" }, - { "Branch" , "bpu" , "btb", "bpc" }, +static char *hw_cache[][MAX_ALIASES] = { + { "L1-data", "l1-d", "l1d" }, + { "L1-instruction", "l1-i", "l1i" }, + { "L2", "l2" }, + { "Data-TLB", "dtlb", "d-tlb" }, + { "Instruction-TLB", "itlb", "i-tlb" }, + { "Branch", "bpu" , "btb", "bpc" }, }; -static char *hw_cache_op [][MAX_ALIASES] = { - { "Load" , "read" }, - { "Store" , "write" }, - { "Prefetch" , "speculative-read", "speculative-load" }, +static char *hw_cache_op[][MAX_ALIASES] = { + { "Load", "read" }, + { "Store", "write" }, + { "Prefetch", "speculative-read", "speculative-load" }, }; -static char *hw_cache_result [][MAX_ALIASES] = { - { "Reference" , "ops", "access" }, - { "Miss" }, +static char *hw_cache_result[][MAX_ALIASES] = { + { "Reference", "ops", "access" }, + { "Miss" }, }; char *event_name(int counter) @@ -160,7 +159,8 @@ static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size) return -1; } -static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) +static int +parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) { int cache_type = -1, cache_op = 0, cache_result = 0; @@ -201,7 +201,7 @@ static int check_events(const char *str, unsigned int i) if (strlen(event_symbols[i].alias)) if (!strncmp(str, event_symbols[i].alias, - strlen(event_symbols[i].alias))) + strlen(event_symbols[i].alias))) return 1; return 0; } From 4839641333d4593bfc4fb29aa3af10d36f607d5b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 23 Jun 2009 
01:34:19 +0100 Subject: [PATCH 015/741] jffs2: fix another potential leak on error path in scan.c Signed-off-by: David Woodhouse --- fs/jffs2/scan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 7515e73e2bfb..696686cc206e 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -130,9 +130,9 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) if (jffs2_sum_active()) { s = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL); if (!s) { - kfree(flashbuf); JFFS2_WARNING("Can't allocate memory for summary\n"); - return -ENOMEM; + ret = -ENOMEM; + goto out; } } From dee412066aeb16c43cf31599948c1a1de385df56 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 23 Jun 2009 02:22:39 +0530 Subject: [PATCH 016/741] perf stat: Fix command option / manpage -l is not supported, it should be -S for scale. Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245703959.6167.16.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-stat.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index c368a72721d7..0d74346d21ab 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -8,8 +8,8 @@ perf-stat - Run a command and gather performance counter statistics SYNOPSIS -------- [verse] -'perf stat' [-e | --event=EVENT] [-l] [-a] -'perf stat' [-e | --event=EVENT] [-l] [-a] -- [] +'perf stat' [-e | --event=EVENT] [-S] [-a] +'perf stat' [-e | --event=EVENT] [-S] [-a] -- [] DESCRIPTION ----------- @@ -40,7 +40,7 @@ OPTIONS -a:: system-wide collection --l:: +-S:: scale counter values EXAMPLES From 3d906ef10a539ff336010afab8f6f9c4fe379695 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Jun 2009 11:23:07 +0200 Subject: [PATCH 017/741] perf_counter tools: Handle overlapping MMAP events Martin Schwidefsky reported 
"perf report" symbol resolution problems on S390. Since we only report MMAP, not MUNMAP, we have to deal with overlapping maps. We used to simply throw out the old map on the assumption whole maps got unmapped. This obviously doesn't deal with partial unmaps. However it appears some dynamic linkers do fancy partial unmaps (s390), so do something more elaborate and truncate the old maps, only removing them when they've been fully covered. This resolves (part of) the S390 symbol resolution problems. Reported-by: Martin Schwidefsky Tested-by: Martin Schwidefsky Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ec230a0146e9..b4e76f75ba87 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -400,9 +400,27 @@ static void thread__insert_map(struct thread *self, struct map *map) list_for_each_entry_safe(pos, tmp, &self->maps, node) { if (map__overlap(pos, map)) { - list_del_init(&pos->node); - /* XXX leaks dsos */ - free(pos); + if (verbose >= 2) { + printf("overlapping maps:\n"); + map__fprintf(map, stdout); + map__fprintf(pos, stdout); + } + + if (map->start <= pos->start && map->end > pos->start) + pos->start = map->end; + + if (map->end >= pos->end && map->start < pos->end) + pos->end = map->start; + + if (verbose >= 2) { + printf("after collision:\n"); + map__fprintf(pos, stdout); + } + + if (pos->start >= pos->end) { + list_del_init(&pos->node); + free(pos); + } } } From f29ac756a40d0f1bb07d682ea521e7b666ff06d5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 19 Jun 2009 18:27:26 +0200 Subject: [PATCH 018/741] perf_counter: Optimize perf_swcounter_event() Similar to tracepoints, use an enable variable to reduce overhead when unused. 
Only look for a counter of a particular event type when we know there is at least one in the system. Signed-off-by: Peter Zijlstra LKML-Reference: Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 11 ++++++++++- kernel/perf_counter.c | 18 +++++++++++++++--- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 89698d8aba5c..e7213e46cf9c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -669,7 +669,16 @@ static inline int is_software_counter(struct perf_counter *counter) (counter->attr.type != PERF_TYPE_HW_CACHE); } -extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); +extern atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX]; + +extern void __perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); + +static inline void +perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) +{ + if (atomic_read(&perf_swcounter_enabled[event])) + __perf_swcounter_event(event, nr, nmi, regs, addr); +} extern void __perf_counter_mmap(struct vm_area_struct *vma); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 1a933a221ea4..7515c7695428 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -3317,8 +3317,8 @@ out: put_cpu_var(perf_cpu_context); } -void -perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) +void __perf_swcounter_event(u32 event, u64 nr, int nmi, + struct pt_regs *regs, u64 addr) { struct perf_sample_data data = { .regs = regs, @@ -3509,9 +3509,19 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) } #endif +atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX]; + +static void sw_perf_counter_destroy(struct perf_counter *counter) +{ + u64 event = counter->attr.config; + + atomic_dec(&perf_swcounter_enabled[event]); +} + static const struct pmu 
*sw_perf_counter_init(struct perf_counter *counter) { const struct pmu *pmu = NULL; + u64 event = counter->attr.config; /* * Software counters (currently) can't in general distinguish @@ -3520,7 +3530,7 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) * to be kernel events, and page faults are never hypervisor * events. */ - switch (counter->attr.config) { + switch (event) { case PERF_COUNT_SW_CPU_CLOCK: pmu = &perf_ops_cpu_clock; @@ -3541,6 +3551,8 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) case PERF_COUNT_SW_PAGE_FAULTS_MAJ: case PERF_COUNT_SW_CONTEXT_SWITCHES: case PERF_COUNT_SW_CPU_MIGRATIONS: + atomic_inc(&perf_swcounter_enabled[event]); + counter->destroy = sw_perf_counter_destroy; pmu = &perf_ops_generic; break; } From b84fbc9fb1d943e2c5f4efe52ed0e3c93a4bdb6a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 Jun 2009 13:57:40 +0200 Subject: [PATCH 019/741] perf_counter: Push inherit into perf_counter_alloc() Teach perf_counter_alloc() about inheritance so that we can optimize the inherit path in the next patch. Remove the child_counter->attr.inherit = 1 line because the only way to get there is if parent_counter->attr.inherit == 1 and we copy the attrs. 
Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/perf_counter.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 7515c7695428..0a45490f4029 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -3568,6 +3568,7 @@ perf_counter_alloc(struct perf_counter_attr *attr, int cpu, struct perf_counter_context *ctx, struct perf_counter *group_leader, + struct perf_counter *parent_counter, gfp_t gfpflags) { const struct pmu *pmu; @@ -3603,6 +3604,8 @@ perf_counter_alloc(struct perf_counter_attr *attr, counter->ctx = ctx; counter->oncpu = -1; + counter->parent = parent_counter; + counter->ns = get_pid_ns(current->nsproxy->pid_ns); counter->id = atomic64_inc_return(&perf_counter_id); @@ -3827,7 +3830,7 @@ SYSCALL_DEFINE5(perf_counter_open, } counter = perf_counter_alloc(&attr, cpu, ctx, group_leader, - GFP_KERNEL); + NULL, GFP_KERNEL); ret = PTR_ERR(counter); if (IS_ERR(counter)) goto err_put_context; @@ -3893,7 +3896,8 @@ inherit_counter(struct perf_counter *parent_counter, child_counter = perf_counter_alloc(&parent_counter->attr, parent_counter->cpu, child_ctx, - group_leader, GFP_KERNEL); + group_leader, parent_counter, + GFP_KERNEL); if (IS_ERR(child_counter)) return child_counter; get_ctx(child_ctx); @@ -3916,12 +3920,6 @@ inherit_counter(struct perf_counter *parent_counter, */ add_counter_to_ctx(child_counter, child_ctx); - child_counter->parent = parent_counter; - /* - * inherit into child's child as well: - */ - child_counter->attr.inherit = 1; - /* * Get a reference to the parent filp - we will fput it * when the child counter exits. 
This is safe to do because From f344011ccb85469445369153c3d27c4ee4bc2ac8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 Jun 2009 13:58:35 +0200 Subject: [PATCH 020/741] perf_counter: Optimize perf_counter_alloc()'s inherit case We don't need to add usage counts for swcounter and attr usage models for inherited counters since the parent counter will always have one, which suffices to generate the needed output. This avoids up to 3 global atomic increments per inherited counter. LKML-Reference: Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/perf_counter.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0a45490f4029..c2b19c111718 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1508,11 +1508,13 @@ static void free_counter(struct perf_counter *counter) { perf_pending_sync(counter); - atomic_dec(&nr_counters); - if (counter->attr.mmap) - atomic_dec(&nr_mmap_counters); - if (counter->attr.comm) - atomic_dec(&nr_comm_counters); + if (!counter->parent) { + atomic_dec(&nr_counters); + if (counter->attr.mmap) + atomic_dec(&nr_mmap_counters); + if (counter->attr.comm) + atomic_dec(&nr_comm_counters); + } if (counter->destroy) counter->destroy(counter); @@ -3515,6 +3517,8 @@ static void sw_perf_counter_destroy(struct perf_counter *counter) { u64 event = counter->attr.config; + WARN_ON(counter->parent); + atomic_dec(&perf_swcounter_enabled[event]); } @@ -3551,8 +3555,10 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) case PERF_COUNT_SW_PAGE_FAULTS_MAJ: case PERF_COUNT_SW_CONTEXT_SWITCHES: case PERF_COUNT_SW_CPU_MIGRATIONS: - atomic_inc(&perf_swcounter_enabled[event]); - counter->destroy = sw_perf_counter_destroy; + if (!counter->parent) { + atomic_inc(&perf_swcounter_enabled[event]); + counter->destroy = sw_perf_counter_destroy; + } pmu = &perf_ops_generic; break; } @@ -3663,11 +3669,13 @@ 
done: counter->pmu = pmu; - atomic_inc(&nr_counters); - if (counter->attr.mmap) - atomic_inc(&nr_mmap_counters); - if (counter->attr.comm) - atomic_inc(&nr_comm_counters); + if (!counter->parent) { + atomic_inc(&nr_counters); + if (counter->attr.mmap) + atomic_inc(&nr_mmap_counters); + if (counter->attr.comm) + atomic_inc(&nr_comm_counters); + } return counter; } From 0c405b3346ea08098a82a1ee82912b018dfa9f96 Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Tue, 23 Jun 2009 13:30:21 +0300 Subject: [PATCH 021/741] OMAP1: Fix compilation of arch/arm/mach-omap1/mailbox.c This fixes the positioning of " in MODULE_AUTHOR, which is currently causing a build failure on latest git with CONFIG_OMAP_MBOX_FWK=m; the original breakage appears to date from the end of last year in a5abbbe52b7e89a7633319c5417bd4331f7ac8ed Signed-Off-By: Jonathan McDowell Acked-by: Hiroshi DOYU Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/mailbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap1/mailbox.c b/arch/arm/mach-omap1/mailbox.c index 0af4d6c85b47..6810b4aeb02c 100644 --- a/arch/arm/mach-omap1/mailbox.c +++ b/arch/arm/mach-omap1/mailbox.c @@ -203,5 +203,5 @@ module_exit(omap1_mbox_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("omap mailbox: omap1 architecture specific functions"); -MODULE_AUTHOR("Hiroshi DOYU" ); +MODULE_AUTHOR("Hiroshi DOYU "); MODULE_ALIAS("platform:omap1-mailbox"); From e4d24ec39c98cdc9cd97c26fdd426bbab0034fbe Mon Sep 17 00:00:00 2001 From: Andrew de Quincey Date: Tue, 23 Jun 2009 13:30:21 +0300 Subject: [PATCH 022/741] OMAP1: Fix N770 MMC support Some of the N770's MMC configuration options seem to have been dropped. This patch adds them back in again. Note that only the .ocr_mask change was /critical/, but I've added the .max_freq setting back as well, as the original sources had it. Can anyone confirm if this is unnecessary? Secondly, there is support in the original code for a 4wire/higher speed mode. 
As I don't have the requisite N770 hardware (I think it was a rev2 N770?) to test this, I can't really add it back. Signed-off-by: Andrew de Quincey Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/board-nokia770.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-omap1/board-nokia770.c b/arch/arm/mach-omap1/board-nokia770.c index e70fc7c66bbb..d9ebba01ffce 100644 --- a/arch/arm/mach-omap1/board-nokia770.c +++ b/arch/arm/mach-omap1/board-nokia770.c @@ -205,9 +205,11 @@ static int nokia770_mmc_get_cover_state(struct device *dev, int slot) static struct omap_mmc_platform_data nokia770_mmc2_data = { .nr_slots = 1, .dma_mask = 0xffffffff, + .max_freq = 12000000, .slots[0] = { .set_power = nokia770_mmc_set_power, .get_cover_state = nokia770_mmc_get_cover_state, + .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, .name = "mmcblk", }, }; From d376f89701b0aa5b45d25fbfbeb1a0040399ad30 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Tue, 23 Jun 2009 13:30:22 +0300 Subject: [PATCH 023/741] OMAP1: remove duplicated #include Remove duplicated #include in arch/arm/mach-omap1/board-nokia770.c. Signed-off-by: Huang Weiyi Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/board-nokia770.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/mach-omap1/board-nokia770.c b/arch/arm/mach-omap1/board-nokia770.c index d9ebba01ffce..ed2a48a9ce74 100644 --- a/arch/arm/mach-omap1/board-nokia770.c +++ b/arch/arm/mach-omap1/board-nokia770.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #define ADS7846_PENDOWN_GPIO 15 From 762ad3a476baa1831f732488e80960f4aa024393 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Tue, 23 Jun 2009 13:30:22 +0300 Subject: [PATCH 024/741] OMAP2/3: mmc-twl4030: use correct controller in twl_mmc23_set_power twl_mmc23_set_power() has MMC2 twl_mmc_controller hardcoded in it, which breaks MMC3. Find the right controller to use instead. 
Signed-off-by: Grazvydas Ignotas Cc: David Brownell Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/mmc-twl4030.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/mmc-twl4030.c b/arch/arm/mach-omap2/mmc-twl4030.c index 9756a878fd90..1541fd4c8d0f 100644 --- a/arch/arm/mach-omap2/mmc-twl4030.c +++ b/arch/arm/mach-omap2/mmc-twl4030.c @@ -263,8 +263,19 @@ static int twl_mmc1_set_power(struct device *dev, int slot, int power_on, static int twl_mmc23_set_power(struct device *dev, int slot, int power_on, int vdd) { int ret = 0; - struct twl_mmc_controller *c = &hsmmc[1]; + struct twl_mmc_controller *c = NULL; struct omap_mmc_platform_data *mmc = dev->platform_data; + int i; + + for (i = 1; i < ARRAY_SIZE(hsmmc); i++) { + if (mmc == hsmmc[i].mmc) { + c = &hsmmc[i]; + break; + } + } + + if (c == NULL) + return -ENODEV; /* If we don't see a Vcc regulator, assume it's a fixed * voltage always-on regulator. From 091a58af0ba1765d80b1e74382c7572baceb1bdc Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 23 Jun 2009 13:30:22 +0300 Subject: [PATCH 025/741] OMAP2/3: omap mailbox: platform_get_irq() error ignored platform_get_irq may return -ENXIO. but struct omap_mbox mbox_dsp_info.irq is unsigned, so the error was not noticed. 
Signed-off-by: Roel Kluin Signed-off-by: Hiroshi DOYU Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/mailbox.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-omap2/mailbox.c b/arch/arm/mach-omap2/mailbox.c index fd5b8a5925cc..6f71f3730c97 100644 --- a/arch/arm/mach-omap2/mailbox.c +++ b/arch/arm/mach-omap2/mailbox.c @@ -282,12 +282,12 @@ static int __devinit omap2_mbox_probe(struct platform_device *pdev) return -ENOMEM; /* DSP or IVA2 IRQ */ - mbox_dsp_info.irq = platform_get_irq(pdev, 0); - if (mbox_dsp_info.irq < 0) { + ret = platform_get_irq(pdev, 0); + if (ret < 0) { dev_err(&pdev->dev, "invalid irq resource\n"); - ret = -ENODEV; goto err_dsp; } + mbox_dsp_info.irq = ret; ret = omap_mbox_register(&pdev->dev, &mbox_dsp_info); if (ret) From 8e25ad964aac0bf6b30dd013303750089f819679 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Tue, 23 Jun 2009 13:30:23 +0300 Subject: [PATCH 026/741] OMAP2/3: Add omap_type() for determining GP/EMU/HS The omap_type() function is added and returns the DEVICETYPE field of the CONTROL_STATUS register. The result can be used for conditional code based on whether device is GP (general purpose), EMU or HS (high security). Also move the type defines so omap1 code compile does not require ifdefs for sections using these defines. This code is needed for the following fix to set the SRAM size correctly for HS omaps. Also at least PM and watchdog code will need this function. 
Signed-off-by: Kevin Hilman Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/id.c | 22 ++++++++++++++++++++++ arch/arm/plat-omap/include/mach/cpu.h | 22 +++++++++++----------- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index 458990e20c60..a98201cc265c 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -48,6 +48,28 @@ int omap_chip_is(struct omap_chip_id oci) } EXPORT_SYMBOL(omap_chip_is); +int omap_type(void) +{ + u32 val = 0; + + if (cpu_is_omap24xx()) + val = omap_ctrl_readl(OMAP24XX_CONTROL_STATUS); + else if (cpu_is_omap34xx()) + val = omap_ctrl_readl(OMAP343X_CONTROL_STATUS); + else { + pr_err("Cannot detect omap type!\n"); + goto out; + } + + val &= OMAP2_DEVICETYPE_MASK; + val >>= 8; + +out: + return val; +} +EXPORT_SYMBOL(omap_type); + + /*----------------------------------------------------------------------------*/ #define OMAP_TAP_IDCODE 0x0204 diff --git a/arch/arm/plat-omap/include/mach/cpu.h b/arch/arm/plat-omap/include/mach/cpu.h index fc60c4ebcc28..285eaa3a8275 100644 --- a/arch/arm/plat-omap/include/mach/cpu.h +++ b/arch/arm/plat-omap/include/mach/cpu.h @@ -30,6 +30,17 @@ #ifndef __ASM_ARCH_OMAP_CPU_H #define __ASM_ARCH_OMAP_CPU_H +/* + * Omap device type i.e. EMU/HS/TST/GP/BAD + */ +#define OMAP2_DEVICE_TYPE_TEST 0 +#define OMAP2_DEVICE_TYPE_EMU 1 +#define OMAP2_DEVICE_TYPE_SEC 2 +#define OMAP2_DEVICE_TYPE_GP 3 +#define OMAP2_DEVICE_TYPE_BAD 4 + +int omap_type(void); + struct omap_chip_id { u8 oc; u8 type; @@ -424,17 +435,6 @@ IS_OMAP_TYPE(3430, 0x3430) int omap_chip_is(struct omap_chip_id oci); -int omap_type(void); - -/* - * Macro to detect device type i.e. 
EMU/HS/TST/GP/BAD - */ -#define OMAP2_DEVICE_TYPE_TEST 0 -#define OMAP2_DEVICE_TYPE_EMU 1 -#define OMAP2_DEVICE_TYPE_SEC 2 -#define OMAP2_DEVICE_TYPE_GP 3 -#define OMAP2_DEVICE_TYPE_BAD 4 - void omap2_check_revision(void); #endif /* defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3) */ From 5b0acc59d1bc5c310dfd6976555664f9dcf4dacd Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 23 Jun 2009 13:30:23 +0300 Subject: [PATCH 027/741] OMAP3: SRAM size fix for HS/EMU devices SRAM size fix for HS/EMU devices Signed-off-by: Tero Kristo Signed-off-by: Kevin Hilman Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/sram.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c index 65006df3f1b7..4ea73804d21e 100644 --- a/arch/arm/plat-omap/sram.c +++ b/arch/arm/plat-omap/sram.c @@ -133,7 +133,12 @@ void __init omap_detect_sram(void) if (cpu_is_omap34xx()) { omap_sram_base = OMAP3_SRAM_PUB_VA; omap_sram_start = OMAP3_SRAM_PUB_PA; - omap_sram_size = 0x8000; /* 32K */ + if ((omap_type() == OMAP2_DEVICE_TYPE_EMU) || + (omap_type() == OMAP2_DEVICE_TYPE_SEC)) { + omap_sram_size = 0x7000; /* 28K */ + } else { + omap_sram_size = 0x8000; /* 32K */ + } } else { omap_sram_base = OMAP2_SRAM_PUB_VA; omap_sram_start = OMAP2_SRAM_PUB_PA; From aecedb94b366d6fb5e2a17ca18a5dc78e593198e Mon Sep 17 00:00:00 2001 From: Kalle Jokiniemi Date: Tue, 23 Jun 2009 13:30:24 +0300 Subject: [PATCH 028/741] OMAP3: DMA: Enable idlemodes for DMA OCP This patch enables MStandby smart-idle mode, autoidle smartidle mode, and the autoidle bit for DMA4_OCP_SYSCONFIG. 
Signed-off-by: Kalle Jokiniemi Signed-off-by: Tony Lindgren Signed-off-by: Kevin Hilman --- arch/arm/plat-omap/dma.c | 13 +++++++++++++ arch/arm/plat-omap/include/mach/dma.h | 15 +++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c index def14ec265b3..7677a4a1cef2 100644 --- a/arch/arm/plat-omap/dma.c +++ b/arch/arm/plat-omap/dma.c @@ -2457,6 +2457,19 @@ static int __init omap_init_dma(void) setup_irq(irq, &omap24xx_dma_irq); } + /* Enable smartidle idlemodes and autoidle */ + if (cpu_is_omap34xx()) { + u32 v = dma_read(OCP_SYSCONFIG); + v &= ~(DMA_SYSCONFIG_MIDLEMODE_MASK | + DMA_SYSCONFIG_SIDLEMODE_MASK | + DMA_SYSCONFIG_AUTOIDLE); + v |= (DMA_SYSCONFIG_MIDLEMODE(DMA_IDLEMODE_SMARTIDLE) | + DMA_SYSCONFIG_SIDLEMODE(DMA_IDLEMODE_SMARTIDLE) | + DMA_SYSCONFIG_AUTOIDLE); + dma_write(v , OCP_SYSCONFIG); + } + + /* FIXME: Update LCD DMA to work on 24xx */ if (cpu_class_is_omap1()) { r = request_irq(INT_DMA_LCD, lcd_dma_irq_handler, 0, diff --git a/arch/arm/plat-omap/include/mach/dma.h b/arch/arm/plat-omap/include/mach/dma.h index 8c1eae88737e..7b939cc01962 100644 --- a/arch/arm/plat-omap/include/mach/dma.h +++ b/arch/arm/plat-omap/include/mach/dma.h @@ -389,6 +389,21 @@ #define DMA_THREAD_FIFO_25 (0x02 << 14) #define DMA_THREAD_FIFO_50 (0x03 << 14) +/* DMA4_OCP_SYSCONFIG bits */ +#define DMA_SYSCONFIG_MIDLEMODE_MASK (3 << 12) +#define DMA_SYSCONFIG_CLOCKACTIVITY_MASK (3 << 8) +#define DMA_SYSCONFIG_EMUFREE (1 << 5) +#define DMA_SYSCONFIG_SIDLEMODE_MASK (3 << 3) +#define DMA_SYSCONFIG_SOFTRESET (1 << 2) +#define DMA_SYSCONFIG_AUTOIDLE (1 << 0) + +#define DMA_SYSCONFIG_MIDLEMODE(n) ((n) << 12) +#define DMA_SYSCONFIG_SIDLEMODE(n) ((n) << 3) + +#define DMA_IDLEMODE_SMARTIDLE 0x2 +#define DMA_IDLEMODE_NO_IDLE 0x1 +#define DMA_IDLEMODE_FORCE_IDLE 0x0 + /* Chaining modes*/ #ifndef CONFIG_ARCH_OMAP1 #define OMAP_DMA_STATIC_CHAIN 0x1 From 6d453e84b587f38e4197bb2c6a37296c4a80cbac Mon Sep 17 00:00:00 2001 From: Adrian 
Hunter Date: Tue, 23 Jun 2009 13:30:24 +0300 Subject: [PATCH 029/741] OMAP2/3: gpmc-onenand: correct use of async timings Use async timings when sync timings are not requested. Also ensure that OneNAND is in async mode when async timings are used. Signed-off-by: Adrian Hunter Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/gpmc-onenand.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c index 2fd22f9c5f0e..54fec53a48e7 100644 --- a/arch/arm/mach-omap2/gpmc-onenand.c +++ b/arch/arm/mach-omap2/gpmc-onenand.c @@ -31,6 +31,8 @@ static struct platform_device gpmc_onenand_device = { static int omap2_onenand_set_async_mode(int cs, void __iomem *onenand_base) { struct gpmc_timings t; + u32 reg; + int err; const int t_cer = 15; const int t_avdp = 12; @@ -43,6 +45,11 @@ static int omap2_onenand_set_async_mode(int cs, void __iomem *onenand_base) const int t_wpl = 40; const int t_wph = 30; + /* Ensure sync read and sync write are disabled */ + reg = readw(onenand_base + ONENAND_REG_SYS_CFG1); + reg &= ~ONENAND_SYS_CFG1_SYNC_READ & ~ONENAND_SYS_CFG1_SYNC_WRITE; + writew(reg, onenand_base + ONENAND_REG_SYS_CFG1); + memset(&t, 0, sizeof(t)); t.sync_clk = 0; t.cs_on = 0; @@ -74,7 +81,16 @@ static int omap2_onenand_set_async_mode(int cs, void __iomem *onenand_base) GPMC_CONFIG1_DEVICESIZE_16 | GPMC_CONFIG1_MUXADDDATA); - return gpmc_cs_set_timings(cs, &t); + err = gpmc_cs_set_timings(cs, &t); + if (err) + return err; + + /* Ensure sync read and sync write are disabled */ + reg = readw(onenand_base + ONENAND_REG_SYS_CFG1); + reg &= ~ONENAND_SYS_CFG1_SYNC_READ & ~ONENAND_SYS_CFG1_SYNC_WRITE; + writew(reg, onenand_base + ONENAND_REG_SYS_CFG1); + + return 0; } static void set_onenand_cfg(void __iomem *onenand_base, int latency, @@ -124,7 +140,8 @@ static int omap2_onenand_set_sync_mode(struct omap_onenand_platform_data *cfg, } else if (cfg->flags & ONENAND_SYNC_READWRITE) 
{ sync_read = 1; sync_write = 1; - } + } else + return omap2_onenand_set_async_mode(cs, onenand_base); if (!freq) { /* Very first call freq is not known */ From c8e6488f7b56d82453fc7d526118e9f1c2df133a Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 23 Jun 2009 13:30:25 +0300 Subject: [PATCH 030/741] OMAP3: RX51: Use OneNAND sync read / write Use OneNAND sync read / write Signed-off-by: Adrian Hunter Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/board-rx51-peripherals.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c index da93b86234ed..9a0bf6744a05 100644 --- a/arch/arm/mach-omap2/board-rx51-peripherals.c +++ b/arch/arm/mach-omap2/board-rx51-peripherals.c @@ -362,6 +362,7 @@ static struct omap_onenand_platform_data board_onenand_data = { .gpio_irq = 65, .parts = onenand_partitions, .nr_parts = ARRAY_SIZE(onenand_partitions), + .flags = ONENAND_SYNC_READWRITE, }; static void __init board_onenand_init(void) From f48ef99ca14577f3ea0a48c0e05ed7f5f6d211e9 Mon Sep 17 00:00:00 2001 From: Fernando Guzman Lugo Date: Tue, 23 Jun 2009 13:30:25 +0300 Subject: [PATCH 031/741] OMAP: IOMMU: function flush_iotlb_page is not flushing correct entry The function flush_iotlb_page is not loading the CAM register with the correct entry to be flushed, so it is flushing other entry Signed-off-by: Fernando Guzman Lugo Signed-off-by: Hiroshi DOYU Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/plat-omap/iommu.c b/arch/arm/plat-omap/iommu.c index 4cf449fa2cb5..4a0301399013 100644 --- a/arch/arm/plat-omap/iommu.c +++ b/arch/arm/plat-omap/iommu.c @@ -298,7 +298,7 @@ void flush_iotlb_page(struct iommu *obj, u32 da) if ((start <= da) && (da < start + bytes)) { dev_dbg(obj->dev, "%s: %08x<=%08x(%x)\n", __func__, start, da, bytes); - + iotlb_load_cr(obj, &cr); iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY); } 
} From cb5793db5ecf108594d8006ae838e47996a76a19 Mon Sep 17 00:00:00 2001 From: janboe Date: Tue, 23 Jun 2009 13:30:25 +0300 Subject: [PATCH 032/741] OMAP2/3: Initialize gpio debounce register Some bootloaders may initialize the debounce register, which leaves dbclk inconsistent with the debounce register after the Linux kernel boots. Signed-off-by: janboe Signed-off-by: Kevin Hilman Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/gpio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c index 7fd89ba8d3b5..26b387c12423 100644 --- a/arch/arm/plat-omap/gpio.c +++ b/arch/arm/plat-omap/gpio.c @@ -1585,6 +1585,7 @@ static int __init _omap_gpio_init(void) __raw_writel(0x00000000, bank->base + OMAP24XX_GPIO_IRQENABLE1); __raw_writel(0xffffffff, bank->base + OMAP24XX_GPIO_IRQSTATUS1); __raw_writew(0x0015, bank->base + OMAP24XX_GPIO_SYSCONFIG); + __raw_writel(0x00000000, bank->base + OMAP24XX_GPIO_DEBOUNCE_EN); /* Initialize interface clock ungated, module enabled */ __raw_writel(0, bank->base + OMAP24XX_GPIO_CTRL); From 291e99a112f9bf34c027031de7ef8b94a2692937 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 4 May 2009 09:30:51 +0100 Subject: [PATCH 033/741] [ARM] S3C24XX: Fix use of CONFIG_S3C24XX_PWM CONFIG_S3C24XX_PWM was defined in arch/arm/plat-s3c24xx/Kconfig but not used anywhere else as the corresponding makefile used CONFIG_HAVE_PWM (selected by CONFIG_S3C24XX_PWM) to compile the PWM driver. Change the makefile to use CONFIG_S3C24XX_PWM to compile this driver to ensure it is only built when needed. 
Signed-off-by: Ben Dooks --- arch/arm/plat-s3c24xx/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/plat-s3c24xx/Makefile b/arch/arm/plat-s3c24xx/Makefile index 636cb12711df..579a165c2827 100644 --- a/arch/arm/plat-s3c24xx/Makefile +++ b/arch/arm/plat-s3c24xx/Makefile @@ -29,7 +29,7 @@ obj-$(CONFIG_PM_SIMTEC) += pm-simtec.o obj-$(CONFIG_PM) += pm.o obj-$(CONFIG_PM) += irq-pm.o obj-$(CONFIG_PM) += sleep.o -obj-$(CONFIG_HAVE_PWM) += pwm.o +obj-$(CONFIG_S3C24XX_PWM) += pwm.o obj-$(CONFIG_S3C2410_CLOCK) += s3c2410-clock.o obj-$(CONFIG_S3C2410_DMA) += dma.o obj-$(CONFIG_S3C24XX_ADC) += adc.o From a18327f35a879a6467c3e901da7f68944b191732 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 4 May 2009 09:53:12 +0100 Subject: [PATCH 034/741] [ARM] S3C: Remove unused CONFIG_DEBUG_S3C_PORT Remove the unused CONFIG_DEBUG_S3C_PORT as we currently only have support for using the S3C UARTs via the low-level debug code. Signed-off-by: Ben Dooks --- arch/arm/Kconfig.debug | 8 -------- arch/arm/configs/s3c2410_defconfig | 1 - arch/arm/configs/s3c6400_defconfig | 1 - arch/arm/configs/tct_hammer_defconfig | 1 - 4 files changed, 11 deletions(-) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index a71fd941ade7..a89e4734b8f0 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -99,14 +99,6 @@ config DEBUG_CLPS711X_UART2 output to the second serial port on these devices. Saying N will cause the debug messages to appear on the first serial port. -config DEBUG_S3C_PORT - depends on DEBUG_LL && PLAT_S3C - bool "Kernel low-level debugging messages via S3C UART" - help - Say Y here if you want debug print routines to go to one of the - S3C internal UARTs. The chosen UART must have been configured - before it is used. 
- config DEBUG_S3C_UART depends on PLAT_S3C int "S3C UART to use for low-level debug" diff --git a/arch/arm/configs/s3c2410_defconfig b/arch/arm/configs/s3c2410_defconfig index 2d58b8fe59be..f4f1899f3c88 100644 --- a/arch/arm/configs/s3c2410_defconfig +++ b/arch/arm/configs/s3c2410_defconfig @@ -2298,7 +2298,6 @@ CONFIG_DEBUG_ERRORS=y # CONFIG_DEBUG_STACK_USAGE is not set CONFIG_DEBUG_LL=y # CONFIG_DEBUG_ICEDCC is not set -CONFIG_DEBUG_S3C_PORT=y CONFIG_DEBUG_S3C_UART=0 # diff --git a/arch/arm/configs/s3c6400_defconfig b/arch/arm/configs/s3c6400_defconfig index 2e8fa50e9a09..32860609e057 100644 --- a/arch/arm/configs/s3c6400_defconfig +++ b/arch/arm/configs/s3c6400_defconfig @@ -816,7 +816,6 @@ CONFIG_DEBUG_ERRORS=y # CONFIG_DEBUG_STACK_USAGE is not set CONFIG_DEBUG_LL=y # CONFIG_DEBUG_ICEDCC is not set -CONFIG_DEBUG_S3C_PORT=y CONFIG_DEBUG_S3C_UART=0 # diff --git a/arch/arm/configs/tct_hammer_defconfig b/arch/arm/configs/tct_hammer_defconfig index 07dfb98df4f0..9d32faef05f6 100644 --- a/arch/arm/configs/tct_hammer_defconfig +++ b/arch/arm/configs/tct_hammer_defconfig @@ -857,7 +857,6 @@ CONFIG_DEBUG_ERRORS=y # CONFIG_DEBUG_STACK_USAGE is not set CONFIG_DEBUG_LL=y # CONFIG_DEBUG_ICEDCC is not set -# CONFIG_DEBUG_S3C_PORT is not set CONFIG_DEBUG_S3C_UART=0 # From 503dcbeba50fd3545283594bc391b4a400fa6c48 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 23 Jun 2009 16:55:30 +0300 Subject: [PATCH 035/741] OMAP: Fix IOMEM macro for assembly Otherwise IOMEM calculations can fail. 
Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/include/mach/io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/plat-omap/include/mach/io.h b/arch/arm/plat-omap/include/mach/io.h index 3b2814720569..73f483d56ca6 100644 --- a/arch/arm/plat-omap/include/mach/io.h +++ b/arch/arm/plat-omap/include/mach/io.h @@ -201,7 +201,7 @@ #define OMAP2_IO_ADDRESS(pa) IOMEM(__OMAP2_IO_ADDRESS(pa)) #ifdef __ASSEMBLER__ -#define IOMEM(x) x +#define IOMEM(x) (x) #else #define IOMEM(x) ((void __force __iomem *)(x)) From b0a28589b2fc9bee8ed83dee006a497d1ce93841 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 23 Jun 2009 16:39:53 +0200 Subject: [PATCH 036/741] perf report: Fix help text typo Reported-by: Brice Goglin Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-report.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 52d3fc6846a9..40c1db83a16d 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -13,7 +13,7 @@ SYNOPSIS DESCRIPTION ----------- This command displays the performance counter profile information recorded -via perf report. +via perf record. 
OPTIONS ------- From 927dbcd668ede8d2210cc59bea548ca9ff45b240 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 23 Jun 2009 16:15:38 +0100 Subject: [PATCH 037/741] [ARM] S3C24XX: Fix spi-bus configuration build errors The commit ec976d6eb021dc8f2994248c310a41540f4756bd removed a number of gpio definitions from but missed updating these two files: Fix the following build errors by including : arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c: In function 's3c24xx_spi_gpiocfg_bus1_gpg5_6_7': arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c:25: error: implicit declaration of function 's3c2410_gpio_cfgpin' arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c:28: error: implicit declaration of function 's3c2410_gpio_pullup' arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c: In function 's3c24xx_spi_gpiocfg_bus0_gpe11_12_13': arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c:25: error: implicit declaration of function 's3c2410_gpio_cfgpin' arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c:28: error: implicit declaration of function 's3c2410_gpio_pullup' Signed-off-by: Ben Dooks Signed-off-by: Ben Dooks --- arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c | 3 +-- arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c b/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c index 9edf7894eedd..da7a61728c18 100644 --- a/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c +++ b/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c @@ -12,8 +12,7 @@ */ #include - -#include +#include #include #include diff --git a/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c b/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c index f34d0fc69ad8..86b9edc67413 100644 --- a/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c +++ b/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c @@ -12,8 +12,7 @@ */ #include - -#include +#include #include #include From ffd14417bdf2a1650bcb16d37c7e658535d1681a Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Tue, 23 Jun 2009 20:58:57 +0800 Subject: [PATCH 
038/741] [ARM] MINI2440: remove duplicated #include Remove duplicated #include('s) in arch/arm/mach-s3c2440/mach-mini2440.c Signed-off-by: Huang Weiyi Signed-off-by: Ben Dooks --- arch/arm/mach-s3c2440/mach-mini2440.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/mach-s3c2440/mach-mini2440.c b/arch/arm/mach-s3c2440/mach-mini2440.c index 6a5bc3021bdb..e714bfabbc09 100644 --- a/arch/arm/mach-s3c2440/mach-mini2440.c +++ b/arch/arm/mach-s3c2440/mach-mini2440.c @@ -48,8 +48,6 @@ #include #include -#include - #include #include #include From a3c79901170aba0d3717c2602326bab639eb1344 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 23 Jun 2009 16:30:02 +0100 Subject: [PATCH 039/741] [ARM] S3C24XX: Fix missing s3c_iis_device. Commit 52da219e9664e537a745877b0efa7cf2b1ff2996 removed the s3c_device_iis, but didn't replace it with anything so a number of s3c24xx machines are currently failing to build. As a temporary fix, re-instate s3c_device_iis until a proper replacement can be done for it. 
Signed-off-by: Ben Dooks --- arch/arm/plat-s3c/include/plat/devs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/plat-s3c/include/plat/devs.h b/arch/arm/plat-s3c/include/plat/devs.h index b5b9c4d46e9a..2e170827e0b0 100644 --- a/arch/arm/plat-s3c/include/plat/devs.h +++ b/arch/arm/plat-s3c/include/plat/devs.h @@ -37,6 +37,7 @@ extern struct platform_device s3c_device_i2c1; extern struct platform_device s3c_device_rtc; extern struct platform_device s3c_device_adc; extern struct platform_device s3c_device_sdi; +extern struct platform_device s3c_device_iis; extern struct platform_device s3c_device_hwmon; extern struct platform_device s3c_device_hsmmc0; extern struct platform_device s3c_device_hsmmc1; From b647712f669f4fadf428a14ab45ab4c03558cdf5 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 23 Jun 2009 16:34:35 +0100 Subject: [PATCH 040/741] [ARM] S3C: Fix S3C24XX build to not include s3c64xx IIS devices Commit 52da219e9664e537a745877b0efa7cf2b1ff2996 added IIS platform devices, but these do not build on s3c24xx systems and the file depends on SND_S3C24XX_SOC, which is selected for all S3C64XX/S3C24XX systems. As a quick fix, make the dev-audio.o file depends on SND_S3C64XX_SOC_I2S instead. 
Signed-off-by: Ben Dooks --- arch/arm/plat-s3c/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/plat-s3c/Makefile b/arch/arm/plat-s3c/Makefile index 74bb7cb5da49..0761766b1833 100644 --- a/arch/arm/plat-s3c/Makefile +++ b/arch/arm/plat-s3c/Makefile @@ -34,7 +34,7 @@ obj-$(CONFIG_S3C_DEV_HSMMC) += dev-hsmmc.o obj-$(CONFIG_S3C_DEV_HSMMC1) += dev-hsmmc1.o obj-y += dev-i2c0.o obj-$(CONFIG_S3C_DEV_I2C1) += dev-i2c1.o -obj-$(CONFIG_SND_S3C24XX_SOC) += dev-audio.o +obj-$(CONFIG_SND_S3C64XX_SOC_I2S) += dev-audio.o obj-$(CONFIG_S3C_DEV_FB) += dev-fb.o obj-$(CONFIG_S3C_DEV_USB_HOST) += dev-usb.o obj-$(CONFIG_S3C_DEV_USB_HSOTG) += dev-usb-hsotg.o From 622a8f5f7bf160507861cf05309020049f42976d Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 23 Jun 2009 18:20:10 +0100 Subject: [PATCH 041/741] [ARM] s3c2410_defconfig: add MINI2440 machine to build Add the MINI2440 to the list of machines built by the central defconfig. Signed-off-by: Ben Dooks --- arch/arm/configs/s3c2410_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/s3c2410_defconfig b/arch/arm/configs/s3c2410_defconfig index f4f1899f3c88..b49810461e41 100644 --- a/arch/arm/configs/s3c2410_defconfig +++ b/arch/arm/configs/s3c2410_defconfig @@ -260,6 +260,7 @@ CONFIG_MACH_NEXCODER_2440=y CONFIG_SMDK2440_CPU2440=y CONFIG_MACH_AT2440EVB=y CONFIG_CPU_S3C2442=y +CONFIG_MACH_MINI2440=y # # S3C2442 Machines From 51af243c631d9d03376d1454a24cf0a96e6f17cb Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 23 Jun 2009 18:23:27 +0100 Subject: [PATCH 042/741] [ARM] MINI2440: Add missing flash_bbt flat to NAND The commit 9db41f9edcb87ae050fcb171c44be7f212728d54 added the .flash_bbt flag to the nand set, so add this back into the mach-mini2440.c file (taken out on initial commit to allow build). 
Signed-off-by: Ben Dooks --- arch/arm/mach-s3c2440/mach-mini2440.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-s3c2440/mach-mini2440.c b/arch/arm/mach-s3c2440/mach-mini2440.c index e714bfabbc09..ec71a6965786 100644 --- a/arch/arm/mach-s3c2440/mach-mini2440.c +++ b/arch/arm/mach-s3c2440/mach-mini2440.c @@ -273,6 +273,7 @@ static struct s3c2410_nand_set mini2440_nand_sets[] __initdata = { .nr_chips = 1, .nr_partitions = ARRAY_SIZE(mini2440_default_nand_part), .partitions = mini2440_default_nand_part, + .flash_bbt = 1, /* we use u-boot to create a BBT */ }, }; From cca03c0aeb18a975abec28df518a2b64ae3e6964 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 23 Jun 2009 17:12:49 +0530 Subject: [PATCH 043/741] perf stat: Fix verbose for perf stat Error message should use stderr for verbose (-v), otherwise message will be lost for: $ ./perf stat -v > /dev/null For example on AMD bus-cycles event is not available so now it looks like: $ ./perf stat -v -e bus-cycles ls > /dev/null Error: counter 0, sys_perf_counter_open() syscall returned with -1 (Invalid argument) Performance counter stats for 'ls': bus-cycles 0.006765877 seconds time elapsed. 
Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245757369.3776.1.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 6d3eeac1ea25..5e04fcc8d077 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -109,6 +109,10 @@ static u64 walltime_nsecs_noise; static u64 runtime_cycles_avg; static u64 runtime_cycles_noise; + +#define ERR_PERF_OPEN \ +"Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" + static void create_perf_stat_counter(int counter) { struct perf_counter_attr *attr = attrs + counter; @@ -119,20 +123,20 @@ static void create_perf_stat_counter(int counter) if (system_wide) { int cpu; - for (cpu = 0; cpu < nr_cpus; cpu ++) { + for (cpu = 0; cpu < nr_cpus; cpu++) { fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); - if (fd[cpu][counter] < 0 && verbose) { - printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno)); - } + if (fd[cpu][counter] < 0 && verbose) + fprintf(stderr, ERR_PERF_OPEN, counter, + fd[cpu][counter], strerror(errno)); } } else { attr->inherit = inherit; attr->disabled = 1; fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); - if (fd[0][counter] < 0 && verbose) { - printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno)); - } + if (fd[0][counter] < 0 && verbose) + fprintf(stderr, ERR_PERF_OPEN, counter, + fd[0][counter], strerror(errno)); } } @@ -168,7 +172,7 @@ static void read_counter(int counter) count[0] = count[1] = count[2] = 0; nv = scale ? 
3 : 1; - for (cpu = 0; cpu < nr_cpus; cpu ++) { + for (cpu = 0; cpu < nr_cpus; cpu++) { if (fd[cpu][counter] < 0) continue; From 76609a6928bff29ca05a94420ae3e088fbb9c2f9 Mon Sep 17 00:00:00 2001 From: Nelson Castillo Date: Tue, 23 Jun 2009 13:54:32 -0500 Subject: [PATCH 044/741] [ARM] GTA02: build fixes (s3c2410_nand_set usage) This patch fixes two errors we get when building GTA02 kernel. ~ use_bbt is incorrect, we need flash_bbt. ~ We do not need .force_soft_ecc because we can unset CONFIG_MTD_NAND_S3C2410_HWECC. Signed-off-by: Nelson Castillo [ben-linux@fluff.org: updated patch description] Signed-off-by: Ben Dooks --- arch/arm/mach-s3c2442/mach-gta02.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/mach-s3c2442/mach-gta02.c b/arch/arm/mach-s3c2442/mach-gta02.c index e23b581aa0e1..0fb385bd9cd9 100644 --- a/arch/arm/mach-s3c2442/mach-gta02.c +++ b/arch/arm/mach-s3c2442/mach-gta02.c @@ -433,8 +433,7 @@ static struct s3c2410_nand_set gta02_nand_sets[] = { */ .name = "neo1973-nand", .nr_chips = 1, - .use_bbt = 1, - .force_soft_ecc = 1, + .flash_bbt = 1, }, }; From f7679dabfaf69840b000d238a020cee7157aca17 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Mon, 22 Jun 2009 18:42:33 +0200 Subject: [PATCH 045/741] perf_counter tools: Fix strbuf_fread() error path handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit size_t res cannot be less than 0 - fread returns 0 on error. 
[ Updated by: René Scharfe ] Reported-by: Ingo Molnar Signed-off-by: Roel Kluin Cc: Andrew Morton Cc: Junio C Hamano LKML-Reference: <4A3FB479.2090902@lsrfire.ath.cx> Signed-off-by: Ingo Molnar --- tools/perf/util/strbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index eaba09306802..464e7ca898cf 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -259,7 +259,7 @@ size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f) res = fread(sb->buf + sb->len, 1, size, f); if (res > 0) strbuf_setlen(sb, sb->len + res); - else if (res < 0 && oldalloc == 0) + else if (oldalloc == 0) strbuf_release(sb); return res; } From c14dab5c0782ef632742963a66276a195418a63c Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 24 Jun 2009 10:13:24 +0800 Subject: [PATCH 046/741] perf_counter, x86: Set global control MSR correctly Previous code made an assumption that the power on value of global control MSR has enabled all fixed and general purpose counters properly. However, this is not the case for certain Intel processors, such as Atom - and it might also be firmware dependent. Each enable bit in IA32_PERF_GLOBAL_CTRL is AND'ed with the enable bits for all privilege levels in the respective IA32_PERFEVTSELx or IA32_PERF_FIXED_CTR_CTRL MSRs to start/stop the counting of respective counters. Counting is enabled if the AND'ed results is true; counting is disabled when the result is false. The end result is that all fixed counters are always disabled on Atom processors because the assumption is just invalid. Fix this by not initializing the ctrl-mask out of the global MSR, but setting it to perf_counter_mask. 
Reported-by: Stephane Eranian Signed-off-by: Yong Wang Cc: Arjan van de Ven Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <20090624021324.GA2788@ywang-moblin2.bj.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 22eb3a1d4f9c..a310d19faca3 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -969,13 +969,6 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) if (!x86_pmu.num_counters_fixed) return -1; - /* - * Quirk, IA32_FIXED_CTRs do not work on current Atom processors: - */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86_model == 28) - return -1; - event = hwc->config & ARCH_PERFMON_EVENT_MASK; if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) @@ -1428,8 +1421,6 @@ static int intel_pmu_init(void) */ x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); - /* * Install the hw-cache-events table: */ @@ -1514,6 +1505,7 @@ void __init init_hw_perf_counters(void) perf_counter_mask |= ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; + x86_pmu.intel_ctrl = perf_counter_mask; perf_counters_lapic_init(); register_die_notifier(&perf_counter_nmi_notifier); From 2a13877c5ef3207a2a5c56250742e60808677f90 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Fri, 10 Apr 2009 07:50:45 -0400 Subject: [PATCH 047/741] osdblk: a Linux block device for OSD objects Submitted driver exports a block device of the form /dev/osdblkX, where X is a decimal number. It does that by mounting a stacking block device on top of an osd object. For example, if you create a 2G object on an OSD device, you can then use this module to present that 2G object as a Linux block device. 
See inside patch for exact documentation. [Sitting at linux-next helped fix proper Kconfig dependency for this driver, thanks to Randy Dunlap] Signed-off-by: Jeff Garzik Signed-off-by: Boaz Harrosh --- drivers/block/Kconfig | 16 + drivers/block/Makefile | 1 + drivers/block/osdblk.c | 694 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 711 insertions(+) create mode 100644 drivers/block/osdblk.c diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index bb72ada9f074..1d886e079c58 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -298,6 +298,22 @@ config BLK_DEV_NBD If unsure, say N. +config BLK_DEV_OSD + tristate "OSD object-as-blkdev support" + depends on SCSI_OSD_ULD + ---help--- + Saying Y or M here will allow the exporting of a single SCSI + OSD (object-based storage) object as a Linux block device. + + For example, if you create a 2G object on an OSD device, + you can then use this module to present that 2G object as + a Linux block device. + + To compile this driver as a module, choose M here: the + module will be called osdblk. + + If unsure, say N. + config BLK_DEV_SX8 tristate "Promise SATA SX8 support" depends on PCI diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 7755a5e2a85e..cdaa3f8fddf0 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_XILINX_SYSACE) += xsysace.o obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o obj-$(CONFIG_MG_DISK) += mg_disk.o obj-$(CONFIG_SUNVDC) += sunvdc.o +obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o obj-$(CONFIG_BLK_DEV_UMEM) += umem.o obj-$(CONFIG_BLK_DEV_NBD) += nbd.o diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c new file mode 100644 index 000000000000..3565d0dd123f --- /dev/null +++ b/drivers/block/osdblk.c @@ -0,0 +1,694 @@ + +/* + osdblk.c -- Export a single SCSI OSD object as a Linux block device + + + Copyright 2009 Red Hat, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + + Instructions for use + -------------------- + + 1) Map a Linux block device to an existing OSD object. + + In this example, we will use partition id 1234, object id 5678, + OSD device /dev/osd1. + + $ echo "1234 5678 /dev/osd1" > /sys/class/osdblk/add + + + 2) List all active blkdev<->object mappings. + + In this example, we have performed step #1 twice, creating two blkdevs, + mapped to two separate OSD objects. + + $ cat /sys/class/osdblk/list + 0 174 1234 5678 /dev/osd1 + 1 179 1994 897123 /dev/osd0 + + The columns, in order, are: + - blkdev unique id + - blkdev assigned major + - OSD object partition id + - OSD object id + - OSD device + + + 3) Remove an active blkdev<->object mapping. + + In this example, we remove the mapping with blkdev unique id 1. + + $ echo 1 > /sys/class/osdblk/remove + + + NOTE: The actual creation and deletion of OSD objects is outside the scope + of this driver. + + */ + +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "osdblk" +#define PFX DRV_NAME ": " + +/* #define _OSDBLK_DEBUG */ +#ifdef _OSDBLK_DEBUG +#define OSDBLK_DEBUG(fmt, a...) \ + printk(KERN_NOTICE "osdblk @%s:%d: " fmt, __func__, __LINE__, ##a) +#else +#define OSDBLK_DEBUG(fmt, a...) 
\ + do { if (0) printk(fmt, ##a); } while (0) +#endif + +MODULE_AUTHOR("Jeff Garzik "); +MODULE_DESCRIPTION("block device inside an OSD object osdblk.ko"); +MODULE_LICENSE("GPL"); + +struct osdblk_device; + +enum { + OSDBLK_MINORS_PER_MAJOR = 256, /* max minors per blkdev */ + OSDBLK_MAX_REQ = 32, /* max parallel requests */ + OSDBLK_OP_TIMEOUT = 4 * 60, /* sync OSD req timeout */ +}; + +struct osdblk_request { + struct request *rq; /* blk layer request */ + struct bio *bio; /* cloned bio */ + struct osdblk_device *osdev; /* associated blkdev */ +}; + +struct osdblk_device { + int id; /* blkdev unique id */ + + int major; /* blkdev assigned major */ + struct gendisk *disk; /* blkdev's gendisk and rq */ + struct request_queue *q; + + struct osd_dev *osd; /* associated OSD */ + + char name[32]; /* blkdev name, e.g. osdblk34 */ + + spinlock_t lock; /* queue lock */ + + struct osd_obj_id obj; /* OSD partition, obj id */ + uint8_t obj_cred[OSD_CAP_LEN]; /* OSD cred */ + + struct osdblk_request req[OSDBLK_MAX_REQ]; /* request table */ + + struct list_head node; + + char osd_path[0]; /* OSD device path */ +}; + +static struct class *class_osdblk; /* /sys/class/osdblk */ +static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ +static LIST_HEAD(osdblkdev_list); + +static struct block_device_operations osdblk_bd_ops = { + .owner = THIS_MODULE, +}; + +static const struct osd_attr g_attr_logical_length = ATTR_DEF( + OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); + +static void osdblk_make_credential(u8 cred_a[OSD_CAP_LEN], + const struct osd_obj_id *obj) +{ + osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); +} + +/* copied from exofs; move to libosd? */ +/* + * Perform a synchronous OSD operation. copied from exofs; move to libosd? 
+ */ +static int osd_sync_op(struct osd_request *or, int timeout, uint8_t *credential) +{ + int ret; + + or->timeout = timeout; + ret = osd_finalize_request(or, 0, credential, NULL); + if (ret) + return ret; + + ret = osd_execute_request(or); + + /* osd_req_decode_sense(or, ret); */ + return ret; +} + +/* + * Perform an asynchronous OSD operation. copied from exofs; move to libosd? + */ +static int osd_async_op(struct osd_request *or, osd_req_done_fn *async_done, + void *caller_context, u8 *cred) +{ + int ret; + + ret = osd_finalize_request(or, 0, cred, NULL); + if (ret) + return ret; + + ret = osd_execute_request_async(or, async_done, caller_context); + + return ret; +} + +/* copied from exofs; move to libosd? */ +static int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) +{ + struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ + void *iter = NULL; + int nelem; + + do { + nelem = 1; + osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter); + if ((cur_attr.attr_page == attr->attr_page) && + (cur_attr.attr_id == attr->attr_id)) { + attr->len = cur_attr.len; + attr->val_ptr = cur_attr.val_ptr; + return 0; + } + } while (iter); + + return -EIO; +} + +static int osdblk_get_obj_size(struct osdblk_device *osdev, u64 *size_out) +{ + struct osd_request *or; + struct osd_attr attr; + int ret; + + /* start request */ + or = osd_start_request(osdev->osd, GFP_KERNEL); + if (!or) + return -ENOMEM; + + /* create a get-attributes(length) request */ + osd_req_get_attributes(or, &osdev->obj); + + osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); + + /* execute op synchronously */ + ret = osd_sync_op(or, OSDBLK_OP_TIMEOUT, osdev->obj_cred); + if (ret) + goto out; + + /* extract length from returned attribute info */ + attr = g_attr_logical_length; + ret = extract_attr_from_req(or, &attr); + if (ret) + goto out; + + *size_out = get_unaligned_be64(attr.val_ptr); + +out: + osd_end_request(or); + return ret; + +} + +static void 
osdblk_osd_complete(struct osd_request *or, void *private) +{ + struct osdblk_request *orq = private; + struct osd_sense_info osi; + int ret = osd_req_decode_sense(or, &osi); + + if (ret) { + ret = -EIO; + OSDBLK_DEBUG("osdblk_osd_complete with err=%d\n", ret); + } + + /* complete OSD request */ + osd_end_request(or); + + /* complete request passed to osdblk by block layer */ + __blk_end_request_all(orq->rq, ret); +} + +static void bio_chain_put(struct bio *chain) +{ + struct bio *tmp; + + while (chain) { + tmp = chain; + chain = chain->bi_next; + + bio_put(tmp); + } +} + +static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask) +{ + struct bio *tmp, *new_chain = NULL, *tail = NULL; + + while (old_chain) { + tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); + if (!tmp) + goto err_out; + + __bio_clone(tmp, old_chain); + tmp->bi_bdev = NULL; + gfpmask &= ~__GFP_WAIT; + tmp->bi_next = NULL; + + if (!new_chain) + new_chain = tail = tmp; + else { + tail->bi_next = tmp; + tail = tmp; + } + + old_chain = old_chain->bi_next; + } + + return new_chain; + +err_out: + OSDBLK_DEBUG("bio_chain_clone with err\n"); + bio_chain_put(new_chain); + return NULL; +} + +static void osdblk_rq_fn(struct request_queue *q) +{ + struct osdblk_device *osdev = q->queuedata; + + while (1) { + struct request *rq; + struct osdblk_request *orq; + struct osd_request *or; + struct bio *bio; + bool do_write, do_flush; + + /* peek at request from block layer */ + rq = blk_fetch_request(q); + if (!rq) + break; + + /* filter out block requests we don't understand */ + if (!blk_fs_request(rq) && !blk_barrier_rq(rq)) { + blk_end_request_all(rq, 0); + continue; + } + + /* deduce our operation (read, write, flush) */ + /* I wish the block layer simplified cmd_type/cmd_flags/cmd[] + * into a clearly defined set of RPC commands: + * read, write, flush, scsi command, power mgmt req, + * driver-specific, etc. 
+ */ + + do_flush = (rq->special == (void *) 0xdeadbeefUL); + do_write = (rq_data_dir(rq) == WRITE); + + if (!do_flush) { /* osd_flush does not use a bio */ + /* a bio clone to be passed down to OSD request */ + bio = bio_chain_clone(rq->bio, GFP_ATOMIC); + if (!bio) + break; + } else + bio = NULL; + + /* alloc internal OSD request, for OSD command execution */ + or = osd_start_request(osdev->osd, GFP_ATOMIC); + if (!or) { + bio_chain_put(bio); + OSDBLK_DEBUG("osd_start_request with err\n"); + break; + } + + orq = &osdev->req[rq->tag]; + orq->rq = rq; + orq->bio = bio; + orq->osdev = osdev; + + /* init OSD command: flush, write or read */ + if (do_flush) + osd_req_flush_object(or, &osdev->obj, + OSD_CDB_FLUSH_ALL, 0, 0); + else if (do_write) + osd_req_write(or, &osdev->obj, blk_rq_pos(rq) * 512ULL, + bio, blk_rq_bytes(rq)); + else + osd_req_read(or, &osdev->obj, blk_rq_pos(rq) * 512ULL, + bio, blk_rq_bytes(rq)); + + OSDBLK_DEBUG("%s 0x%x bytes at 0x%llx\n", + do_flush ? "flush" : do_write ? 
+ "write" : "read", blk_rq_bytes(rq), + blk_rq_pos(rq) * 512ULL); + + /* begin OSD command execution */ + if (osd_async_op(or, osdblk_osd_complete, orq, + osdev->obj_cred)) { + osd_end_request(or); + blk_requeue_request(q, rq); + bio_chain_put(bio); + OSDBLK_DEBUG("osd_execute_request_async with err\n"); + break; + } + + /* remove the special 'flush' marker, now that the command + * is executing + */ + rq->special = NULL; + } +} + +static void osdblk_prepare_flush(struct request_queue *q, struct request *rq) +{ + /* add driver-specific marker, to indicate that this request + * is a flush command + */ + rq->special = (void *) 0xdeadbeefUL; +} + +static void osdblk_free_disk(struct osdblk_device *osdev) +{ + struct gendisk *disk = osdev->disk; + + if (!disk) + return; + + if (disk->flags & GENHD_FL_UP) + del_gendisk(disk); + if (disk->queue) + blk_cleanup_queue(disk->queue); + put_disk(disk); +} + +static int osdblk_init_disk(struct osdblk_device *osdev) +{ + struct gendisk *disk; + struct request_queue *q; + int rc; + u64 obj_size = 0; + + /* contact OSD, request size info about the object being mapped */ + rc = osdblk_get_obj_size(osdev, &obj_size); + if (rc) + return rc; + + /* create gendisk info */ + disk = alloc_disk(OSDBLK_MINORS_PER_MAJOR); + if (!disk) + return -ENOMEM; + + sprintf(disk->disk_name, DRV_NAME "%d", osdev->id); + disk->major = osdev->major; + disk->first_minor = 0; + disk->fops = &osdblk_bd_ops; + disk->private_data = osdev; + + /* init rq */ + q = blk_init_queue(osdblk_rq_fn, &osdev->lock); + if (!q) { + put_disk(disk); + return -ENOMEM; + } + + /* switch queue to TCQ mode; allocate tag map */ + rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL); + if (rc) { + blk_cleanup_queue(q); + put_disk(disk); + return rc; + } + + blk_queue_prep_rq(q, blk_queue_start_tag); + blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, osdblk_prepare_flush); + + disk->queue = q; + + q->queuedata = osdev; + + osdev->disk = disk; + osdev->q = q; + + /* finally, announce 
the disk to the world */ + set_capacity(disk, obj_size / 512ULL); + add_disk(disk); + + printk(KERN_INFO "%s: Added of size 0x%llx\n", + disk->disk_name, (unsigned long long)obj_size); + + return 0; +} + +/******************************************************************** + * /sys/class/osdblk/ + * add map OSD object to blkdev + * remove unmap OSD object + * list show mappings + *******************************************************************/ + +static void class_osdblk_release(struct class *cls) +{ + kfree(cls); +} + +static ssize_t class_osdblk_list(struct class *c, char *data) +{ + int n = 0; + struct list_head *tmp; + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + list_for_each(tmp, &osdblkdev_list) { + struct osdblk_device *osdev; + + osdev = list_entry(tmp, struct osdblk_device, node); + + n += sprintf(data+n, "%d %d %llu %llu %s\n", + osdev->id, + osdev->major, + osdev->obj.partition, + osdev->obj.id, + osdev->osd_path); + } + + mutex_unlock(&ctl_mutex); + return n; +} + +static ssize_t class_osdblk_add(struct class *c, const char *buf, size_t count) +{ + struct osdblk_device *osdev; + ssize_t rc; + int irc, new_id = 0; + struct list_head *tmp; + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + /* new osdblk_device object */ + osdev = kzalloc(sizeof(*osdev) + strlen(buf) + 1, GFP_KERNEL); + if (!osdev) { + rc = -ENOMEM; + goto err_out_mod; + } + + /* static osdblk_device initialization */ + spin_lock_init(&osdev->lock); + INIT_LIST_HEAD(&osdev->node); + + /* generate unique id: find highest unique id, add one */ + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + list_for_each(tmp, &osdblkdev_list) { + struct osdblk_device *osdev; + + osdev = list_entry(tmp, struct osdblk_device, node); + if (osdev->id > new_id) + new_id = osdev->id + 1; + } + + osdev->id = new_id; + + /* add to global list */ + list_add_tail(&osdev->node, &osdblkdev_list); + + mutex_unlock(&ctl_mutex); + + /* parse add command */ + if (sscanf(buf, "%llu 
%llu %s", &osdev->obj.partition, &osdev->obj.id, + osdev->osd_path) != 3) { + rc = -EINVAL; + goto err_out_slot; + } + + /* initialize rest of new object */ + sprintf(osdev->name, DRV_NAME "%d", osdev->id); + + /* contact requested OSD */ + osdev->osd = osduld_path_lookup(osdev->osd_path); + if (IS_ERR(osdev->osd)) { + rc = PTR_ERR(osdev->osd); + goto err_out_slot; + } + + /* build OSD credential */ + osdblk_make_credential(osdev->obj_cred, &osdev->obj); + + /* register our block device */ + irc = register_blkdev(0, osdev->name); + if (irc < 0) { + rc = irc; + goto err_out_osd; + } + + osdev->major = irc; + + /* set up and announce blkdev mapping */ + rc = osdblk_init_disk(osdev); + if (rc) + goto err_out_blkdev; + + return count; + +err_out_blkdev: + unregister_blkdev(osdev->major, osdev->name); +err_out_osd: + osduld_put_device(osdev->osd); +err_out_slot: + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + list_del_init(&osdev->node); + mutex_unlock(&ctl_mutex); + + kfree(osdev); +err_out_mod: + OSDBLK_DEBUG("Error adding device %s\n", buf); + module_put(THIS_MODULE); + return rc; +} + +static ssize_t class_osdblk_remove(struct class *c, const char *buf, + size_t count) +{ + struct osdblk_device *osdev = NULL; + int target_id, rc; + unsigned long ul; + struct list_head *tmp; + + rc = strict_strtoul(buf, 10, &ul); + if (rc) + return rc; + + /* convert to int; abort if we lost anything in the conversion */ + target_id = (int) ul; + if (target_id != ul) + return -EINVAL; + + /* remove object from list immediately */ + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + list_for_each(tmp, &osdblkdev_list) { + osdev = list_entry(tmp, struct osdblk_device, node); + if (osdev->id == target_id) { + list_del_init(&osdev->node); + break; + } + osdev = NULL; + } + + mutex_unlock(&ctl_mutex); + + if (!osdev) + return -ENOENT; + + /* clean up and free blkdev and associated OSD connection */ + osdblk_free_disk(osdev); + unregister_blkdev(osdev->major, osdev->name); + 
osduld_put_device(osdev->osd); + kfree(osdev); + + /* release module ref */ + module_put(THIS_MODULE); + + return count; +} + +static struct class_attribute class_osdblk_attrs[] = { + __ATTR(add, 0200, NULL, class_osdblk_add), + __ATTR(remove, 0200, NULL, class_osdblk_remove), + __ATTR(list, 0444, class_osdblk_list, NULL), + __ATTR_NULL +}; + +static int osdblk_sysfs_init(void) +{ + int ret = 0; + + /* + * create control files in sysfs + * /sys/class/osdblk/... + */ + class_osdblk = kzalloc(sizeof(*class_osdblk), GFP_KERNEL); + if (!class_osdblk) + return -ENOMEM; + + class_osdblk->name = DRV_NAME; + class_osdblk->owner = THIS_MODULE; + class_osdblk->class_release = class_osdblk_release; + class_osdblk->class_attrs = class_osdblk_attrs; + + ret = class_register(class_osdblk); + if (ret) { + kfree(class_osdblk); + class_osdblk = NULL; + printk(PFX "failed to create class osdblk\n"); + return ret; + } + + return 0; +} + +static void osdblk_sysfs_cleanup(void) +{ + if (class_osdblk) + class_destroy(class_osdblk); + class_osdblk = NULL; +} + +static int __init osdblk_init(void) +{ + int rc; + + rc = osdblk_sysfs_init(); + if (rc) + return rc; + + return 0; +} + +static void __exit osdblk_exit(void) +{ + osdblk_sysfs_cleanup(); +} + +module_init(osdblk_init); +module_exit(osdblk_exit); + From bc47df0fa705887242c26c7b040e7cf0170ab1f1 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 20 May 2009 18:50:34 +0300 Subject: [PATCH 048/741] osdblk: Adjust queue limits to lower device's limits call blk_queue_stack_limits() to copy queue limits from the underline osd scsi_device. This is absolutely needed because osdblk cannot sleep when allocating a lower-request and therefore cannot be bouncing. 
TODO: Dynamic changes of limits to the lower device queue will not reflect in the upper driver Signed-off-by: Boaz Harrosh --- drivers/block/osdblk.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 3565d0dd123f..13c1aee6aa3f 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -66,6 +66,7 @@ #include #include #include +#include #define DRV_NAME "osdblk" #define PFX DRV_NAME ": " @@ -437,6 +438,12 @@ static int osdblk_init_disk(struct osdblk_device *osdev) return rc; } + /* Set our limits to the lower device limits, because osdblk cannot + * sleep when allocating a lower-request and therefore cannot be + * bouncing. + */ + blk_queue_stack_limits(q, osd_request_queue(osdev->osd)); + blk_queue_prep_rq(q, blk_queue_start_tag); blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, osdblk_prepare_flush); From 6f4b67b8ff707147e14ee71045ab25aa286520f2 Mon Sep 17 00:00:00 2001 From: Shin-ichiro KAWASAKI Date: Sun, 21 Jun 2009 10:56:22 +0000 Subject: [PATCH 049/741] clocksource: sh_tmu: Make undefined TCOR behaviour less undefined. Avoid undocumented vague TMU behavior when zero value is set to TCOR. This primarily fixes up issues encountered under qemu with a zero-length period, while the hardware itself is fairly ambivalent one way or the other. 
Signed-off-by: Shin-ichiro KAWASAKI Acked-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/clocksource/sh_tmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c index 9ffb05f4095d..93c2322feab7 100644 --- a/drivers/clocksource/sh_tmu.c +++ b/drivers/clocksource/sh_tmu.c @@ -161,7 +161,7 @@ static void sh_tmu_set_next(struct sh_tmu_priv *p, unsigned long delta, if (periodic) sh_tmu_write(p, TCOR, delta); else - sh_tmu_write(p, TCOR, 0); + sh_tmu_write(p, TCOR, 0xffffffff); sh_tmu_write(p, TCNT, delta); From 3d63259583278262d9153316094e315f73ebfcb5 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 24 Jun 2009 18:19:34 +0530 Subject: [PATCH 050/741] perf stat: Remove dead code Remove dead code and do some code alignment. Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245847774.2681.2.camel@ht.satnam> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 48 ++++++++++++--------------------------- 1 file changed, 15 insertions(+), 33 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5e04fcc8d077..8420ec589506 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -59,42 +59,27 @@ static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { }; +#define MAX_RUN 100 + static int system_wide = 0; -static int inherit = 1; static int verbose = 0; +static int nr_cpus = 0; +static int run_idx = 0; + +static int run_count = 1; +static int inherit = 1; +static int scale = 1; +static int target_pid = -1; static int fd[MAX_NR_CPUS][MAX_COUNTERS]; -static int target_pid = -1; -static int nr_cpus = 0; -static unsigned int page_size; - -static int scale = 1; - -static const unsigned int default_count[] = { - 1000000, - 1000000, - 10000, - 10000, - 1000000, - 10000, -}; - -#define MAX_RUN 100 - -static int run_count = 1; -static int run_idx = 0; - -static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; 
-static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; - -//static u64 event_hist[MAX_RUN][MAX_COUNTERS][3]; - - static u64 runtime_nsecs[MAX_RUN]; static u64 walltime_nsecs[MAX_RUN]; static u64 runtime_cycles[MAX_RUN]; +static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; +static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; + static u64 event_res_avg[MAX_COUNTERS][3]; static u64 event_res_noise[MAX_COUNTERS][3]; @@ -109,7 +94,6 @@ static u64 walltime_nsecs_noise; static u64 runtime_cycles_avg; static u64 runtime_cycles_noise; - #define ERR_PERF_OPEN \ "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" @@ -470,9 +454,9 @@ static const struct option options[] = { OPT_INTEGER('p', "pid", &target_pid, "stat events on existing pid"), OPT_BOOLEAN('a', "all-cpus", &system_wide, - "system-wide collection from all CPUs"), + "system-wide collection from all CPUs"), OPT_BOOLEAN('S', "scale", &scale, - "scale/normalize counters"), + "scale/normalize counters"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_INTEGER('r', "repeat", &run_count, @@ -484,8 +468,6 @@ int cmd_stat(int argc, const char **argv, const char *prefix) { int status; - page_size = sysconf(_SC_PAGE_SIZE); - memcpy(attrs, default_attrs, sizeof(attrs)); argc = parse_options(argc, argv, options, stat_usage, 0); @@ -515,7 +497,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix) status = 0; for (run_idx = 0; run_idx < run_count; run_idx++) { if (run_count != 1 && verbose) - fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx+1); + fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); status = run_perf_stat(argc, argv); } From f6faac71d502be1c29c81b2f45657662c3b84470 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Tue, 23 Jun 2009 17:24:40 -0700 Subject: [PATCH 051/741] rcu: Mark Hierarchical RCU no longer experimental Removes the warnings about Hierarchical RCU being experimental, given that it has gone through almost six months of being the default RCU in mainline for the x86 with very little trouble. This makes hierarchical-RCU bootup look less scary. Signed-off-by: Paul E. McKenney Cc: akpm@linux-foundation.org Cc: niv@us.ibm.com Cc: dvhltc@us.ibm.com Cc: dipankar@in.ibm.com Cc: dhowells@redhat.com Cc: lethal@linux-sh.org Cc: kernel@wantstofly.org Cc: cl@linux-foundation.org Cc: schamp@sgi.com Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0dccfbba6d26..7717b95c2027 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1533,7 +1533,7 @@ void __init __rcu_init(void) int j; struct rcu_node *rnp; - printk(KERN_WARNING "Experimental hierarchical RCU implementation.\n"); + printk(KERN_INFO "Hierarchical RCU implementation.\n"); #ifdef CONFIG_RCU_CPU_STALL_DETECTOR printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ @@ -1546,7 +1546,6 @@ void __init __rcu_init(void) rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)i); /* Register notifier for non-boot CPUs */ register_cpu_notifier(&rcu_nb); - printk(KERN_WARNING "Experimental hierarchical RCU init done.\n"); } module_param(blimit, int, 0); From d94d4adb7dd05b4e25f3c317a1b932ec74272a12 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 24 Jun 2009 22:35:30 +0900 Subject: [PATCH 052/741] sh: make set_perf_counter_pending() static inline. Fixes up a recently introduced build error. 
Reported-by: Kyle McMartin Signed-off-by: Paul Mundt --- arch/sh/include/asm/perf_counter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/include/asm/perf_counter.h b/arch/sh/include/asm/perf_counter.h index a8153c2aa6fa..61c2b40c802c 100644 --- a/arch/sh/include/asm/perf_counter.h +++ b/arch/sh/include/asm/perf_counter.h @@ -2,6 +2,6 @@ #define __ASM_SH_PERF_COUNTER_H /* SH only supports software counters through this interface. */ -#define set_perf_counter_pending() do { } while (0) +static inline void set_perf_counter_pending(void) {} #endif /* __ASM_SH_PERF_COUNTER_H */ From 17659c60629618c0aa67eb3cb6a77d2c52486d2e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 24 Jun 2009 15:35:15 +0100 Subject: [PATCH 053/741] mtd: maps: Remove BUS_ID_SIZE from integrator_flash Signed-off-by: David Woodhouse Tested-by: Catalin Marinas --- drivers/mtd/maps/integrator-flash.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/mtd/maps/integrator-flash.c b/drivers/mtd/maps/integrator-flash.c index b08a798ee254..2aac41bde8b3 100644 --- a/drivers/mtd/maps/integrator-flash.c +++ b/drivers/mtd/maps/integrator-flash.c @@ -42,10 +42,8 @@ #include #include -#define SUBDEV_NAME_SIZE (BUS_ID_SIZE + 2) - struct armflash_subdev_info { - char name[SUBDEV_NAME_SIZE]; + char *name; struct mtd_info *mtd; struct map_info map; struct flash_platform_data *plat; @@ -134,6 +132,8 @@ static void armflash_subdev_remove(struct armflash_subdev_info *subdev) map_destroy(subdev->mtd); if (subdev->map.virt) iounmap(subdev->map.virt); + kfree(subdev->name); + subdev->name = NULL; release_mem_region(subdev->map.phys, subdev->map.size); } @@ -177,16 +177,22 @@ static int armflash_probe(struct platform_device *dev) if (nr == 1) /* No MTD concatenation, just use the default name */ - snprintf(subdev->name, SUBDEV_NAME_SIZE, "%s", - dev_name(&dev->dev)); + subdev->name = kstrdup(dev_name(&dev->dev), GFP_KERNEL); else - 
snprintf(subdev->name, SUBDEV_NAME_SIZE, "%s-%d", - dev_name(&dev->dev), i); + subdev->name = kasprintf(GFP_KERNEL, "%s-%d", + dev_name(&dev->dev), i); + if (!subdev->name) { + err = -ENOMEM; + break; + } subdev->plat = plat; err = armflash_subdev_probe(subdev, res); - if (err) + if (err) { + kfree(subdev->name); + subdev->name = NULL; break; + } } info->nr_subdev = i; From 7433ab770327b471399f7b5baacad78e160b5393 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 25 Jun 2009 02:30:10 +0900 Subject: [PATCH 054/741] sh: Hook up page fault events for software perf counters. This adds page fault instrumentation for the software performance counters. Follows the x86 and powerpc changes. Signed-off-by: Paul Mundt --- arch/sh/mm/fault_32.c | 65 ++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 31 deletions(-) diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c index cc8ddbdf3d7a..71925946f1e1 100644 --- a/arch/sh/mm/fault_32.c +++ b/arch/sh/mm/fault_32.c @@ -15,12 +15,28 @@ #include #include #include -#include +#include #include #include #include #include +static inline int notify_page_fault(struct pt_regs *regs, int trap) +{ + int ret = 0; + +#ifdef CONFIG_KPROBES + if (!user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, trap)) + ret = 1; + preempt_enable(); + } +#endif + + return ret; +} + /* * This routine handles page faults. 
It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -87,14 +103,17 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, return; } - /* Only enable interrupts if they were on before the fault */ - if ((regs->sr & SR_IMASK) != SR_IMASK) { - trace_hardirqs_on(); - local_irq_enable(); - } - mm = tsk->mm; + if (unlikely(notify_page_fault(regs, lookup_exception_vector()))) + return; + + /* Only enable interrupts if they were on before the fault */ + if ((regs->sr & SR_IMASK) != SR_IMASK) + local_irq_enable(); + + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + /* * If we're in an interrupt or have no user * context, we must not take the fault.. @@ -141,10 +160,15 @@ survive: goto do_sigbus; BUG(); } - if (fault & VM_FAULT_MAJOR) + if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - else + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + regs, address); + } else { tsk->min_flt++; + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + regs, address); + } up_read(&mm->mmap_sem); return; @@ -245,22 +269,6 @@ do_sigbus: goto no_context; } -static inline int notify_page_fault(struct pt_regs *regs, int trap) -{ - int ret = 0; - -#ifdef CONFIG_KPROBES - if (!user_mode(regs)) { - preempt_disable(); - if (kprobe_running() && kprobe_fault_handler(regs, trap)) - ret = 1; - preempt_enable(); - } -#endif - - return ret; -} - /* * Called with interrupts disabled. */ @@ -273,12 +281,7 @@ asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs, pmd_t *pmd; pte_t *pte; pte_t entry; - int ret = 0; - - if (notify_page_fault(regs, lookup_exception_vector())) - goto out; - - ret = 1; + int ret = 1; /* * We don't take page faults for P1, P2, and parts of P4, these From 163b2f0ba93e9298b3d5fff2337d860c3872ec60 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 25 Jun 2009 02:49:03 +0900 Subject: [PATCH 055/741] sh64: Hook up page fault events for software perf counters. 
sh64 can use these as well, so tie them up there as well. Signed-off-by: Paul Mundt --- arch/sh/mm/tlbflush_64.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c index fcbb6e135cef..3ce40ea34824 100644 --- a/arch/sh/mm/tlbflush_64.c +++ b/arch/sh/mm/tlbflush_64.c @@ -3,7 +3,7 @@ * * Copyright (C) 2000, 2001 Paolo Alberelli * Copyright (C) 2003 Richard Curnow (/proc/tlb, bug fixes) - * Copyright (C) 2003 Paul Mundt + * Copyright (C) 2003 - 2009 Paul Mundt * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -115,6 +116,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess, /* Not an IO address, so reenable interrupts */ local_irq_enable(); + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + /* * If we're in an interrupt or have no user * context, we must not take the fault.. @@ -195,10 +198,16 @@ survive: goto do_sigbus; BUG(); } - if (fault & VM_FAULT_MAJOR) + + if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - else + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + regs, address); + } else { tsk->min_flt++; + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + regs, address); + } /* If we get here, the page fault has been handled. Do the TLB refill now from the newly-setup PTE, to avoid having to fault again right From 1b173f77dd0d5fd4f0ff18034aaa79e30da068b9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 24 Jun 2009 19:54:29 +0200 Subject: [PATCH 056/741] perf_counter tools: Add CREDITS file for Git contributors Much of perf's libraries comes from the Git project. I noticed that the files (in tools/perf/util/*.[ch] and elsewhere) are quite spartan wrt. credits, so lets add a CREDITS file that includes an (incomplete!) 
list of main contributors. Thanks guys, these libraries are really useful. Special thanks go to Johannes Schindelin and Junio C Hamano for coming up with this list. List-Composed-By: Johannes Schindelin Cc: Junio C Hamano Signed-off-by: Ingo Molnar --- tools/perf/CREDITS | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 tools/perf/CREDITS diff --git a/tools/perf/CREDITS b/tools/perf/CREDITS new file mode 100644 index 000000000000..c2ddcb3acbd0 --- /dev/null +++ b/tools/perf/CREDITS @@ -0,0 +1,30 @@ +Most of the infrastructure that 'perf' uses here has been reused +from the Git project, as of version: + + 66996ec: Sync with 1.6.2.4 + +Here is an (incomplete!) list of main contributors to those files +in util/* and elsewhere: + + Alex Riesen + Christian Couder + Dmitry Potapov + Jeff King + Johannes Schindelin + Johannes Sixt + Junio C Hamano + Linus Torvalds + Matthias Kestenholz + Michal Ostrowski + Miklos Vajna + Petr Baudis + Pierre Habouzit + René Scharfe + Samuel Tardieu + Shawn O. Pearce + Steffen Prohaska + Steve Haslam + +Thanks guys! + +The full history of the files can be found in the upstream Git commits. From febe8345353e8873e43f2c2c9792d062c770b22b Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 25 Jun 2009 14:41:57 +0900 Subject: [PATCH 057/741] perf_counter tools: add cpu_relax()/rmb() definitions for sh. Simple cpu_relax()/rmb() stubs that perf needs, which were inadvertently omitted from the sh HAVE_PERF_COUNTERS patch. 
Signed-off-by: Paul Mundt --- tools/perf/perf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/perf/perf.h b/tools/perf/perf.h index ceb68aa51f7f..f735b6924a2f 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -19,6 +19,16 @@ #define cpu_relax() asm volatile("" ::: "memory"); #endif +#ifdef __sh__ +#include "../../arch/sh/include/asm/unistd.h" +#if defined(__SH4A__) || defined(__SH5__) +# define rmb() asm volatile("synco" ::: "memory") +#else +# define rmb() asm volatile("" ::: "memory") +#endif +#define cpu_relax() asm volatile("" ::: "memory") +#endif + #include #include #include From 342ba1039ad7cf464c7927ddf1ddc10d48a3716b Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Wed, 24 Jun 2009 18:39:09 -0300 Subject: [PATCH 058/741] mtd: cmdlineparts: Use 64-bit format when printing a debug message. Commit 69423d99fc182a81f3c5db3eb5c140acc6fc64be ("[MTD] update internal API to support 64-bit device size") has changed some structure values to 64-bit and has not updated this debug message, since it's not built by default. 
Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: David Woodhouse --- drivers/mtd/cmdlinepart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/cmdlinepart.c b/drivers/mtd/cmdlinepart.c index 5011fa73f918..1479da6d3aa6 100644 --- a/drivers/mtd/cmdlinepart.c +++ b/drivers/mtd/cmdlinepart.c @@ -194,7 +194,7 @@ static struct mtd_partition * newpart(char *s, parts[this_part].name = extra_mem; extra_mem += name_len + 1; - dbg(("partition %d: name <%s>, offset %x, size %x, mask flags %x\n", + dbg(("partition %d: name <%s>, offset %llx, size %llx, mask flags %x\n", this_part, parts[this_part].name, parts[this_part].offset, From ae27a7ab2c74f9c075e03730c5f493163d048c62 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Wed, 24 Jun 2009 18:40:46 -0300 Subject: [PATCH 059/741] mtd: atmel_nand: Fix typo s/parititions/partitions/ Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: David Woodhouse --- drivers/mtd/nand/atmel_nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c index 2802992b39da..20c828ba9405 100644 --- a/drivers/mtd/nand/atmel_nand.c +++ b/drivers/mtd/nand/atmel_nand.c @@ -534,7 +534,7 @@ static int __init atmel_nand_probe(struct platform_device *pdev) &num_partitions); if ((!partitions) || (num_partitions == 0)) { - printk(KERN_ERR "atmel_nand: No parititions defined, or unsupported device.\n"); + printk(KERN_ERR "atmel_nand: No partitions defined, or unsupported device.\n"); res = ENXIO; goto err_no_partitions; } From a9d9058abab4ac17b79d500506e6c74bd16cecdc Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 25 Jun 2009 10:16:11 +0100 Subject: [PATCH 060/741] kmemleak: Allow the early log buffer to be configurable. (feature suggested by Sergey Senozhatsky) Kmemleak needs to track all the memory allocations but some of these happen before kmemleak is initialised. 
These are stored in an internal buffer which may be exceeded in some kernel configurations. This patch adds a configuration option with a default value of 400 and also removes the stack dump when the early log buffer is exceeded. Signed-off-by: Catalin Marinas Acked-by: Sergey Senozhatsky --- Documentation/kmemleak.txt | 4 ++++ lib/Kconfig.debug | 12 ++++++++++++ mm/kmemleak.c | 5 +++-- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt index 0112da3b9ab8..f655308064d7 100644 --- a/Documentation/kmemleak.txt +++ b/Documentation/kmemleak.txt @@ -41,6 +41,10 @@ Memory scanning parameters can be modified at run-time by writing to the Kmemleak can also be disabled at boot-time by passing "kmemleak=off" on the kernel command line. +Memory may be allocated or freed before kmemleak is initialised and +these actions are stored in an early log buffer. The size of this buffer +is configured via the CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE option. + Basic Algorithm --------------- diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 4c32b1a1a06e..12327b2bb785 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -359,6 +359,18 @@ config DEBUG_KMEMLEAK In order to access the kmemleak file, debugfs needs to be mounted (usually at /sys/kernel/debug). +config DEBUG_KMEMLEAK_EARLY_LOG_SIZE + int "Maximum kmemleak early log entries" + depends on DEBUG_KMEMLEAK + range 200 2000 + default 400 + help + Kmemleak must track all the memory allocations to avoid + reporting false positives. Since memory may be allocated or + freed before kmemleak is initialised, an early log buffer is + used to store these actions. If kmemleak reports "early log + buffer exceeded", please increase this value. 
+ config DEBUG_KMEMLEAK_TEST tristate "Simple test for the kernel memory leak detector" depends on DEBUG_KMEMLEAK diff --git a/mm/kmemleak.c b/mm/kmemleak.c index c96f2c8700aa..17096d1b59b2 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -235,7 +235,7 @@ struct early_log { }; /* early logging buffer and current position */ -static struct early_log early_log[200]; +static struct early_log early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE]; static int crt_early_log; static void kmemleak_disable(void); @@ -696,7 +696,8 @@ static void log_early(int op_type, const void *ptr, size_t size, struct early_log *log; if (crt_early_log >= ARRAY_SIZE(early_log)) { - kmemleak_stop("Early log buffer exceeded\n"); + pr_warning("Early log buffer exceeded\n"); + kmemleak_disable(); return; } From 76c64c5e4c47b6d28deb3cae8dfa07a93c2229dc Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 24 Jun 2009 21:08:36 +0200 Subject: [PATCH 061/741] perf record: Fix filemap pathname parsing in /proc/pid/maps Looking backward for the first space from the end of a line in /proc/pid/maps does not find the start of the pathname of the mapped file if it contains a space. Since the only slashes we have in this file occur in the (absolute!) pathname column of file mappings, looking for the first slash in a line is a safe method to find the name. Signed-off-by: Johannes Weiner Cc: Stefani Seibold Cc: Andrew Morton Cc: "Eric W. 
Biederman" Cc: Alexey Dobriyan Cc: Peter Zijlstra LKML-Reference: <20090624190835.GA25548@cmpxchg.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d7ebbd757543..9b899ba1b410 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -306,12 +306,11 @@ static void pid_synthesize_mmap_samples(pid_t pid) continue; pbf += n + 3; if (*pbf == 'x') { /* vm_exec */ - char *execname = strrchr(bf, ' '); + char *execname = strchr(bf, '/'); - if (execname == NULL || execname[1] != '/') + if (execname == NULL) continue; - execname += 1; size = strlen(execname); execname[size - 1] = '\0'; /* Remove \n */ memcpy(mmap_ev.filename, execname, size); From 06813f6c743420c16f9248ab59bd2e68a2de57ba Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 25 Jun 2009 17:16:07 +0530 Subject: [PATCH 062/741] perf_counter tools: Check for valid cache operations Made new table for cache operation stat 'hw_cache_stat' as: L1I : Read and prefetch only ITLB and BPU : Read-only introduce is_cache_op_valid() for cache operation validity And checks for valid cache operations.
Reported-by : Ingo Molnar Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245930367.5308.33.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 06af2fadcd87..7939a21130d2 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -90,6 +90,34 @@ static char *hw_cache_result[][MAX_ALIASES] = { { "Miss" }, }; +#define C(x) PERF_COUNT_HW_CACHE_##x +#define CACHE_READ (1 << C(OP_READ)) +#define CACHE_WRITE (1 << C(OP_WRITE)) +#define CACHE_PREFETCH (1 << C(OP_PREFETCH)) +#define COP(x) (1 << x) + +/* + * cache operartion stat + * L1I : Read and prefetch only + * ITLB and BPU : Read-only + */ +static unsigned long hw_cache_stat[C(MAX)] = { + [C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), + [C(L1I)] = (CACHE_READ | CACHE_PREFETCH), + [C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), + [C(DTLB)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), + [C(ITLB)] = (CACHE_READ), + [C(BPU)] = (CACHE_READ), +}; + +static int is_cache_op_valid(u8 cache_type, u8 cache_op) +{ + if (hw_cache_stat[cache_type] & COP(cache_op)) + return 1; /* valid */ + else + return 0; /* invalid */ +} + char *event_name(int counter) { u64 config = attrs[counter].config; @@ -123,6 +151,8 @@ char *event_name(int counter) if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX) return "unknown-ext-hardware-cache-result"; + if (!is_cache_op_valid(cache_type, cache_op)) + return "invalid-cache"; sprintf(name, "%s-Cache-%s-%ses", hw_cache[cache_type][0], hw_cache_op[cache_op][0], @@ -179,6 +209,9 @@ parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) if (cache_op == -1) cache_op = PERF_COUNT_HW_CACHE_OP_READ; + if (!is_cache_op_valid(cache_type, cache_op)) + return -EINVAL; + cache_result = parse_aliases(str, hw_cache_result, 
PERF_COUNT_HW_CACHE_RESULT_MAX); /* From 1fbcf37128cc19bd67d9a736fb634dc444e907d7 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 25 Jun 2009 21:17:19 +0900 Subject: [PATCH 063/741] sh: Kill off unused DEBUG_BOOTMEM symbol. This was killed off in generic code some time ago, kill off the left over symbol. Signed-off-by: Paul Mundt --- arch/sh/Kconfig.debug | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug index 8ece0b5bd028..39224b57c6ef 100644 --- a/arch/sh/Kconfig.debug +++ b/arch/sh/Kconfig.debug @@ -61,10 +61,6 @@ config EARLY_PRINTK select both the EARLY_SCIF_CONSOLE and SH_STANDARD_BIOS, using the kernel command line option to toggle back and forth. -config DEBUG_BOOTMEM - depends on DEBUG_KERNEL - bool "Debug BOOTMEM initialization" - config DEBUG_STACKOVERFLOW bool "Check for stack overflows" depends on DEBUG_KERNEL && SUPERH32 From 7aa5514e7170c6179272bc638a980adc1738fd29 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Mon, 22 Jun 2009 09:23:36 +0100 Subject: [PATCH 064/741] [ARM] 5560/1: Avoid buffer overrun in case of an invalid IRQ handle_bad_irq() expects the IRQ number to be valid (used for statistics), so it cannot be called with an illegal vector. The problem was reported by a static analysis tool. The change makes bad_irq_desc redundant, so delete it. 
Signed-off-by: Aaro Koskinen Signed-off-by: Russell King --- arch/arm/kernel/irq.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 096f600dc8d8..b7c3490eaa24 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -98,17 +98,6 @@ unlock: return 0; } -/* Handle bad interrupts */ -static struct irq_desc bad_irq_desc = { - .handle_irq = handle_bad_irq, - .lock = __SPIN_LOCK_UNLOCKED(bad_irq_desc.lock), -}; - -#ifdef CONFIG_CPUMASK_OFFSTACK -/* We are not allocating bad_irq_desc.affinity or .pending_mask */ -#error "ARM architecture does not support CONFIG_CPUMASK_OFFSTACK." -#endif - /* * do_IRQ handles all hardware IRQ's. Decoded IRQs should not * come via this function. Instead, they should provide their @@ -124,10 +113,13 @@ asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs) * Some hardware gives randomly wrong interrupts. Rather * than crashing, do something sensible. */ - if (irq >= NR_IRQS) - handle_bad_irq(irq, &bad_irq_desc); - else + if (unlikely(irq >= NR_IRQS)) { + if (printk_ratelimit()) + printk(KERN_WARNING "Bad IRQ%u\n", irq); + ack_bad_irq(irq); + } else { generic_handle_irq(irq); + } /* AT91 specific workaround */ irq_finish(irq); @@ -165,10 +157,6 @@ void __init init_IRQ(void) for (irq = 0; irq < NR_IRQS; irq++) irq_desc[irq].status |= IRQ_NOREQUEST | IRQ_NOPROBE; -#ifdef CONFIG_SMP - cpumask_setall(bad_irq_desc.affinity); - bad_irq_desc.node = smp_processor_id(); -#endif init_arch_irq(); } From f6430a938dc6d77e33722aaf6a58382b3423935d Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 24 Jun 2009 23:38:56 +0100 Subject: [PATCH 065/741] [ARM] 5565/2: Use PAGE_SIZE and RO_DATA() in link script Update the link script for ARM to use PAGE_SIZE instead of hard- coded 4096. Also the old RODATA macro is deprecated for the RO_DATA(PAGE_SIZE) macro. 
As a consequence the PAGE_SIZE was changed from (1UL << PAGE_SHIFT) to (_AC(1,UL) << PAGE_SHIFT) because the linker does not understand the "UL" suffix to numeric constants. Signed-off-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/include/asm/page.h | 2 +- arch/arm/kernel/vmlinux.lds.S | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index be962c1349c4..9c746af1bf6e 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -12,7 +12,7 @@ /* PAGE_SHIFT determines the page size */ #define PAGE_SHIFT 12 -#define PAGE_SIZE (1UL << PAGE_SHIFT) +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) #ifndef __ASSEMBLY__ diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 4340bf3d2c84..69371028a202 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -6,6 +6,7 @@ #include #include #include +#include OUTPUT_ARCH(arm) ENTRY(stext) @@ -63,7 +64,7 @@ SECTIONS usr/built-in.o(.init.ramfs) __initramfs_end = .; #endif - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); __per_cpu_load = .; __per_cpu_start = .; *(.data.percpu.page_aligned) @@ -73,7 +74,7 @@ SECTIONS #ifndef CONFIG_XIP_KERNEL __init_begin = _stext; INIT_DATA - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); __init_end = .; #endif } @@ -118,7 +119,7 @@ SECTIONS *(.got) /* Global offset table */ } - RODATA + RO_DATA(PAGE_SIZE) _etext = .; /* End of text and rodata section */ @@ -158,17 +159,17 @@ SECTIONS *(.data.init_task) #ifdef CONFIG_XIP_KERNEL - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); __init_begin = .; INIT_DATA - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); __init_end = .; #endif - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); __nosave_begin = .; *(.data.nosave) - . = ALIGN(4096); + . 
= ALIGN(PAGE_SIZE); __nosave_end = .; /* From 308ff823ebd749a94d3b6ac26b95bc0eb114c39e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 25 Jun 2009 16:32:52 +0200 Subject: [PATCH 066/741] nf_conntrack: Use rcu_barrier() RCU barriers, rcu_barrier(), are inserted in two places. In nf_conntrack_expect.c nf_conntrack_expect_fini() before the kmem_cache_destroy(). Firstly to make sure the callback to the nf_ct_expect_free_rcu() code is still around. Secondly because I'm unsure about the consequence of having in flight nf_ct_expect_free_rcu/kmem_cache_free() calls while doing a kmem_cache_destroy() slab destroy. And in nf_conntrack_extend.c nf_ct_extend_unregister(), in order to wait for completion of callbacks to __nf_ct_ext_free_rcu(), which is invoked by __nf_ct_ext_add(). It might be more efficient to call rcu_barrier() in nf_conntrack_core.c nf_conntrack_cleanup_net(), but that would make it more difficult to read the code (as the callback code is located in nf_conntrack_extend.c).
Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_expect.c | 4 +++- net/netfilter/nf_conntrack_extend.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index afde8f991646..2032dfe25ca8 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -617,8 +617,10 @@ err1: void nf_conntrack_expect_fini(struct net *net) { exp_proc_remove(net); - if (net_eq(net, &init_net)) + if (net_eq(net, &init_net)) { + rcu_barrier(); /* Wait for call_rcu() before destroy */ kmem_cache_destroy(nf_ct_expect_cachep); + } nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, nf_ct_expect_hsize); } diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 4b2c769d555f..fef95be334bd 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -186,6 +186,6 @@ void nf_ct_extend_unregister(struct nf_ct_ext_type *type) rcu_assign_pointer(nf_ct_ext_types[type->id], NULL); update_alloc_size(type); mutex_unlock(&nf_ct_ext_type_mutex); - synchronize_rcu(); + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } EXPORT_SYMBOL_GPL(nf_ct_extend_unregister); From e5c59547791f171b280bc4c4b2c3ff171824c1a3 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 25 Jun 2009 18:25:22 +0530 Subject: [PATCH 067/741] perf_counter tools: Shorten names for events Added new alias for events. 
On AMD box: $ ./perf stat -e l1d -e l1d-misses -e l1d-write -e l1d-prefetch -e l1d-prefetch-miss -e l1i -e l1i-misses -e l1i-prefetch -e l2 -e l2-misses -e l2-write -e dtlb -e dtlb-misses -e itlb -e itlb-misses -e bpu -e bpu-misses -- ls -lR /usr/include/ > /dev/null Before : Performance counter stats for 'ls -lR /usr/include/': 248064467 L1-data-Cache-Load-Referencees (scaled from 23.27%) 1001433 L1-data-Cache-Load-Misses (scaled from 23.34%) 153691 L1-data-Cache-Store-Referencees (scaled from 23.34%) 423248 L1-data-Cache-Prefetch-Referencees (scaled from 23.33%) 302138 L1-data-Cache-Prefetch-Misses (scaled from 23.25%) 251217546 L1-instruction-Cache-Load-Referencees (scaled from 23.25%) 5757005 L1-instruction-Cache-Load-Misses (scaled from 23.23%) 93435 L1-instruction-Cache-Prefetch-Referencees (scaled from 23.24%) 6496073 L2-Cache-Load-Referencees (scaled from 23.32%) 609485 L2-Cache-Load-Misses (scaled from 23.45%) 6876991 L2-Cache-Store-Referencees (scaled from 23.71%) 248922840 Data-TLB-Cache-Load-Referencees (scaled from 23.94%) 5828386 Data-TLB-Cache-Load-Misses (scaled from 24.17%) 257613506 Instruction-TLB-Cache-Load-Referencees (scaled from 24.20%) 6833 Instruction-TLB-Cache-Load-Misses (scaled from 23.88%) 109043606 Branch-Cache-Load-Referencees (scaled from 23.64%) 5552296 Branch-Cache-Load-Misses (scaled from 23.42%) 0.413702461 seconds time elapsed. 
After : Peformance counter stats for 'ls -lR /usr/include/': 266590464 L1-d$-loads (scaled from 23.03%) 1222273 L1-d$-load-misses (scaled from 23.58%) 146204 L1-d$-stores (scaled from 23.83%) 406344 L1-d$-prefetches (scaled from 24.09%) 283748 L1-d$-prefetch-misses (scaled from 24.10%) 249650965 L1-i$-loads (scaled from 23.80%) 3353961 L1-i$-load-misses (scaled from 23.82%) 104599 L1-i$-prefetches (scaled from 23.68%) 4836405 LLC-loads (scaled from 23.67%) 498214 LLC-load-misses (scaled from 23.66%) 4953994 LLC-stores (scaled from 23.64%) 243354097 dTLB-loads (scaled from 23.77%) 6468584 dTLB-load-misses (scaled from 23.74%) 249719549 iTLB-loads (scaled from 23.25%) 5060 iTLB-load-misses (scaled from 23.00%) 112343016 branch-loads (scaled from 22.76%) 5528876 branch-load-misses (scaled from 22.54%) 0.427154051 seconds time elapsed. Reported-by : Ingo Molnar Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245934522.5308.39.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 45 +++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 7939a21130d2..430f06083201 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -71,23 +71,23 @@ static char *sw_event_names[] = { #define MAX_ALIASES 8 static char *hw_cache[][MAX_ALIASES] = { - { "L1-data", "l1-d", "l1d" }, - { "L1-instruction", "l1-i", "l1i" }, - { "L2", "l2" }, - { "Data-TLB", "dtlb", "d-tlb" }, - { "Instruction-TLB", "itlb", "i-tlb" }, - { "Branch", "bpu" , "btb", "bpc" }, + { "L1-d$", "l1-d", "L1-data", }, + { "L1-i$", "l1-i", "L1-instruction", }, + { "LLC", "L2" }, + { "dTLB", "d-tlb", "Data-TLB", }, + { "iTLB", "i-tlb", "Instruction-TLB", }, + { "branch", "branches", "bpu", "btb", "bpc", }, }; static char *hw_cache_op[][MAX_ALIASES] = { - { "Load", "read" }, - { "Store", "write" }, - { "Prefetch", 
"speculative-read", "speculative-load" }, + { "load", "loads", "read", }, + { "store", "stores", "write", }, + { "prefetch", "prefetches", "speculative-read", "speculative-load", }, }; static char *hw_cache_result[][MAX_ALIASES] = { - { "Reference", "ops", "access" }, - { "Miss" }, + { "refs", "Reference", "ops", "access", }, + { "misses", "miss", }, }; #define C(x) PERF_COUNT_HW_CACHE_##x @@ -118,6 +118,22 @@ static int is_cache_op_valid(u8 cache_type, u8 cache_op) return 0; /* invalid */ } +static char *event_cache_name(u8 cache_type, u8 cache_op, u8 cache_result) +{ + static char name[50]; + + if (cache_result) { + sprintf(name, "%s-%s-%s", hw_cache[cache_type][0], + hw_cache_op[cache_op][0], + hw_cache_result[cache_result][0]); + } else { + sprintf(name, "%s-%s", hw_cache[cache_type][0], + hw_cache_op[cache_op][1]); + } + + return name; +} + char *event_name(int counter) { u64 config = attrs[counter].config; @@ -137,7 +153,6 @@ char *event_name(int counter) case PERF_TYPE_HW_CACHE: { u8 cache_type, cache_op, cache_result; - static char name[100]; cache_type = (config >> 0) & 0xff; if (cache_type > PERF_COUNT_HW_CACHE_MAX) @@ -153,12 +168,8 @@ char *event_name(int counter) if (!is_cache_op_valid(cache_type, cache_op)) return "invalid-cache"; - sprintf(name, "%s-Cache-%s-%ses", - hw_cache[cache_type][0], - hw_cache_op[cache_op][0], - hw_cache_result[cache_result][0]); - return name; + return event_cache_name(cache_type, cache_op, cache_result); } case PERF_TYPE_SOFTWARE: From e08afeb7e69f45e4ab9fbb8530fe433484b96606 Mon Sep 17 00:00:00 2001 From: Brian King Date: Tue, 23 Jun 2009 17:14:01 -0500 Subject: [PATCH 068/741] [SCSI] ibmvscsi: Fix module load hang Fixes a regression seen in the ibmvscsi driver when using the VSCSI server in SLES 9 and SLES 10. The VSCSI server in these releases has a bug in it in which it does not send responses to unknown MADs. 
Check the OS Type field in the adapter info response and do not send these unsupported commands when talking to an older server. Signed-off-by: Brian King Signed-off-by: James Bottomley --- drivers/scsi/ibmvscsi/ibmvscsi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 869a11bdccbd..9928704e235f 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -1095,9 +1095,14 @@ static void adapter_info_rsp(struct srp_event_struct *evt_struct) MAX_INDIRECT_BUFS); hostdata->host->sg_tablesize = MAX_INDIRECT_BUFS; } + + if (hostdata->madapter_info.os_type == 3) { + enable_fast_fail(hostdata); + return; + } } - enable_fast_fail(hostdata); + send_srp_login(hostdata); } /** From 87a2d34b0372dcf6bc4caf4d97a7889f5e62a1af Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 23 Jun 2009 01:06:40 +0200 Subject: [PATCH 069/741] [SCSI] fnic: remove redundant BUG_ONs and fix checks on unsigned The shost sg tablesize is set to FNIC_MAX_SG_DESC_CNT and fnic uses scsi_dma_map, so both BUG_ONs can be removed. scsi_dma_map may return -ENOMEM, sg_count should be int to catch that. 
Signed-off-by: Roel Kluin Signed-off-by: James Bottomley --- drivers/scsi/fnic/fnic_scsi.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index eabf36502856..bfc996971b81 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -245,7 +245,7 @@ static inline int fnic_queue_wq_copy_desc(struct fnic *fnic, struct vnic_wq_copy *wq, struct fnic_io_req *io_req, struct scsi_cmnd *sc, - u32 sg_count) + int sg_count) { struct scatterlist *sg; struct fc_rport *rport = starget_to_rport(scsi_target(sc->device)); @@ -260,9 +260,6 @@ static inline int fnic_queue_wq_copy_desc(struct fnic *fnic, char msg[2]; if (sg_count) { - BUG_ON(sg_count < 0); - BUG_ON(sg_count > FNIC_MAX_SG_DESC_CNT); - /* For each SGE, create a device desc entry */ desc = io_req->sgl_list; for_each_sg(scsi_sglist(sc), sg, sg_count, i) { @@ -344,7 +341,7 @@ int fnic_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) struct fnic *fnic; struct vnic_wq_copy *wq; int ret; - u32 sg_count; + int sg_count; unsigned long flags; unsigned long ptr; From e3f47cc74bddea8121560026185ede4770170043 Mon Sep 17 00:00:00 2001 From: Abhijeet Joglekar Date: Wed, 24 Jun 2009 07:42:25 -0700 Subject: [PATCH 070/741] [SCSI] fnic: use DMA_BIT_MASK(nn) instead of deprecated DMA_nnBIT_MASK Robert Love reported warning while building fnic_main.c: drivers/scsi/fnic/fnic_main.c:478: warning: `DMA_nnBIT_MASK' is deprecated. 
Replaced use of DMA_nnBIT_MASK by DMA_BIT_MASK(nn) Signed-off-by: Abhijeet Joglekar Signed-off-by: James Bottomley --- drivers/scsi/fnic/fnic_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c index a84072865fc2..2c266c01dc5a 100644 --- a/drivers/scsi/fnic/fnic_main.c +++ b/drivers/scsi/fnic/fnic_main.c @@ -473,16 +473,16 @@ static int __devinit fnic_probe(struct pci_dev *pdev, * limitation for the device. Try 40-bit first, and * fail to 32-bit. */ - err = pci_set_dma_mask(pdev, DMA_40BIT_MASK); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40)); if (err) { - err = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { shost_printk(KERN_ERR, fnic->lport->host, "No usable DMA configuration " "aborting\n"); goto err_out_release_regions; } - err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { shost_printk(KERN_ERR, fnic->lport->host, "Unable to obtain 32-bit DMA " @@ -490,7 +490,7 @@ static int __devinit fnic_probe(struct pci_dev *pdev, goto err_out_release_regions; } } else { - err = pci_set_consistent_dma_mask(pdev, DMA_40BIT_MASK); + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); if (err) { shost_printk(KERN_ERR, fnic->lport->host, "Unable to obtain 40-bit DMA " From d3a263a8168f78874254ea9da9595cfb0f3e96d7 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 24 Jun 2009 19:55:22 +0000 Subject: [PATCH 071/741] [SCSI] zalon: fix oops on attach failure I recently discovered on my zalon that if the attachment fails because of a bus misconfiguration (I scrapped my HVD array, so the card is now unterminated) then the system oopses. The reason is that if ncr_attach() returns NULL (signalling failure) that NULL is passed by the goto failed straight into ncr_detach() which oopses. The fix is just to return -ENODEV in this case. 
Cc: Stable Tree Signed-off-by: James Bottomley --- drivers/scsi/zalon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/zalon.c b/drivers/scsi/zalon.c index 97f3158fa7b5..27e84e4b1fa9 100644 --- a/drivers/scsi/zalon.c +++ b/drivers/scsi/zalon.c @@ -134,7 +134,7 @@ zalon_probe(struct parisc_device *dev) host = ncr_attach(&zalon7xx_template, unit, &device); if (!host) - goto fail; + return -ENODEV; if (request_irq(dev->irq, ncr53c8xx_intr, IRQF_SHARED, "zalon", host)) { dev_printk(KERN_ERR, &dev->dev, "irq problem with %d, detaching\n ", From 6fdc03709433ccc2005f0f593ae9d9dd04f7b485 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 20 Jun 2009 13:23:59 +0200 Subject: [PATCH 072/741] firewire: core: do not DMA-map stack addresses The DMA mapping API cannot map on-stack addresses, as explained in Documentation/DMA-mapping.txt. Convert the two cases of on-stack packet payload buffers in firewire-core (payload of lock requests in the bus manager work and in iso resource management) to slab-allocated memory. There are a number of on-stack buffers for quadlet write or quadlet read requests in firewire-core and firewire-sbp2. These are harmless; they are copied to/from card driver internal DMA buffers since quadlet payloads are inlined with packet headers.
Signed-off-by: Stefan Richter --- drivers/firewire/core-card.c | 14 +++++++------- drivers/firewire/core-cdev.c | 4 +++- drivers/firewire/core-iso.c | 24 +++++++++++++----------- drivers/firewire/core.h | 3 ++- include/linux/firewire.h | 1 + 5 files changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 543fccac81bb..f74edae5cb4c 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -196,8 +196,8 @@ static void allocate_broadcast_channel(struct fw_card *card, int generation) { int channel, bandwidth = 0; - fw_iso_resource_manage(card, generation, 1ULL << 31, - &channel, &bandwidth, true); + fw_iso_resource_manage(card, generation, 1ULL << 31, &channel, + &bandwidth, true, card->bm_transaction_data); if (channel == 31) { card->broadcast_channel_allocated = true; device_for_each_child(card->device, (void *)(long)generation, @@ -230,7 +230,6 @@ static void fw_card_bm_work(struct work_struct *work) bool do_reset = false; bool root_device_is_running; bool root_device_is_cmc; - __be32 lock_data[2]; spin_lock_irqsave(&card->lock, flags); @@ -273,22 +272,23 @@ static void fw_card_bm_work(struct work_struct *work) goto pick_me; } - lock_data[0] = cpu_to_be32(0x3f); - lock_data[1] = cpu_to_be32(local_id); + card->bm_transaction_data[0] = cpu_to_be32(0x3f); + card->bm_transaction_data[1] = cpu_to_be32(local_id); spin_unlock_irqrestore(&card->lock, flags); rcode = fw_run_transaction(card, TCODE_LOCK_COMPARE_SWAP, irm_id, generation, SCODE_100, CSR_REGISTER_BASE + CSR_BUS_MANAGER_ID, - lock_data, sizeof(lock_data)); + card->bm_transaction_data, + sizeof(card->bm_transaction_data)); if (rcode == RCODE_GENERATION) /* Another bus reset, BM work has been rescheduled. */ goto out; if (rcode == RCODE_COMPLETE && - lock_data[0] != cpu_to_be32(0x3f)) { + card->bm_transaction_data[0] != cpu_to_be32(0x3f)) { /* Somebody else is BM. Only act as IRM. 
*/ if (local_id == irm_id) diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index d1d30c615b0f..ced186d7e9a9 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -125,6 +125,7 @@ struct iso_resource { int generation; u64 channels; s32 bandwidth; + __be32 transaction_data[2]; struct iso_resource_event *e_alloc, *e_dealloc; }; @@ -1049,7 +1050,8 @@ static void iso_resource_work(struct work_struct *work) r->channels, &channel, &bandwidth, todo == ISO_RES_ALLOC || todo == ISO_RES_REALLOC || - todo == ISO_RES_ALLOC_ONCE); + todo == ISO_RES_ALLOC_ONCE, + r->transaction_data); /* * Is this generation outdated already? As long as this resource sticks * in the idr, it will be scheduled again for a newer generation or at diff --git a/drivers/firewire/core-iso.c b/drivers/firewire/core-iso.c index 166f19c6d38d..110e731f5574 100644 --- a/drivers/firewire/core-iso.c +++ b/drivers/firewire/core-iso.c @@ -177,9 +177,8 @@ EXPORT_SYMBOL(fw_iso_context_stop); */ static int manage_bandwidth(struct fw_card *card, int irm_id, int generation, - int bandwidth, bool allocate) + int bandwidth, bool allocate, __be32 data[2]) { - __be32 data[2]; int try, new, old = allocate ? BANDWIDTH_AVAILABLE_INITIAL : 0; /* @@ -215,9 +214,9 @@ static int manage_bandwidth(struct fw_card *card, int irm_id, int generation, } static int manage_channel(struct fw_card *card, int irm_id, int generation, - u32 channels_mask, u64 offset, bool allocate) + u32 channels_mask, u64 offset, bool allocate, __be32 data[2]) { - __be32 data[2], c, all, old; + __be32 c, all, old; int i, retry = 5; old = all = allocate ? 
cpu_to_be32(~0) : 0; @@ -260,7 +259,7 @@ static int manage_channel(struct fw_card *card, int irm_id, int generation, } static void deallocate_channel(struct fw_card *card, int irm_id, - int generation, int channel) + int generation, int channel, __be32 buffer[2]) { u32 mask; u64 offset; @@ -269,7 +268,7 @@ static void deallocate_channel(struct fw_card *card, int irm_id, offset = channel < 32 ? CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_HI : CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_LO; - manage_channel(card, irm_id, generation, mask, offset, false); + manage_channel(card, irm_id, generation, mask, offset, false, buffer); } /** @@ -298,7 +297,7 @@ static void deallocate_channel(struct fw_card *card, int irm_id, */ void fw_iso_resource_manage(struct fw_card *card, int generation, u64 channels_mask, int *channel, int *bandwidth, - bool allocate) + bool allocate, __be32 buffer[2]) { u32 channels_hi = channels_mask; /* channels 31...0 */ u32 channels_lo = channels_mask >> 32; /* channels 63...32 */ @@ -310,10 +309,12 @@ void fw_iso_resource_manage(struct fw_card *card, int generation, if (channels_hi) c = manage_channel(card, irm_id, generation, channels_hi, - CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_HI, allocate); + CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_HI, + allocate, buffer); if (channels_lo && c < 0) { c = manage_channel(card, irm_id, generation, channels_lo, - CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_LO, allocate); + CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_LO, + allocate, buffer); if (c >= 0) c += 32; } @@ -325,12 +326,13 @@ void fw_iso_resource_manage(struct fw_card *card, int generation, if (*bandwidth == 0) return; - ret = manage_bandwidth(card, irm_id, generation, *bandwidth, allocate); + ret = manage_bandwidth(card, irm_id, generation, *bandwidth, + allocate, buffer); if (ret < 0) *bandwidth = 0; if (allocate && ret < 0 && c >= 0) { - deallocate_channel(card, irm_id, generation, c); + deallocate_channel(card, irm_id, generation, c, buffer); 
*channel = ret; } } diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h index c3cfc647e5e3..6052816be353 100644 --- a/drivers/firewire/core.h +++ b/drivers/firewire/core.h @@ -120,7 +120,8 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event); int fw_iso_buffer_map(struct fw_iso_buffer *buffer, struct vm_area_struct *vma); void fw_iso_resource_manage(struct fw_card *card, int generation, - u64 channels_mask, int *channel, int *bandwidth, bool allocate); + u64 channels_mask, int *channel, int *bandwidth, + bool allocate, __be32 buffer[2]); /* -topology */ diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 9823946adbc5..192d1e43c43c 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -127,6 +127,7 @@ struct fw_card { struct delayed_work work; int bm_retries; int bm_generation; + __be32 bm_transaction_data[2]; bool broadcast_channel_allocated; u32 broadcast_channel; From dfc2f91ac29f5ef50e74bf15a1a6b6aa6b952e62 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 26 Jun 2009 04:31:57 +0900 Subject: [PATCH 073/741] nommu: provide follow_pfn(). With the introduction of follow_pfn() as an exported symbol, modules have begun making use of it. Unfortunately this was not reflected on nommu at the time, so the in-tree users have subsequently all blown up with link errors there. This provides a simple follow_pfn() that just returns addr >> PAGE_SHIFT, which will do the right thing on nommu. There is no need to do range checking within the vma, as the find_vma() case will already take care of this. 
Signed-off-by: Paul Mundt --- mm/nommu.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/mm/nommu.c b/mm/nommu.c index 2fd2ad5da98e..598bc871487a 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -240,6 +240,27 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, } EXPORT_SYMBOL(get_user_pages); +/** + * follow_pfn - look up PFN at a user virtual address + * @vma: memory mapping + * @address: user virtual address + * @pfn: location to store found PFN + * + * Only IO mappings and raw PFN mappings are allowed. + * + * Returns zero and the pfn at @pfn on success, -ve otherwise. + */ +int follow_pfn(struct vm_area_struct *vma, unsigned long address, + unsigned long *pfn) +{ + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + return -EINVAL; + + *pfn = address >> PAGE_SHIFT; + return 0; +} +EXPORT_SYMBOL(follow_pfn); + DEFINE_RWLOCK(vmlist_lock); struct vm_struct *vmlist; From 7c6a1c65bbd3be688e581511f45818663efc1877 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Jun 2009 17:05:54 +0200 Subject: [PATCH 074/741] perf_counter tools: Rework the file format Create a structured file format that includes the full perf_counter_attr and all its relevant counter IDs so that the reporting program has full information. 
Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 3 +- tools/perf/builtin-record.c | 100 +++++++++----- tools/perf/builtin-report.c | 37 ++++-- tools/perf/perf.h | 8 +- tools/perf/util/header.c | 242 ++++++++++++++++++++++++++++++++++ tools/perf/util/header.h | 37 ++++++ tools/perf/util/string.h | 2 +- tools/perf/util/symbol.h | 2 +- tools/perf/{ => util}/types.h | 0 9 files changed, 377 insertions(+), 54 deletions(-) create mode 100644 tools/perf/util/header.c create mode 100644 tools/perf/util/header.h rename tools/perf/{ => util}/types.h (100%) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 36d7eef49913..d3887ed51a64 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -290,7 +290,7 @@ LIB_FILE=libperf.a LIB_H += ../../include/linux/perf_counter.h LIB_H += perf.h -LIB_H += types.h +LIB_H += util/types.h LIB_H += util/list.h LIB_H += util/rbtree.h LIB_H += util/levenshtein.h @@ -328,6 +328,7 @@ LIB_OBJS += util/sigchain.o LIB_OBJS += util/symbol.o LIB_OBJS += util/color.o LIB_OBJS += util/pager.o +LIB_OBJS += util/header.o BUILTIN_OBJS += builtin-annotate.o BUILTIN_OBJS += builtin-help.o diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 9b899ba1b410..f4f0240d2302 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -14,6 +14,8 @@ #include "util/parse-events.h" #include "util/string.h" +#include "util/header.h" + #include #include @@ -52,7 +54,8 @@ static int nr_poll; static int nr_cpu; static int file_new = 1; -static struct perf_file_header file_header; + +struct perf_header *header; struct mmap_event { struct perf_event_header header; @@ -328,7 +331,7 @@ static void pid_synthesize_mmap_samples(pid_t pid) fclose(fp); } -static void synthesize_samples(void) +static void synthesize_all(void) { DIR *proc; struct dirent dirent, *next; @@ -352,10 +355,35 @@ static void synthesize_samples(void) static int group_fd; +static struct perf_header_attr 
*get_header_attr(struct perf_counter_attr *a, int nr) +{ + struct perf_header_attr *h_attr; + + if (nr < header->attrs) { + h_attr = header->attr[nr]; + } else { + h_attr = perf_header_attr__new(a); + perf_header__add_attr(header, h_attr); + } + + return h_attr; +} + static void create_counter(int counter, int cpu, pid_t pid) { struct perf_counter_attr *attr = attrs + counter; - int track = 1; + struct perf_header_attr *h_attr; + int track = !counter; /* only the first counter needs these */ + struct { + u64 count; + u64 time_enabled; + u64 time_running; + u64 id; + } read_data; + + attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING | + PERF_FORMAT_ID; attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; @@ -368,22 +396,11 @@ static void create_counter(int counter, int cpu, pid_t pid) if (call_graph) attr->sample_type |= PERF_SAMPLE_CALLCHAIN; - if (file_new) { - file_header.sample_type = attr->sample_type; - } else { - if (file_header.sample_type != attr->sample_type) { - fprintf(stderr, "incompatible append\n"); - exit(-1); - } - } - attr->mmap = track; attr->comm = track; attr->inherit = (cpu < 0) && inherit; attr->disabled = 1; - track = 0; /* only the first counter needs these */ - try_again: fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0); @@ -414,6 +431,19 @@ try_again: exit(-1); } + h_attr = get_header_attr(attr, counter); + + if (!file_new) { + if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { + fprintf(stderr, "incompatible append\n"); + exit(-1); + } + } + + read(fd[nr_cpu][counter], &read_data, sizeof(read_data)); + + perf_header_attr__add_id(h_attr, read_data.id); + assert(fd[nr_cpu][counter] >= 0); fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK); @@ -444,11 +474,6 @@ static void open_counters(int cpu, pid_t pid) { int counter; - if (pid > 0) { - pid_synthesize_comm_event(pid, 0); - pid_synthesize_mmap_samples(pid); - } - group_fd = -1; for (counter = 0; counter < nr_counters; counter++) 
create_counter(counter, cpu, pid); @@ -458,17 +483,16 @@ static void open_counters(int cpu, pid_t pid) static void atexit_header(void) { - file_header.data_size += bytes_written; + header->data_size += bytes_written; - if (pwrite(output, &file_header, sizeof(file_header), 0) == -1) - perror("failed to write on file headers"); + perf_header__write(header, output); } static int __cmd_record(int argc, const char **argv) { int i, counter; struct stat st; - pid_t pid; + pid_t pid = 0; int flags; int ret; @@ -499,22 +523,31 @@ static int __cmd_record(int argc, const char **argv) exit(-1); } - if (!file_new) { - if (read(output, &file_header, sizeof(file_header)) == -1) { - perror("failed to read file headers"); - exit(-1); - } - - lseek(output, file_header.data_size, SEEK_CUR); - } + if (!file_new) + header = perf_header__read(output); + else + header = perf_header__new(); atexit(atexit_header); if (!system_wide) { - open_counters(-1, target_pid != -1 ? target_pid : getpid()); + pid = target_pid; + if (pid == -1) + pid = getpid(); + + open_counters(-1, pid); } else for (i = 0; i < nr_cpus; i++) open_counters(i, target_pid); + if (file_new) + perf_header__write(header, output); + + if (!system_wide) { + pid_synthesize_comm_event(pid, 0); + pid_synthesize_mmap_samples(pid); + } else + synthesize_all(); + if (target_pid == -1 && argc) { pid = fork(); if (pid < 0) @@ -538,9 +571,6 @@ static int __cmd_record(int argc, const char **argv) } } - if (system_wide) - synthesize_samples(); - while (!done) { int hits = samples; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index b4e76f75ba87..e575f3039766 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -17,6 +17,7 @@ #include "util/string.h" #include "perf.h" +#include "util/header.h" #include "util/parse-options.h" #include "util/parse-events.h" @@ -1385,13 +1386,27 @@ process_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static struct 
perf_file_header file_header; +static struct perf_header *header; + +static int perf_header__has_sample(u64 sample_mask) +{ + int i; + + for (i = 0; i < header->attrs; i++) { + struct perf_header_attr *attr = header->attr[i]; + + if (!(attr->attr.sample_type & sample_mask)) + return 0; + } + + return 1; +} static int __cmd_report(void) { int ret, rc = EXIT_FAILURE; unsigned long offset = 0; - unsigned long head = sizeof(file_header); + unsigned long head, shift; struct stat stat; event_t *event; uint32_t size; @@ -1419,13 +1434,11 @@ static int __cmd_report(void) exit(0); } - if (read(input, &file_header, sizeof(file_header)) == -1) { - perror("failed to read file headers"); - exit(-1); - } + header = perf_header__read(input); + head = header->data_offset; if (sort__has_parent && - !(file_header.sample_type & PERF_SAMPLE_CALLCHAIN)) { + !perf_header__has_sample(PERF_SAMPLE_CALLCHAIN)) { fprintf(stderr, "selected --sort parent, but no callchain data\n"); exit(-1); } @@ -1445,6 +1458,11 @@ static int __cmd_report(void) cwd = NULL; cwdlen = 0; } + + shift = page_size * (head / page_size); + offset += shift; + head -= shift; + remap: buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, MAP_SHARED, input, offset); @@ -1461,9 +1479,10 @@ more: size = 8; if (head + event->header.size >= page_size * mmap_window) { - unsigned long shift = page_size * (head / page_size); int ret; + shift = page_size * (head / page_size); + ret = munmap(buf, page_size * mmap_window); assert(ret == 0); @@ -1501,7 +1520,7 @@ more: head += size; - if (offset + head >= sizeof(file_header) + file_header.data_size) + if (offset + head >= header->data_offset + header->data_size) goto done; if (offset + head < stat.st_size) diff --git a/tools/perf/perf.h b/tools/perf/perf.h index bccb529dac08..16c84fd73c86 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -19,7 +19,7 @@ #include #include "../../include/linux/perf_counter.h" -#include "types.h" +#include "util/types.h" /* * 
prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all @@ -66,10 +66,4 @@ sys_perf_counter_open(struct perf_counter_attr *attr, #define MAX_COUNTERS 256 #define MAX_NR_CPUS 256 -struct perf_file_header { - u64 version; - u64 sample_type; - u64 data_size; -}; - #endif diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c new file mode 100644 index 000000000000..450384b3bbe5 --- /dev/null +++ b/tools/perf/util/header.c @@ -0,0 +1,242 @@ +#include +#include +#include +#include + +#include "util.h" +#include "header.h" + +/* + * + */ + +struct perf_header_attr *perf_header_attr__new(struct perf_counter_attr *attr) +{ + struct perf_header_attr *self = malloc(sizeof(*self)); + + if (!self) + die("nomem"); + + self->attr = *attr; + self->ids = 0; + self->size = 1; + self->id = malloc(sizeof(u64)); + + if (!self->id) + die("nomem"); + + return self; +} + +void perf_header_attr__add_id(struct perf_header_attr *self, u64 id) +{ + int pos = self->ids; + + self->ids++; + if (self->ids > self->size) { + self->size *= 2; + self->id = realloc(self->id, self->size * sizeof(u64)); + if (!self->id) + die("nomem"); + } + self->id[pos] = id; +} + +/* + * + */ + +struct perf_header *perf_header__new(void) +{ + struct perf_header *self = malloc(sizeof(*self)); + + if (!self) + die("nomem"); + + self->frozen = 0; + + self->attrs = 0; + self->size = 1; + self->attr = malloc(sizeof(void *)); + + if (!self->attr) + die("nomem"); + + self->data_offset = 0; + self->data_size = 0; + + return self; +} + +void perf_header__add_attr(struct perf_header *self, + struct perf_header_attr *attr) +{ + int pos = self->attrs; + + if (self->frozen) + die("frozen"); + + self->attrs++; + if (self->attrs > self->size) { + self->size *= 2; + self->attr = realloc(self->attr, self->size * sizeof(void *)); + if (!self->attr) + die("nomem"); + } + self->attr[pos] = attr; +} + +static const char *__perf_magic = "PERFFILE"; + +#define PERF_MAGIC (*(u64 *)__perf_magic) + +struct perf_file_section 
{ + u64 offset; + u64 size; +}; + +struct perf_file_attr { + struct perf_counter_attr attr; + struct perf_file_section ids; +}; + +struct perf_file_header { + u64 magic; + u64 size; + u64 attr_size; + struct perf_file_section attrs; + struct perf_file_section data; +}; + +static void do_write(int fd, void *buf, size_t size) +{ + while (size) { + int ret = write(fd, buf, size); + + if (ret < 0) + die("failed to write"); + + size -= ret; + buf += ret; + } +} + +void perf_header__write(struct perf_header *self, int fd) +{ + struct perf_file_header f_header; + struct perf_file_attr f_attr; + struct perf_header_attr *attr; + int i; + + lseek(fd, sizeof(f_header), SEEK_SET); + + + for (i = 0; i < self->attrs; i++) { + attr = self->attr[i]; + + attr->id_offset = lseek(fd, 0, SEEK_CUR); + do_write(fd, attr->id, attr->ids * sizeof(u64)); + } + + + self->attr_offset = lseek(fd, 0, SEEK_CUR); + + for (i = 0; i < self->attrs; i++) { + attr = self->attr[i]; + + f_attr = (struct perf_file_attr){ + .attr = attr->attr, + .ids = { + .offset = attr->id_offset, + .size = attr->ids * sizeof(u64), + } + }; + do_write(fd, &f_attr, sizeof(f_attr)); + } + + + self->data_offset = lseek(fd, 0, SEEK_CUR); + + f_header = (struct perf_file_header){ + .magic = PERF_MAGIC, + .size = sizeof(f_header), + .attr_size = sizeof(f_attr), + .attrs = { + .offset = self->attr_offset, + .size = self->attrs * sizeof(f_attr), + }, + .data = { + .offset = self->data_offset, + .size = self->data_size, + }, + }; + + lseek(fd, 0, SEEK_SET); + do_write(fd, &f_header, sizeof(f_header)); + lseek(fd, self->data_offset + self->data_size, SEEK_SET); + + self->frozen = 1; +} + +static void do_read(int fd, void *buf, size_t size) +{ + while (size) { + int ret = read(fd, buf, size); + + if (ret < 0) + die("failed to read"); + + size -= ret; + buf += ret; + } +} + +struct perf_header *perf_header__read(int fd) +{ + struct perf_header *self = perf_header__new(); + struct perf_file_header f_header; + struct perf_file_attr 
f_attr; + u64 f_id; + + int nr_attrs, nr_ids, i, j; + + lseek(fd, 0, SEEK_SET); + do_read(fd, &f_header, sizeof(f_header)); + + if (f_header.magic != PERF_MAGIC || + f_header.size != sizeof(f_header) || + f_header.attr_size != sizeof(f_attr)) + die("incompatible file format"); + + nr_attrs = f_header.attrs.size / sizeof(f_attr); + lseek(fd, f_header.attrs.offset, SEEK_SET); + + for (i = 0; i < nr_attrs; i++) { + struct perf_header_attr *attr; + off_t tmp = lseek(fd, 0, SEEK_CUR); + + do_read(fd, &f_attr, sizeof(f_attr)); + + attr = perf_header_attr__new(&f_attr.attr); + + nr_ids = f_attr.ids.size / sizeof(u64); + lseek(fd, f_attr.ids.offset, SEEK_SET); + + for (j = 0; j < nr_ids; j++) { + do_read(fd, &f_id, sizeof(f_id)); + + perf_header_attr__add_id(attr, f_id); + } + perf_header__add_attr(self, attr); + lseek(fd, tmp, SEEK_SET); + } + + self->data_offset = f_header.data.offset; + self->data_size = f_header.data.size; + + lseek(fd, self->data_offset + self->data_size, SEEK_SET); + + self->frozen = 1; + + return self; +} diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h new file mode 100644 index 000000000000..b5ef53ad4c7a --- /dev/null +++ b/tools/perf/util/header.h @@ -0,0 +1,37 @@ +#ifndef _PERF_HEADER_H +#define _PERF_HEADER_H + +#include "../../../include/linux/perf_counter.h" +#include +#include "types.h" + +struct perf_header_attr { + struct perf_counter_attr attr; + int ids, size; + u64 *id; + off_t id_offset; +}; + +struct perf_header { + int frozen; + int attrs, size; + struct perf_header_attr **attr; + off_t attr_offset; + u64 data_offset; + u64 data_size; +}; + +struct perf_header *perf_header__read(int fd); +void perf_header__write(struct perf_header *self, int fd); + +void perf_header__add_attr(struct perf_header *self, + struct perf_header_attr *attr); + +struct perf_header_attr * +perf_header_attr__new(struct perf_counter_attr *attr); +void perf_header_attr__add_id(struct perf_header_attr *self, u64 id); + + +struct perf_header 
*perf_header__new(void); + +#endif /* _PERF_HEADER_H */ diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h index 37b03255b425..3dca2f654cd0 100644 --- a/tools/perf/util/string.h +++ b/tools/perf/util/string.h @@ -1,7 +1,7 @@ #ifndef _PERF_STRING_H_ #define _PERF_STRING_H_ -#include "../types.h" +#include "types.h" int hex2u64(const char *ptr, u64 *val); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index ea332e56e458..940b432db16e 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -2,7 +2,7 @@ #define _PERF_SYMBOL_ 1 #include -#include "../types.h" +#include "types.h" #include "list.h" #include "rbtree.h" diff --git a/tools/perf/types.h b/tools/perf/util/types.h similarity index 100% rename from tools/perf/types.h rename to tools/perf/util/types.h From 41f95331b972a039f519ae0c70f051b7121f7346 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Jun 2009 17:55:18 +0200 Subject: [PATCH 075/741] perf_counter: Split the mmap control page in two parts Since there are two distinct sections to the control page, move them apart so that possible extensions don't overlap. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index e7213e46cf9c..489d5cbfbcca 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -233,6 +233,12 @@ struct perf_counter_mmap_page { __u32 index; /* hardware counter identifier */ __s64 offset; /* add to hardware counter value */ + /* + * Hole for extension of the self monitor capabilities + */ + + __u64 __reserved[125]; /* align to 1k */ + /* * Control data for the mmap() data buffer.
* From 7f8b4e4e0988dadfd22330fd147ad2453e19f510 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 Jun 2009 14:34:35 +0200 Subject: [PATCH 076/741] perf_counter: Add scale information to the mmap control page Add the needed time scale to the self-profile mmap information. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 +++- kernel/perf_counter.c | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 489d5cbfbcca..bcbf1c43ed42 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -232,12 +232,14 @@ struct perf_counter_mmap_page { __u32 lock; /* seqlock for synchronization */ __u32 index; /* hardware counter identifier */ __s64 offset; /* add to hardware counter value */ + __u64 time_enabled; /* time counter active */ + __u64 time_running; /* time counter on cpu */ /* * Hole for extension of the self monitor capabilities */ - __u64 __reserved[125]; /* align to 1k */ + __u64 __reserved[123]; /* align to 1k */ /* * Control data for the mmap() data buffer. 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index c2b19c111718..23614adab475 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1782,6 +1782,12 @@ void perf_counter_update_userpage(struct perf_counter *counter) if (counter->state == PERF_COUNTER_STATE_ACTIVE) userpg->offset -= atomic64_read(&counter->hw.prev_count); + userpg->time_enabled = counter->total_time_enabled + + atomic64_read(&counter->child_total_time_enabled); + + userpg->time_running = counter->total_time_running + + atomic64_read(&counter->child_total_time_running); + barrier(); ++userpg->lock; preempt_enable(); From 194002b274e9169a04beb1b23dcc132159bb566c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 Jun 2009 16:35:24 +0200 Subject: [PATCH 077/741] perf_counter, x86: Add mmap counter read support Update the mmap control page with the needed information to use the userspace RDPMC instruction for self monitoring. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/perf_counter.h | 2 ++ arch/x86/include/asm/perf_counter.h | 3 +++ arch/x86/kernel/cpu/perf_counter.c | 6 ++++++ kernel/perf_counter.c | 10 +++++++++- 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index 8ccd4e155768..0ea0639fcf75 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -61,6 +61,8 @@ struct pt_regs; extern unsigned long perf_misc_flags(struct pt_regs *regs); extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +#define PERF_COUNTER_INDEX_OFFSET 1 + /* * Only override the default definitions in include/linux/perf_counter.h * if we have hardware PMU support. 
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 5fb33e160ea0..fa64e401589d 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -87,6 +87,9 @@ union cpuid10_edx { #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(void); + +#define PERF_COUNTER_INDEX_OFFSET 0 + #else static inline void init_hw_perf_counters(void) { } static inline void perf_counters_lapic_init(void) { } diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index a310d19faca3..b83474b6021a 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -912,6 +912,8 @@ x86_perf_counter_set_period(struct perf_counter *counter, err = checking_wrmsrl(hwc->counter_base + idx, (u64)(-left) & x86_pmu.counter_mask); + perf_counter_update_userpage(counter); + return ret; } @@ -1034,6 +1036,8 @@ try_generic: x86_perf_counter_set_period(counter, hwc, idx); x86_pmu.enable(hwc, idx); + perf_counter_update_userpage(counter); + return 0; } @@ -1126,6 +1130,8 @@ static void x86_pmu_disable(struct perf_counter *counter) x86_perf_counter_update(counter, hwc, idx); cpuc->counters[idx] = NULL; clear_bit(idx, cpuc->used_mask); + + perf_counter_update_userpage(counter); } /* diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 23614adab475..02994a719e27 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1753,6 +1753,14 @@ int perf_counter_task_disable(void) return 0; } +static int perf_counter_index(struct perf_counter *counter) +{ + if (counter->state != PERF_COUNTER_STATE_ACTIVE) + return 0; + + return counter->hw.idx + 1 - PERF_COUNTER_INDEX_OFFSET; +} + /* * Callers need to ensure there can be no nesting of this function, otherwise * the seqlock logic goes bad. 
We can not serialize this because the arch @@ -1777,7 +1785,7 @@ void perf_counter_update_userpage(struct perf_counter *counter) preempt_disable(); ++userpg->lock; barrier(); - userpg->index = counter->hw.idx; + userpg->index = perf_counter_index(counter); userpg->offset = atomic64_read(&counter->count); if (counter->state == PERF_COUNTER_STATE_ACTIVE) userpg->offset -= atomic64_read(&counter->hw.prev_count); From 38b200d67636a30cb8dc1508137908e7a649b5c9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Jun 2009 20:13:11 +0200 Subject: [PATCH 078/741] perf_counter: Add PERF_EVENT_READ Provide a read() like event which can be used to log the counter value at specific sites such as child->parent folding on exit. In order to be useful, we log the counter parent ID, not the actual counter ID, since userspace can only relate parent IDs to perf_counter_attr constructs. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 12 ++++++ kernel/perf_counter.c | 72 ++++++++++++++++++++++++++++++++++-- 2 files changed, 80 insertions(+), 4 deletions(-) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index bcbf1c43ed42..6a384f04755a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -334,6 +334,18 @@ enum perf_event_type { */ PERF_EVENT_FORK = 7, + /* + * struct { + * struct perf_event_header header; + * u32 pid, tid; + * u64 value; + * { u64 time_enabled; } && PERF_FORMAT_ENABLED + * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 parent_id; } && PERF_FORMAT_ID + * }; + */ + PERF_EVENT_READ = 8, + /* * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field * will be PERF_SAMPLE_* diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 02994a719e27..a72c20e91953 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2623,6 +2623,66 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, 
perf_output_end(&handle); } +/* + * read event + */ + +struct perf_read_event { + struct perf_event_header header; + + u32 pid; + u32 tid; + u64 value; + u64 format[3]; +}; + +static void +perf_counter_read_event(struct perf_counter *counter, + struct task_struct *task) +{ + struct perf_output_handle handle; + struct perf_read_event event = { + .header = { + .type = PERF_EVENT_READ, + .misc = 0, + .size = sizeof(event) - sizeof(event.format), + }, + .pid = perf_counter_pid(counter, task), + .tid = perf_counter_tid(counter, task), + .value = atomic64_read(&counter->count), + }; + int ret, i = 0; + + if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + event.header.size += sizeof(u64); + event.format[i++] = counter->total_time_enabled; + } + + if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + event.header.size += sizeof(u64); + event.format[i++] = counter->total_time_running; + } + + if (counter->attr.read_format & PERF_FORMAT_ID) { + u64 id; + + event.header.size += sizeof(u64); + if (counter->parent) + id = counter->parent->id; + else + id = counter->id; + + event.format[i++] = id; + } + + ret = perf_output_begin(&handle, counter, event.header.size, 0, 0); + if (ret) + return; + + perf_output_copy(&handle, &event, event.header.size); + perf_output_end(&handle); +} + /* * fork tracking */ @@ -3985,10 +4045,13 @@ static int inherit_group(struct perf_counter *parent_counter, } static void sync_child_counter(struct perf_counter *child_counter, - struct perf_counter *parent_counter) + struct task_struct *child) { + struct perf_counter *parent_counter = child_counter->parent; u64 child_val; + perf_counter_read_event(child_counter, child); + child_val = atomic64_read(&child_counter->count); /* @@ -4017,7 +4080,8 @@ static void sync_child_counter(struct perf_counter *child_counter, static void __perf_counter_exit_task(struct perf_counter *child_counter, - struct perf_counter_context *child_ctx) + struct perf_counter_context *child_ctx, + 
struct task_struct *child) { struct perf_counter *parent_counter; @@ -4031,7 +4095,7 @@ __perf_counter_exit_task(struct perf_counter *child_counter, * counters need to be zapped - but otherwise linger. */ if (parent_counter) { - sync_child_counter(child_counter, parent_counter); + sync_child_counter(child_counter, child); free_counter(child_counter); } } @@ -4093,7 +4157,7 @@ void perf_counter_exit_task(struct task_struct *child) again: list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list, list_entry) - __perf_counter_exit_task(child_counter, child_ctx); + __perf_counter_exit_task(child_counter, child_ctx, child); /* * If the last counter was a group counter, it will have appended all From bfbd3381e63aa2a14c6706afb50ce4630aa0d9a2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Jun 2009 21:11:59 +0200 Subject: [PATCH 079/741] perf_counter: Implement more accurate per task statistics With the introduction of PERF_EVENT_READ we have the possibility to provide accurate counter values for individual tasks in a task hierarchy. However, due to the lazy context switching used for similar counter contexts our current per task counts are way off. In order to maintain some of the lazy switch benefits we don't disable it out-right, but simply iterate the active counters and flip the values between the contexts. This only reads the counters but does not need to reprogram the full PMU. 
Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 +- kernel/perf_counter.c | 83 ++++++++++++++++++++++++++++++++++-- 2 files changed, 83 insertions(+), 4 deletions(-) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 6a384f04755a..de70a10b5ec8 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -178,8 +178,9 @@ struct perf_counter_attr { mmap : 1, /* include mmap data */ comm : 1, /* include comm data */ freq : 1, /* use freq, not period */ + inherit_stat : 1, /* per task counts */ - __reserved_1 : 53; + __reserved_1 : 52; __u32 wakeup_events; /* wakeup every n events */ __u32 __reserved_2; @@ -602,6 +603,7 @@ struct perf_counter_context { int nr_counters; int nr_active; int is_active; + int nr_stat; atomic_t refcount; struct task_struct *task; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index a72c20e91953..385ca51c6e60 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -236,6 +236,8 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) list_add_rcu(&counter->event_entry, &ctx->event_list); ctx->nr_counters++; + if (counter->attr.inherit_stat) + ctx->nr_stat++; } /* @@ -250,6 +252,8 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) if (list_empty(&counter->list_entry)) return; ctx->nr_counters--; + if (counter->attr.inherit_stat) + ctx->nr_stat--; list_del_init(&counter->list_entry); list_del_rcu(&counter->event_entry); @@ -1006,6 +1010,76 @@ static int context_equiv(struct perf_counter_context *ctx1, && !ctx1->pin_count && !ctx2->pin_count; } +static void __perf_counter_read(void *counter); + +static void __perf_counter_sync_stat(struct perf_counter *counter, + struct perf_counter *next_counter) +{ + u64 value; + + if (!counter->attr.inherit_stat) + return; + + /* + * Update the counter value, we cannot use perf_counter_read() + * because we're in the 
middle of a context switch and have IRQs + * disabled, which upsets smp_call_function_single(), however + * we know the counter must be on the current CPU, therefore we + * don't need to use it. + */ + switch (counter->state) { + case PERF_COUNTER_STATE_ACTIVE: + __perf_counter_read(counter); + break; + + case PERF_COUNTER_STATE_INACTIVE: + update_counter_times(counter); + break; + + default: + break; + } + + /* + * In order to keep per-task stats reliable we need to flip the counter + * values when we flip the contexts. + */ + value = atomic64_read(&next_counter->count); + value = atomic64_xchg(&counter->count, value); + atomic64_set(&next_counter->count, value); + + /* + * XXX also sync time_enabled and time_running ? + */ +} + +#define list_next_entry(pos, member) \ + list_entry(pos->member.next, typeof(*pos), member) + +static void perf_counter_sync_stat(struct perf_counter_context *ctx, + struct perf_counter_context *next_ctx) +{ + struct perf_counter *counter, *next_counter; + + if (!ctx->nr_stat) + return; + + counter = list_first_entry(&ctx->event_list, + struct perf_counter, event_entry); + + next_counter = list_first_entry(&next_ctx->event_list, + struct perf_counter, event_entry); + + while (&counter->event_entry != &ctx->event_list && + &next_counter->event_entry != &next_ctx->event_list) { + + __perf_counter_sync_stat(counter, next_counter); + + counter = list_next_entry(counter, event_entry); + next_counter = list_next_entry(next_counter, event_entry); + } +} + /* * Called from scheduler to remove the counters of the current task, * with interrupts disabled.
@@ -1061,6 +1135,8 @@ void perf_counter_task_sched_out(struct task_struct *task, ctx->task = next; next_ctx->task = task; do_switch = 0; + + perf_counter_sync_stat(ctx, next_ctx); } spin_unlock(&next_ctx->lock); spin_unlock(&ctx->lock); @@ -1350,7 +1426,7 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) /* * Cross CPU call to read the hardware counter */ -static void __read(void *info) +static void __perf_counter_read(void *info) { struct perf_counter *counter = info; struct perf_counter_context *ctx = counter->ctx; @@ -1372,7 +1448,7 @@ static u64 perf_counter_read(struct perf_counter *counter) */ if (counter->state == PERF_COUNTER_STATE_ACTIVE) { smp_call_function_single(counter->oncpu, - __read, counter, 1); + __perf_counter_read, counter, 1); } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { update_counter_times(counter); } @@ -4050,7 +4126,8 @@ static void sync_child_counter(struct perf_counter *child_counter, struct perf_counter *parent_counter = child_counter->parent; u64 child_val; - perf_counter_read_event(child_counter, child); + if (child_counter->attr.inherit_stat) + perf_counter_read_event(child_counter, child); child_val = atomic64_read(&child_counter->count); From e6e18ec79b023d5fe84226cef533cf0e3770ce93 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Jun 2009 11:27:12 +0200 Subject: [PATCH 080/741] perf_counter: Rework the sample ABI The PERF_EVENT_READ implementation made me realize we don't actually need the sample_type in the output sample, since we already have that in the perf_counter_attr information. Therefore, remove the PERF_EVENT_MISC_OVERFLOW bit and the event->type overloading, and simply put counter overflow samples in a PERF_EVENT_SAMPLE type. This also fixes the issue that event->type was only 32-bit and sample_type had 64 usable bits.
Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 10 +++++----- kernel/perf_counter.c | 36 +++++++++++++++-------------------- tools/perf/builtin-annotate.c | 8 ++++---- tools/perf/builtin-report.c | 32 ++++++++++++++++++------------- tools/perf/builtin-top.c | 11 ++++++----- 5 files changed, 49 insertions(+), 48 deletions(-) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index de70a10b5ec8..3078e23c91eb 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -262,7 +262,6 @@ struct perf_counter_mmap_page { #define PERF_EVENT_MISC_KERNEL (1 << 0) #define PERF_EVENT_MISC_USER (2 << 0) #define PERF_EVENT_MISC_HYPERVISOR (3 << 0) -#define PERF_EVENT_MISC_OVERFLOW (1 << 2) struct perf_event_header { __u32 type; @@ -348,9 +347,6 @@ enum perf_event_type { PERF_EVENT_READ = 8, /* - * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field - * will be PERF_SAMPLE_* - * * struct { * struct perf_event_header header; * @@ -358,8 +354,9 @@ enum perf_event_type { * { u32 pid, tid; } && PERF_SAMPLE_TID * { u64 time; } && PERF_SAMPLE_TIME * { u64 addr; } && PERF_SAMPLE_ADDR - * { u64 config; } && PERF_SAMPLE_CONFIG + * { u64 id; } && PERF_SAMPLE_ID * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u64 period; } && PERF_SAMPLE_PERIOD * * { u64 nr; * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP @@ -368,6 +365,9 @@ enum perf_event_type { * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN * }; */ + PERF_EVENT_SAMPLE = 9, + + PERF_EVENT_MAX, /* non-ABI */ }; enum perf_callchain_context { diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 385ca51c6e60..f2f232696587 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2575,15 +2575,14 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, u32 cpu, reserved; } cpu_entry; - header.type = 0; + header.type = PERF_EVENT_SAMPLE; header.size = sizeof(header); - header.misc = 
PERF_EVENT_MISC_OVERFLOW; + header.misc = 0; header.misc |= perf_misc_flags(data->regs); if (sample_type & PERF_SAMPLE_IP) { ip = perf_instruction_pointer(data->regs); - header.type |= PERF_SAMPLE_IP; header.size += sizeof(ip); } @@ -2592,7 +2591,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, tid_entry.pid = perf_counter_pid(counter, current); tid_entry.tid = perf_counter_tid(counter, current); - header.type |= PERF_SAMPLE_TID; header.size += sizeof(tid_entry); } @@ -2602,34 +2600,25 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, */ time = sched_clock(); - header.type |= PERF_SAMPLE_TIME; header.size += sizeof(u64); } - if (sample_type & PERF_SAMPLE_ADDR) { - header.type |= PERF_SAMPLE_ADDR; + if (sample_type & PERF_SAMPLE_ADDR) header.size += sizeof(u64); - } - if (sample_type & PERF_SAMPLE_ID) { - header.type |= PERF_SAMPLE_ID; + if (sample_type & PERF_SAMPLE_ID) header.size += sizeof(u64); - } if (sample_type & PERF_SAMPLE_CPU) { - header.type |= PERF_SAMPLE_CPU; header.size += sizeof(cpu_entry); cpu_entry.cpu = raw_smp_processor_id(); } - if (sample_type & PERF_SAMPLE_PERIOD) { - header.type |= PERF_SAMPLE_PERIOD; + if (sample_type & PERF_SAMPLE_PERIOD) header.size += sizeof(u64); - } if (sample_type & PERF_SAMPLE_GROUP) { - header.type |= PERF_SAMPLE_GROUP; header.size += sizeof(u64) + counter->nr_siblings * sizeof(group_entry); } @@ -2639,10 +2628,9 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, if (callchain) { callchain_size = (1 + callchain->nr) * sizeof(u64); - - header.type |= PERF_SAMPLE_CALLCHAIN; header.size += callchain_size; - } + } else + header.size += sizeof(u64); } ret = perf_output_begin(&handle, counter, header.size, nmi, 1); @@ -2693,8 +2681,14 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, } } - if (callchain) - perf_output_copy(&handle, callchain, callchain_size); + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + if (callchain) + 
perf_output_copy(&handle, callchain, callchain_size); + else { + u64 nr = 0; + perf_output_put(&handle, nr); + } + } perf_output_end(&handle); } diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 7e58e3ad1508..722c0f54e549 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -855,7 +855,7 @@ static unsigned long total = 0, total_unknown = 0; static int -process_overflow_event(event_t *event, unsigned long offset, unsigned long head) +process_sample_event(event_t *event, unsigned long offset, unsigned long head) { char level; int show = 0; @@ -1013,10 +1013,10 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head) static int process_event(event_t *event, unsigned long offset, unsigned long head) { - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) - return process_overflow_event(event, offset, head); - switch (event->header.type) { + case PERF_EVENT_SAMPLE: + return process_sample_event(event, offset, head); + case PERF_EVENT_MMAP: return process_mmap_event(event, offset, head); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e575f3039766..ec5361c67bf5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -53,6 +53,8 @@ static regex_t parent_regex; static int exclude_other = 1; +static u64 sample_type; + struct ip_event { struct perf_event_header header; u64 ip; @@ -1135,7 +1137,7 @@ static int validate_chain(struct ip_callchain *chain, event_t *event) } static int -process_overflow_event(event_t *event, unsigned long offset, unsigned long head) +process_sample_event(event_t *event, unsigned long offset, unsigned long head) { char level; int show = 0; @@ -1147,12 +1149,12 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) void *more_data = event->ip.__more_data; struct ip_callchain *chain = NULL; - if (event->header.type & PERF_SAMPLE_PERIOD) { + if (sample_type & PERF_SAMPLE_PERIOD) { period = 
*(u64 *)more_data; more_data += sizeof(u64); } - dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", + dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.misc, @@ -1160,7 +1162,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) (void *)(long)ip, (long long)period); - if (event->header.type & PERF_SAMPLE_CALLCHAIN) { + if (sample_type & PERF_SAMPLE_CALLCHAIN) { int i; chain = (void *)more_data; @@ -1352,10 +1354,10 @@ process_event(event_t *event, unsigned long offset, unsigned long head) { trace_event(event); - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) - return process_overflow_event(event, offset, head); - switch (event->header.type) { + case PERF_EVENT_SAMPLE: + return process_sample_event(event, offset, head); + case PERF_EVENT_MMAP: return process_mmap_event(event, offset, head); @@ -1388,18 +1390,21 @@ process_event(event_t *event, unsigned long offset, unsigned long head) static struct perf_header *header; -static int perf_header__has_sample(u64 sample_mask) +static u64 perf_header__sample_type(void) { + u64 sample_type = 0; int i; for (i = 0; i < header->attrs; i++) { struct perf_header_attr *attr = header->attr[i]; - if (!(attr->attr.sample_type & sample_mask)) - return 0; + if (!sample_type) + sample_type = attr->attr.sample_type; + else if (sample_type != attr->attr.sample_type) + die("non matching sample_type"); } - return 1; + return sample_type; } static int __cmd_report(void) @@ -1437,8 +1442,9 @@ static int __cmd_report(void) header = perf_header__read(input); head = header->data_offset; - if (sort__has_parent && - !perf_header__has_sample(PERF_SAMPLE_CALLCHAIN)) { + sample_type = perf_header__sample_type(); + + if (sort__has_parent && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { fprintf(stderr, "selected --sort parent, but no callchain data\n"); exit(-1); } diff --git a/tools/perf/builtin-top.c 
b/tools/perf/builtin-top.c index 5352b5e352ed..cf0d21f1ae10 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -392,11 +392,11 @@ static void record_ip(u64 ip, int counter) samples--; } -static void process_event(u64 ip, int counter) +static void process_event(u64 ip, int counter, int user) { samples++; - if (ip < min_ip || ip > max_ip) { + if (user) { userspace_samples++; return; } @@ -509,9 +509,10 @@ static void mmap_read_counter(struct mmap_data *md) old += size; - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) { - if (event->header.type & PERF_SAMPLE_IP) - process_event(event->ip.ip, md->counter); + if (event->header.type == PERF_EVENT_SAMPLE) { + int user = + (event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK) == PERF_EVENT_MISC_USER; + process_event(event->ip.ip, md->counter, user); } } From 649c48a9e7fafcc72bfcc99471d9dea98d789d59 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Jun 2009 21:12:48 +0200 Subject: [PATCH 081/741] perf-report: Add modes for inherited stats and no-samples Now that we can collect per task statistics, add modes that make use of that facility. 
Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f4f0240d2302..798a56d890e5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -41,6 +41,8 @@ static int force = 0; static int append_file = 0; static int call_graph = 0; static int verbose = 0; +static int inherit_stat = 0; +static int no_samples = 0; static long samples; static struct timeval last_read; @@ -393,6 +395,12 @@ static void create_counter(int counter, int cpu, pid_t pid) attr->sample_freq = freq; } + if (no_samples) + attr->sample_freq = 0; + + if (inherit_stat) + attr->inherit_stat = 1; + if (call_graph) attr->sample_type |= PERF_SAMPLE_CALLCHAIN; @@ -571,7 +579,7 @@ static int __cmd_record(int argc, const char **argv) } } - while (!done) { + for (;;) { int hits = samples; for (i = 0; i < nr_cpu; i++) { @@ -579,8 +587,11 @@ static int __cmd_record(int argc, const char **argv) mmap_read(&mmap_array[i][counter]); } - if (hits == samples) + if (hits == samples) { + if (done) + break; ret = poll(event_array, nr_poll, 100); + } } /* @@ -629,6 +640,10 @@ static const struct option options[] = { "do call-graph (stack chain/backtrace) recording"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), + OPT_BOOLEAN('s', "stat", &inherit_stat, + "per thread counts"), + OPT_BOOLEAN('n', "no-samples", &no_samples, + "don't sample"), OPT_END() }; From e9ea2fde7a07ae60a119171a2946ed2ae778271e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Jun 2009 22:46:04 +0200 Subject: [PATCH 082/741] perf-report: Add bare minimum PERF_EVENT_READ parsing Provide the basic infrastructure to provide per task stats. 
Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ec5361c67bf5..681c2233f882 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -100,6 +100,13 @@ struct lost_event { u64 lost; }; +struct read_event { + struct perf_event_header header; + u32 pid,tid; + u64 value; + u64 format[3]; +}; + typedef union event_union { struct perf_event_header header; struct ip_event ip; @@ -108,6 +115,7 @@ typedef union event_union { struct fork_event fork; struct period_event period; struct lost_event lost; + struct read_event read; } event_t; static LIST_HEAD(dsos); @@ -1349,6 +1357,19 @@ static void trace_event(event_t *event) dprintf(".\n"); } +static int +process_read_event(event_t *event, unsigned long offset, unsigned long head) +{ + dprintf("%p [%p]: PERF_EVENT_READ: %d %d %Lu\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->read.pid, + event->read.tid, + event->read.value); + + return 0; +} + static int process_event(event_t *event, unsigned long offset, unsigned long head) { @@ -1373,6 +1394,9 @@ process_event(event_t *event, unsigned long offset, unsigned long head) case PERF_EVENT_LOST: return process_lost_event(event, offset, head); + case PERF_EVENT_READ: + return process_read_event(event, offset, head); + /* * We dont process them right now but they are fine: */ From 4418351f06d9ce73acc846158c20186965f920f3 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 25 Jun 2009 21:27:42 +0530 Subject: [PATCH 083/741] perf_counter tools: Add alias for 'l1d' and 'l1i' Add 'l1d' and 'l1i' aliases again as shortcuts - just dont make them the primary display alias. 
Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1245945462.9157.11.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 430f06083201..4d042f104cdc 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -71,8 +71,8 @@ static char *sw_event_names[] = { #define MAX_ALIASES 8 static char *hw_cache[][MAX_ALIASES] = { - { "L1-d$", "l1-d", "L1-data", }, - { "L1-i$", "l1-i", "L1-instruction", }, + { "L1-d$", "l1-d", "l1d", "L1-data", }, + { "L1-i$", "l1-i", "l1i", "L1-instruction", }, { "LLC", "L2" }, { "dTLB", "d-tlb", "Data-TLB", }, { "iTLB", "i-tlb", "Instruction-TLB", }, From 3928ddbe994cce1da1b6365b0db04d5765f254f4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 25 Jun 2009 22:21:27 +0200 Subject: [PATCH 084/741] perf record: Fix unhandled io return value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building latest perfcounter fails on the following error: builtin-record.c: In function ‘create_counter’: builtin-record.c:451: erreur: ignoring return value of ‘read’, declared with attribute warn_unused_result make: *** [builtin-record.o] Erreur 1 Just check if we successfully read the perf file descriptor. 
Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <1245961287-5327-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 798a56d890e5..d18546f37d7c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -448,7 +448,10 @@ try_again: } } - read(fd[nr_cpu][counter], &read_data, sizeof(read_data)); + if (read(fd[nr_cpu][counter], &read_data, sizeof(read_data)) == -1) { + perror("Unable to read perf file descriptor\n"); + exit(-1); + } perf_header_attr__add_id(h_attr, read_data.id); From 789547508f22e482825f52f813b59680408ec2c7 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 23 Jun 2009 11:26:06 +0000 Subject: [PATCH 085/741] ide: fix ide_kill_rq() for special ide-{floppy,tape} driver requests Such requests should be failed with -EIO (like all other requests in this function) instead of being completed successfully. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. 
Miller --- drivers/ide/ide-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 93b7886a2d6e..95db5f03f6a2 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -152,7 +152,7 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq) if ((media == ide_floppy || media == ide_tape) && drv_req) { rq->errors = 0; - ide_complete_rq(drive, 0, blk_rq_bytes(rq)); + ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); } else { if (media == ide_tape) rq->errors = IDE_DRV_ERROR_GENERAL; From 5e955245d6cf49c5ed26c7add7392ff5a6762bf4 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 23 Jun 2009 11:27:27 +0000 Subject: [PATCH 086/741] ide: always kill the whole request on error * Use blk_rq_bytes() instead of obsolete ide_rq_bytes() in ide_kill_rq() and ide_floppy_do_request() for failed requests. [ bugfix part ] * Use blk_rq_bytes() instead of obsolete ide_rq_bytes() in ide_do_devset() and ide_complete_drive_reset(). Then remove ide_rq_bytes(). [ cleanup part ] Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. 
Miller --- drivers/ide/ide-devsets.c | 2 +- drivers/ide/ide-eh.c | 2 +- drivers/ide/ide-floppy.c | 2 +- drivers/ide/ide-io.c | 14 ++------------ include/linux/ide.h | 1 - 5 files changed, 5 insertions(+), 16 deletions(-) diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c index 5bf958e5b1d5..1099bf7cf968 100644 --- a/drivers/ide/ide-devsets.c +++ b/drivers/ide/ide-devsets.c @@ -183,6 +183,6 @@ ide_startstop_t ide_do_devset(ide_drive_t *drive, struct request *rq) err = setfunc(drive, *(int *)&rq->cmd[1]); if (err) rq->errors = err; - ide_complete_rq(drive, err, ide_rq_bytes(rq)); + ide_complete_rq(drive, err, blk_rq_bytes(rq)); return ide_stopped; } diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c index 2b9141979613..e9abf2c3c335 100644 --- a/drivers/ide/ide-eh.c +++ b/drivers/ide/ide-eh.c @@ -149,7 +149,7 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err) if (rq && blk_special_request(rq) && rq->cmd[0] == REQ_DRIVE_RESET) { if (err <= 0 && rq->errors == 0) rq->errors = -EIO; - ide_complete_rq(drive, err ? err : 0, ide_rq_bytes(rq)); + ide_complete_rq(drive, err ? 
err : 0, blk_rq_bytes(rq)); } } diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 8b3f204f7d73..fefbdfc8db06 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -293,7 +293,7 @@ out_end: drive->failed_pc = NULL; if (blk_fs_request(rq) == 0 && rq->errors == 0) rq->errors = -EIO; - ide_complete_rq(drive, -EIO, ide_rq_bytes(rq)); + ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); return ide_stopped; } diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 95db5f03f6a2..d5f3c77beadd 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -112,16 +112,6 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err) } } -/* obsolete, blk_rq_bytes() should be used instead */ -unsigned int ide_rq_bytes(struct request *rq) -{ - if (blk_pc_request(rq)) - return blk_rq_bytes(rq); - else - return blk_rq_cur_sectors(rq) << 9; -} -EXPORT_SYMBOL_GPL(ide_rq_bytes); - int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes) { ide_hwif_t *hwif = drive->hwif; @@ -152,14 +142,14 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq) if ((media == ide_floppy || media == ide_tape) && drv_req) { rq->errors = 0; - ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); } else { if (media == ide_tape) rq->errors = IDE_DRV_ERROR_GENERAL; else if (blk_fs_request(rq) == 0 && rq->errors == 0) rq->errors = -EIO; - ide_complete_rq(drive, -EIO, ide_rq_bytes(rq)); } + + ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); } static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf) diff --git a/include/linux/ide.h b/include/linux/ide.h index cf1f3888067c..c6af7c44d46c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1062,7 +1062,6 @@ int generic_ide_ioctl(ide_drive_t *, struct block_device *, unsigned, unsigned l extern int ide_vlb_clk; extern int ide_pci_clk; -unsigned int ide_rq_bytes(struct request *); int ide_end_rq(ide_drive_t *, struct request *, int, unsigned int); void 
ide_kill_rq(ide_drive_t *, struct request *); From a80cad950f2a562e60db1869dd29bc007c5a4b66 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 26 Jun 2009 07:05:39 +0000 Subject: [PATCH 087/741] sh: ms7724se: Add sh_eth support Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/sh/boards/mach-se/7724/setup.c | 106 +++++++++++++++++++++++++- arch/sh/include/mach-se/mach/se7724.h | 5 ++ 2 files changed, 109 insertions(+), 2 deletions(-) diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c index 9cd04bd558b8..21d18005fb4a 100644 --- a/arch/sh/boards/mach-se/7724/setup.c +++ b/arch/sh/boards/mach-se/7724/setup.c @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include #include #include @@ -272,6 +274,34 @@ static struct platform_device keysc_device = { }, }; +/* SH Eth */ +static struct resource sh_eth_resources[] = { + [0] = { + .start = SH_ETH_ADDR, + .end = SH_ETH_ADDR + 0x1FC, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 91, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, + }, +}; + +struct sh_eth_plat_data sh_eth_plat = { + .phy = 0x1f, /* SMSC LAN8187 */ + .edmac_endian = EDMAC_LITTLE_ENDIAN, +}; + +static struct platform_device sh_eth_device = { + .name = "sh-eth", + .id = 0, + .dev = { + .platform_data = &sh_eth_plat, + }, + .num_resources = ARRAY_SIZE(sh_eth_resources), + .resource = sh_eth_resources, +}; + static struct platform_device *ms7724se_devices[] __initdata = { &heartbeat_device, &smc91x_eth_device, @@ -280,8 +310,57 @@ static struct platform_device *ms7724se_devices[] __initdata = { &ceu0_device, &ceu1_device, &keysc_device, + &sh_eth_device, }; +#define EEPROM_OP 0xBA206000 +#define EEPROM_ADR 0xBA206004 +#define EEPROM_DATA 0xBA20600C +#define EEPROM_STAT 0xBA206010 +#define EEPROM_STRT 0xBA206014 +static int __init sh_eth_is_eeprom_ready(void) +{ + int t = 10000; + + while (t--) { + if (!ctrl_inw(EEPROM_STAT)) + return 1; + cpu_relax(); + } + + 
printk(KERN_ERR "ms7724se can not access to eeprom\n"); + return 0; +} + +static void __init sh_eth_init(void) +{ + int i; + u16 mac[3]; + + /* check EEPROM status */ + if (!sh_eth_is_eeprom_ready()) + return; + + /* read MAC addr from EEPROM */ + for (i = 0 ; i < 3 ; i++) { + ctrl_outw(0x0, EEPROM_OP); /* read */ + ctrl_outw(i*2, EEPROM_ADR); + ctrl_outw(0x1, EEPROM_STRT); + if (!sh_eth_is_eeprom_ready()) + return; + + mac[i] = ctrl_inw(EEPROM_DATA); + mac[i] = ((mac[i] & 0xFF) << 8) | (mac[i] >> 8); /* swap */ + } + + /* reset sh-eth */ + ctrl_outl(0x1, SH_ETH_ADDR + 0x0); + + /* set MAC addr */ + ctrl_outl(((mac[0] << 16) | (mac[1])), SH_ETH_MAHR); + ctrl_outl((mac[2]), SH_ETH_MALR); +} + #define SW4140 0xBA201000 #define FPGA_OUT 0xBA200400 #define PORT_HIZA 0xA4050158 @@ -302,7 +381,8 @@ static int __init devices_setup(void) ctrl_outw(ctrl_inw(FPGA_OUT) & ~((1 << 1) | /* LAN */ (1 << 6) | /* VIDEO DAC */ - (1 << 12)), /* USB0 */ + (1 << 12) | /* USB0 */ + (1 << 14)), /* RMII */ FPGA_OUT); /* enable IRQ 0,1,2 */ @@ -404,6 +484,28 @@ static int __init devices_setup(void) gpio_request(GPIO_FN_KEYOUT1, NULL); gpio_request(GPIO_FN_KEYOUT0, NULL); + /* + * enable SH-Eth + * + * please remove J33 pin from your board !! 
+ * + * ms7724 board should not use GPIO_FN_LNKSTA pin + * So, This time PTX5 is set to input pin + */ + gpio_request(GPIO_FN_RMII_RXD0, NULL); + gpio_request(GPIO_FN_RMII_RXD1, NULL); + gpio_request(GPIO_FN_RMII_TXD0, NULL); + gpio_request(GPIO_FN_RMII_TXD1, NULL); + gpio_request(GPIO_FN_RMII_REF_CLK, NULL); + gpio_request(GPIO_FN_RMII_TX_EN, NULL); + gpio_request(GPIO_FN_RMII_RX_ER, NULL); + gpio_request(GPIO_FN_RMII_CRS_DV, NULL); + gpio_request(GPIO_FN_MDIO, NULL); + gpio_request(GPIO_FN_MDC, NULL); + gpio_request(GPIO_PTX5, NULL); + gpio_direction_input(GPIO_PTX5); + sh_eth_init(); + if (sw & SW41_B) { /* SVGA */ lcdc_info.ch[0].lcd_cfg.xres = 800; @@ -437,7 +539,7 @@ static int __init devices_setup(void) } return platform_add_devices(ms7724se_devices, - ARRAY_SIZE(ms7724se_devices)); + ARRAY_SIZE(ms7724se_devices)); } device_initcall(devices_setup); diff --git a/arch/sh/include/mach-se/mach/se7724.h b/arch/sh/include/mach-se/mach/se7724.h index 74164b60d0db..29514a39d0f5 100644 --- a/arch/sh/include/mach-se/mach/se7724.h +++ b/arch/sh/include/mach-se/mach/se7724.h @@ -20,6 +20,11 @@ */ #include +/* SH Eth */ +#define SH_ETH_ADDR (0xA4600000) +#define SH_ETH_MAHR (SH_ETH_ADDR + 0x1C0) +#define SH_ETH_MALR (SH_ETH_ADDR + 0x1C8) + #define PA_LED (0xba203000) /* 8bit LED */ #define IRQ_MODE (0xba200010) #define IRQ0_SR (0xba200014) From 7ed9f7e5db58c6e8c2b4b738a75d5dcd8e17aad5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 25 Jun 2009 12:31:37 -0700 Subject: [PATCH 088/741] fix RCU-callback-after-kmem_cache_destroy problem in sl[aou]b Jesper noted that kmem_cache_destroy() invokes synchronize_rcu() rather than rcu_barrier() in the SLAB_DESTROY_BY_RCU case, which could result in RCU callbacks accessing a kmem_cache after it had been destroyed. Cc: Acked-by: Matt Mackall Reported-by: Jesper Dangaard Brouer Signed-off-by: Paul E. 
McKenney Signed-off-by: Pekka Enberg --- mm/slab.c | 2 +- mm/slob.c | 2 ++ mm/slub.c | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/slab.c b/mm/slab.c index e74a16e4ced6..5241b6598ba3 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2547,7 +2547,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep) } if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) - synchronize_rcu(); + rcu_barrier(); __kmem_cache_destroy(cachep); mutex_unlock(&cache_chain_mutex); diff --git a/mm/slob.c b/mm/slob.c index c78742defdc6..9641da3d5e58 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -595,6 +595,8 @@ EXPORT_SYMBOL(kmem_cache_create); void kmem_cache_destroy(struct kmem_cache *c) { kmemleak_free(c); + if (c->flags & SLAB_DESTROY_BY_RCU) + rcu_barrier(); slob_free(c, sizeof(struct kmem_cache)); } EXPORT_SYMBOL(kmem_cache_destroy); diff --git a/mm/slub.c b/mm/slub.c index 819f056b39c6..a9201d83178b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2595,6 +2595,8 @@ static inline int kmem_cache_close(struct kmem_cache *s) */ void kmem_cache_destroy(struct kmem_cache *s) { + if (s->flags & SLAB_DESTROY_BY_RCU) + rcu_barrier(); down_write(&slub_lock); s->refcount--; if (!s->refcount) { From 8cb76d99d715741637b6d0884f389e17e9cb05d2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 26 Jun 2009 16:28:00 +0200 Subject: [PATCH 089/741] perf_counter tools: Prepare a small callchain framework We plan to display the callchains depending on some user-configurable parameters. To gather the callchains stats from the recorded stream in a fast way, this patch introduces an ad hoc radix tree adapted for callchains and also a rbtree to sort these callchains once we have gathered every events from the stream. 
Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1246026481-8314-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 1 + tools/perf/builtin-report.c | 5 -- tools/perf/perf.h | 5 ++ tools/perf/util/callchain.c | 174 ++++++++++++++++++++++++++++++++++++ tools/perf/util/callchain.h | 33 +++++++ 5 files changed, 213 insertions(+), 5 deletions(-) create mode 100644 tools/perf/util/callchain.c create mode 100644 tools/perf/util/callchain.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index d3887ed51a64..1c1296d8a64b 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -329,6 +329,7 @@ LIB_OBJS += util/symbol.o LIB_OBJS += util/color.o LIB_OBJS += util/pager.o LIB_OBJS += util/header.o +LIB_OBJS += util/callchain.o BUILTIN_OBJS += builtin-annotate.o BUILTIN_OBJS += builtin-help.o diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 681c2233f882..28d1cb2127e9 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -62,11 +62,6 @@ struct ip_event { unsigned char __more_data[]; }; -struct ip_callchain { - u64 nr; - u64 ips[0]; -}; - struct mmap_event { struct perf_event_header header; u32 pid, tid; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 16c84fd73c86..a49842b69a59 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -66,4 +66,9 @@ sys_perf_counter_open(struct perf_counter_attr *attr, #define MAX_COUNTERS 256 #define MAX_NR_CPUS 256 +struct ip_callchain { + u64 nr; + u64 ips[0]; +}; + #endif diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c new file mode 100644 index 000000000000..ad3c28578961 --- /dev/null +++ b/tools/perf/util/callchain.c @@ -0,0 +1,174 @@ +/* + * Copyright (C) 2009, Frederic Weisbecker + * + * Handle the callchains from the stream in an ad-hoc radix tree and then + * sort them in an rbtree. 
+ * + */ + +#include +#include +#include +#include + +#include "callchain.h" + + +static void rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct callchain_node *rnode; + + while (*p) { + parent = *p; + rnode = rb_entry(parent, struct callchain_node, rb_node); + + if (rnode->hit < chain->hit) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&chain->rb_node, parent, p); + rb_insert_color(&chain->rb_node, root); +} + +/* + * Once we get every callchains from the stream, we can now + * sort them by hit + */ +void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node) +{ + struct callchain_node *child; + + list_for_each_entry(child, &node->children, brothers) + sort_chain_to_rbtree(rb_root, child); + + if (node->hit) + rb_insert_callchain(rb_root, node); +} + +static struct callchain_node *create_child(struct callchain_node *parent) +{ + struct callchain_node *new; + + new = malloc(sizeof(*new)); + if (!new) { + perror("not enough memory to create child for code path tree"); + return NULL; + } + new->parent = parent; + INIT_LIST_HEAD(&new->children); + INIT_LIST_HEAD(&new->val); + list_add_tail(&new->brothers, &parent->children); + + return new; +} + +static void +fill_node(struct callchain_node *node, struct ip_callchain *chain, int start) +{ + int i; + + for (i = start; i < chain->nr; i++) { + struct callchain_list *call; + + call = malloc(sizeof(*chain)); + if (!call) { + perror("not enough memory for the code path tree"); + return; + } + call->ip = chain->ips[i]; + list_add_tail(&call->list, &node->val); + } + node->val_nr = i - start; +} + +static void add_child(struct callchain_node *parent, struct ip_callchain *chain) +{ + struct callchain_node *new; + + new = create_child(parent); + fill_node(new, chain, parent->val_nr); + + new->hit = 1; +} + +static void +split_add_child(struct callchain_node *parent, struct 
ip_callchain *chain, + struct callchain_list *to_split, int idx) +{ + struct callchain_node *new; + + /* split */ + new = create_child(parent); + list_move_tail(&to_split->list, &new->val); + new->hit = parent->hit; + parent->hit = 0; + parent->val_nr = idx; + + /* create the new one */ + add_child(parent, chain); +} + +static int +__append_chain(struct callchain_node *root, struct ip_callchain *chain, + int start); + +static int +__append_chain_children(struct callchain_node *root, struct ip_callchain *chain) +{ + struct callchain_node *rnode; + + /* lookup in childrens */ + list_for_each_entry(rnode, &root->children, brothers) { + int ret = __append_chain(rnode, chain, root->val_nr); + if (!ret) + return 0; + } + return -1; +} + +static int +__append_chain(struct callchain_node *root, struct ip_callchain *chain, + int start) +{ + struct callchain_list *cnode; + int i = start; + bool found = false; + + /* lookup in the current node */ + list_for_each_entry(cnode, &root->val, list) { + if (cnode->ip != chain->ips[i++]) + break; + if (!found) + found = true; + if (i == chain->nr) + break; + } + + /* matches not, relay on the parent */ + if (!found) + return -1; + + /* we match only a part of the node. 
Split it and add the new chain */ + if (i < root->val_nr) { + split_add_child(root, chain, cnode, i); + return 0; + } + + /* we match 100% of the path, increment the hit */ + if (i == root->val_nr) { + root->hit++; + return 0; + } + + return __append_chain_children(root, chain); +} + +void append_chain(struct callchain_node *root, struct ip_callchain *chain) +{ + if (__append_chain_children(root, chain) == -1) + add_child(root, chain); +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h new file mode 100644 index 000000000000..fa1cd2f71fd3 --- /dev/null +++ b/tools/perf/util/callchain.h @@ -0,0 +1,33 @@ +#ifndef __PERF_CALLCHAIN_H +#define __PERF_CALLCHAIN_H + +#include "../perf.h" +#include "list.h" +#include "rbtree.h" + + +struct callchain_node { + struct callchain_node *parent; + struct list_head brothers; + struct list_head children; + struct list_head val; + struct rb_node rb_node; + int val_nr; + int hit; +}; + +struct callchain_list { + unsigned long ip; + struct list_head list; +}; + +static inline void callchain_init(struct callchain_node *node) +{ + INIT_LIST_HEAD(&node->brothers); + INIT_LIST_HEAD(&node->children); + INIT_LIST_HEAD(&node->val); +} + +void append_chain(struct callchain_node *root, struct ip_callchain *chain); +void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node); +#endif From f55c555226b1010b249730ec6b232e5470286950 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 26 Jun 2009 16:28:01 +0200 Subject: [PATCH 090/741] perf report: Print sorted callchains per histogram entries Use the newly created callchains radix tree to gather the chains stats from the recorded events and then print the callchains for all of them, sorted by hits, using the "-c" parameter with perf report. 
Example: 66.15% [k] atm_clip_exit 63.08% 0xffffffffffffff80 0xffffffff810196a8 0xffffffff810c14c8 0xffffffff8101a79c 0xffffffff810194f3 0xffffffff8106ab7f 0xffffffff8106abe5 0xffffffff8106acde 0xffffffff8100d94b 0xffffffff8153e7ea [...] 1.54% 0xffffffffffffff80 0xffffffff810196a8 0xffffffff810c14c8 0xffffffff8101a79c [...] Symbols are not yet resolved. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1246026481-8314-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 78 ++++++++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 28d1cb2127e9..ed391db9e0f8 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -15,6 +15,7 @@ #include "util/rbtree.h" #include "util/symbol.h" #include "util/string.h" +#include "util/callchain.h" #include "perf.h" #include "util/header.h" @@ -52,6 +53,7 @@ static char *parent_pattern = default_parent_pattern; static regex_t parent_regex; static int exclude_other = 1; +static int callchain; static u64 sample_type; @@ -488,17 +490,19 @@ static size_t threads__fprintf(FILE *fp) static struct rb_root hist; struct hist_entry { - struct rb_node rb_node; + struct rb_node rb_node; - struct thread *thread; - struct map *map; - struct dso *dso; - struct symbol *sym; - struct symbol *parent; - u64 ip; - char level; + struct thread *thread; + struct map *map; + struct dso *dso; + struct symbol *sym; + struct symbol *parent; + u64 ip; + char level; + struct callchain_node callchain; + struct rb_root sorted_chain; - u64 count; + u64 count; }; /* @@ -768,6 +772,48 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) return cmp; } +static size_t +callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples) +{ + struct callchain_list *chain; + size_t ret = 0; + + if (!self) + return 0; 
+ + ret += callchain__fprintf(fp, self->parent, total_samples); + + + list_for_each_entry(chain, &self->val, list) + ret += fprintf(fp, " %p\n", (void *)chain->ip); + + return ret; +} + +static size_t +hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, + u64 total_samples) +{ + struct rb_node *rb_node; + struct callchain_node *chain; + size_t ret = 0; + + rb_node = rb_first(&self->sorted_chain); + while (rb_node) { + double percent; + + chain = rb_entry(rb_node, struct callchain_node, rb_node); + percent = chain->hit * 100.0 / total_samples; + ret += fprintf(fp, " %6.2f%%\n", percent); + ret += callchain__fprintf(fp, chain, total_samples); + ret += fprintf(fp, "\n"); + rb_node = rb_next(rb_node); + } + + return ret; +} + + static size_t hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) { @@ -808,6 +854,9 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) ret += fprintf(fp, "\n"); + if (callchain) + hist_entry_callchain__fprintf(fp, self, total_samples); + return ret; } @@ -892,6 +941,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, .level = level, .count = count, .parent = NULL, + .sorted_chain = RB_ROOT }; int cmp; @@ -934,6 +984,8 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, if (!cmp) { he->count += count; + if (callchain) + append_chain(&he->callchain, chain); return 0; } @@ -947,6 +999,10 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, if (!he) return -ENOMEM; *he = entry; + if (callchain) { + callchain_init(&he->callchain); + append_chain(&he->callchain, chain); + } rb_link_node(&he->rb_node, parent, p); rb_insert_color(&he->rb_node, &hist); @@ -1023,6 +1079,9 @@ static void output__insert_entry(struct hist_entry *he) struct rb_node *parent = NULL; struct hist_entry *iter; + if (callchain) + sort_chain_to_rbtree(&he->sorted_chain, &he->callchain); + while (*p != NULL) { parent = *p; iter = rb_entry(parent, 
struct hist_entry, rb_node); @@ -1599,6 +1658,7 @@ static const struct option options[] = { "regex filter to identify parent, see: '--sort parent'"), OPT_BOOLEAN('x', "exclude-other", &exclude_other, "Only display entries with parent-match"), + OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"), OPT_END() }; From 39562e783928e3ea9ee2cbce99a756ab48d3c06a Mon Sep 17 00:00:00 2001 From: Christof Schmitt Date: Fri, 26 Jun 2009 16:30:43 +0200 Subject: [PATCH 091/741] [SCSI] FC transport: Locking fix for common-code FC pass-through patch Fix this: ------------[ cut here ]------------ Badness at block/blk-core.c:244 CPU: 0 Tainted: G W 2.6.31-rc1-00004-gd3a263a #3 Process zfcp_wq (pid: 901, task: 000000002fb7a038, ksp: 000000002f02bc78) Krnl PSW : 0704300180000000 00000000002141ba (blk_remove_plug+0xb2/0xb8) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:0 CC:3 PM:0 EA:3 Krnl GPRS: 0000000000000001 0000000000000001 0000000022811440 0000000022811798 000000000027ff4e 0000000000000000 0000000000000000 000000002f00f000 070000000006a0f4 000000002af70000 000000002af2a800 00000000228d1c00 0000000022811440 000000000050c708 000000002f02bca8 000000002f02bc80 Krnl Code: 00000000002141b0: b9140022 lgfr %r2,%r2 00000000002141b4: 07fe bcr 15,%r14 00000000002141b6: a7f40001 brc 15,2141b8 >00000000002141ba: a7f4ffbe brc 15,214136 00000000002141be: 0707 bcr 0,%r7 00000000002141c0: ebaff0680024 stmg %r10,%r15,104(%r15) 00000000002141c6: c0d00017c2a9 larl %r13,50c718 00000000002141cc: a7f13fc0 tmll %r15,16320 Call Trace: ([<000000000050e7d8>] C.272.16122+0x88/0x110) [<00000000002141ec>] __blk_run_queue+0x2c/0x154 [<000000000028013a>] fc_remote_port_add+0x85e/0x95c [<000000000037596e>] zfcp_scsi_rport_work+0xe6/0x148 [<000000000006908c>] worker_thread+0x25c/0x318 [<000000000006f10c>] kthread+0x94/0x9c [<000000000001c2b2>] kernel_thread_starter+0x6/0xc [<000000000001c2ac>] kernel_thread_starter+0x0/0xc INFO: lockdep is turned off. 
Last Breaking-Event-Address: [<00000000002141b6>] blk_remove_plug+0xae/0xb8 The FC pass-through support triggers the WARN_ON(!irqs_disabled()) in blk_plug_device. Since blk_plug_device requires being called with disabled interrupts, use spin_lock_irqsave in fc_bsg_goose_queue to disable the interrupts before calling into the block layer. Signed-off-by: Christof Schmitt Acked-by: James Smart Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_fc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 2eee9e6e4fe8..292c02f810d0 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -3670,13 +3670,14 @@ static void fc_bsg_goose_queue(struct fc_rport *rport) { int flagset; + unsigned long flags; if (!rport->rqst_q) return; get_device(&rport->dev); - spin_lock(rport->rqst_q->queue_lock); + spin_lock_irqsave(rport->rqst_q->queue_lock, flags); flagset = test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags) && !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags); if (flagset) @@ -3684,7 +3685,7 @@ fc_bsg_goose_queue(struct fc_rport *rport) __blk_run_queue(rport->rqst_q); if (flagset) queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q); - spin_unlock(rport->rqst_q->queue_lock); + spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags); put_device(&rport->dev); } From 19d2e755436054dfc2be640bffc32e427c37ac3d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 Jun 2009 13:10:23 +0200 Subject: [PATCH 092/741] perf_counter: Complete counter swap Complete the counter swap by indeed switching the times too and updating the userpage after modifying the counter values. 
Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1246014623.31755.195.camel@twins> Signed-off-by: Ingo Molnar --- kernel/perf_counter.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f2f232696587..66ab1e9d1294 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1048,9 +1048,14 @@ static void __perf_counter_sync_stat(struct perf_counter *counter, value = atomic64_xchg(&counter->count, value); atomic64_set(&next_counter->count, value); + swap(counter->total_time_enabled, next_counter->total_time_enabled); + swap(counter->total_time_running, next_counter->total_time_running); + /* - * XXX also sync time_enabled and time_running ? + * Since we swizzled the values, update the user visible data too. */ + perf_counter_update_userpage(counter); + perf_counter_update_userpage(next_counter); } #define list_next_entry(pos, member) \ From e0a2a1601bec01243bcad44414d06f59dae2eedb Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 26 Jun 2009 17:38:25 +0100 Subject: [PATCH 093/741] kmemleak: Enable task stacks scanning by default This is to reduce the number of false positives reported. Signed-off-by: Catalin Marinas --- Documentation/kmemleak.txt | 8 ++++---- mm/kmemleak.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt index f655308064d7..9426e94f291a 100644 --- a/Documentation/kmemleak.txt +++ b/Documentation/kmemleak.txt @@ -31,12 +31,12 @@ Memory scanning parameters can be modified at run-time by writing to the /sys/kernel/debug/kmemleak file. 
The following parameters are supported: off - disable kmemleak (irreversible) - stack=on - enable the task stacks scanning + stack=on - enable the task stacks scanning (default) stack=off - disable the tasks stacks scanning - scan=on - start the automatic memory scanning thread + scan=on - start the automatic memory scanning thread (default) scan=off - stop the automatic memory scanning thread - scan=<secs> - set the automatic memory scanning period in seconds (0 - to disable it) + scan=<secs> - set the automatic memory scanning period in seconds + (default 600, 0 to stop the automatic scanning) Kmemleak can also be disabled at boot-time by passing "kmemleak=off" on the kernel command line. diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 17096d1b59b2..a38418a95d33 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -194,7 +194,7 @@ static unsigned long jiffies_min_age; /* delay between automatic memory scannings */ static signed long jiffies_scan_wait; /* enables or disables the task stacks scanning */ -static int kmemleak_stack_scan; +static int kmemleak_stack_scan = 1; /* mutex protecting the memory scanning */ static DEFINE_MUTEX(scan_mutex); /* mutex protecting the access to the /sys/kernel/debug/kmemleak file */ From bab4a34afc301fdb81b6ea0e3098d96fc356e03a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 26 Jun 2009 17:38:26 +0100 Subject: [PATCH 094/741] kmemleak: Simplify the reports logged by the scanning thread Because of false positives, the memory scanning thread may print too much information. This patch changes the scanning thread to only print the number of newly suspected leaks. Further information can be read from the /sys/kernel/debug/kmemleak file.
Signed-off-by: Catalin Marinas --- Documentation/kmemleak.txt | 6 ++-- mm/kmemleak.c | 61 ++++++++++---------------------------- 2 files changed, 19 insertions(+), 48 deletions(-) diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt index 9426e94f291a..c06f7ba64993 100644 --- a/Documentation/kmemleak.txt +++ b/Documentation/kmemleak.txt @@ -16,9 +16,9 @@ Usage ----- CONFIG_DEBUG_KMEMLEAK in "Kernel hacking" has to be enabled. A kernel -thread scans the memory every 10 minutes (by default) and prints any new -unreferenced objects found. To trigger an intermediate scan and display -all the possible memory leaks: +thread scans the memory every 10 minutes (by default) and prints the +number of new unreferenced objects found. To trigger an intermediate +scan and display the details of all the possible memory leaks: # mount -t debugfs nodev /sys/kernel/debug/ # cat /sys/kernel/debug/kmemleak diff --git a/mm/kmemleak.c b/mm/kmemleak.c index a38418a95d33..4130a4889fa9 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -278,15 +278,6 @@ static int color_gray(const struct kmemleak_object *object) return object->min_count != -1 && object->count >= object->min_count; } -/* - * Objects are considered referenced if their color is gray and they have not - * been deleted. - */ -static int referenced_object(struct kmemleak_object *object) -{ - return (object->flags & OBJECT_ALLOCATED) && color_gray(object); -} - /* * Objects are considered unreferenced only if their color is white, they have * not be deleted and have a minimum age to avoid false positives caused by @@ -299,38 +290,23 @@ static int unreferenced_object(struct kmemleak_object *object) } /* - * Printing of the (un)referenced objects information, either to the seq file - * or to the kernel log. The print_referenced/print_unreferenced functions - * must be called with the object->lock held. + * Printing of the unreferenced objects information to the seq file. 
The + * print_unreferenced function must be called with the object->lock held. */ -#define print_helper(seq, x...) do { \ - struct seq_file *s = (seq); \ - if (s) \ - seq_printf(s, x); \ - else \ - pr_info(x); \ -} while (0) - -static void print_referenced(struct kmemleak_object *object) -{ - pr_info("referenced object 0x%08lx (size %zu)\n", - object->pointer, object->size); -} - static void print_unreferenced(struct seq_file *seq, struct kmemleak_object *object) { int i; - print_helper(seq, "unreferenced object 0x%08lx (size %zu):\n", - object->pointer, object->size); - print_helper(seq, " comm \"%s\", pid %d, jiffies %lu\n", - object->comm, object->pid, object->jiffies); - print_helper(seq, " backtrace:\n"); + seq_printf(seq, "unreferenced object 0x%08lx (size %zu):\n", + object->pointer, object->size); + seq_printf(seq, " comm \"%s\", pid %d, jiffies %lu\n", + object->comm, object->pid, object->jiffies); + seq_printf(seq, " backtrace:\n"); for (i = 0; i < object->trace_len; i++) { void *ptr = (void *)object->trace[i]; - print_helper(seq, " [<%p>] %pS\n", ptr, ptr); + seq_printf(seq, " [<%p>] %pS\n", ptr, ptr); } } @@ -571,8 +547,6 @@ static void delete_object(unsigned long ptr) * cannot be freed when it is being scanned. 
*/ spin_lock_irqsave(&object->lock, flags); - if (object->flags & OBJECT_REPORTED) - print_referenced(object); object->flags &= ~OBJECT_ALLOCATED; spin_unlock_irqrestore(&object->lock, flags); put_object(object); @@ -1073,33 +1047,30 @@ static int kmemleak_scan_thread(void *arg) while (!kthread_should_stop()) { struct kmemleak_object *object; signed long timeout = jiffies_scan_wait; + int new_leaks = 0; mutex_lock(&scan_mutex); kmemleak_scan(); - reported_leaks = 0; rcu_read_lock(); list_for_each_entry_rcu(object, &object_list, object_list) { unsigned long flags; - if (reported_leaks >= REPORTS_NR) - break; spin_lock_irqsave(&object->lock, flags); - if (!(object->flags & OBJECT_REPORTED) && - unreferenced_object(object)) { - print_unreferenced(NULL, object); + if (unreferenced_object(object) && + !(object->flags & OBJECT_REPORTED)) { object->flags |= OBJECT_REPORTED; - reported_leaks++; - } else if ((object->flags & OBJECT_REPORTED) && - referenced_object(object)) { - print_referenced(object); - object->flags &= ~OBJECT_REPORTED; + new_leaks++; } spin_unlock_irqrestore(&object->lock, flags); } rcu_read_unlock(); + if (new_leaks) + pr_info("%d new suspected memory leaks (see " + "/sys/kernel/debug/kmemleak)\n", new_leaks); + mutex_unlock(&scan_mutex); /* wait before the next scan */ while (timeout && !kthread_should_stop()) From 4698c1f2bbe44ce852ef1a6716973c1f5401a4c4 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 26 Jun 2009 17:38:27 +0100 Subject: [PATCH 095/741] kmemleak: Do not trigger a scan when reading the debug/kmemleak file Since there is a kernel thread for automatically scanning the memory, it makes sense for the debug/kmemleak file to only show its findings. This patch also adds support for "echo scan > debug/kmemleak" to trigger an intermediate memory scan and eliminates the kmemleak_mutex (scan_mutex covers all the cases now). 
Signed-off-by: Catalin Marinas --- Documentation/kmemleak.txt | 9 +++- mm/kmemleak.c | 92 +++++++++++++++++--------------------- 2 files changed, 48 insertions(+), 53 deletions(-) diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt index c06f7ba64993..89068030b01b 100644 --- a/Documentation/kmemleak.txt +++ b/Documentation/kmemleak.txt @@ -17,12 +17,16 @@ Usage CONFIG_DEBUG_KMEMLEAK in "Kernel hacking" has to be enabled. A kernel thread scans the memory every 10 minutes (by default) and prints the -number of new unreferenced objects found. To trigger an intermediate -scan and display the details of all the possible memory leaks: +number of new unreferenced objects found. To display the details of all +the possible memory leaks: # mount -t debugfs nodev /sys/kernel/debug/ # cat /sys/kernel/debug/kmemleak +To trigger an intermediate memory scan: + + # echo scan > /sys/kernel/debug/kmemleak + Note that the orphan objects are listed in the order they were allocated and one object at the beginning of the list may cause other subsequent objects to be reported as orphan. @@ -37,6 +41,7 @@ Memory scanning parameters can be modified at run-time by writing to the scan=off - stop the automatic memory scanning thread scan= - set the automatic memory scanning period in seconds (default 600, 0 to stop the automatic scanning) + scan - trigger a memory scan Kmemleak can also be disabled at boot-time by passing "kmemleak=off" on the kernel command line. diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 4130a4889fa9..e96e0ec6a56e 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -48,10 +48,10 @@ * scanned. This list is only modified during a scanning episode when the * scan_mutex is held. At the end of a scan, the gray_list is always empty. 
* Note that the kmemleak_object.use_count is incremented when an object is - * added to the gray_list and therefore cannot be freed - * - kmemleak_mutex (mutex): prevents multiple users of the "kmemleak" debugfs - * file together with modifications to the memory scanning parameters - * including the scan_thread pointer + * added to the gray_list and therefore cannot be freed. This mutex also + * prevents multiple users of the "kmemleak" debugfs file together with + * modifications to the memory scanning parameters including the scan_thread + * pointer * * The kmemleak_object structures have a use_count incremented or decremented * using the get_object()/put_object() functions. When the use_count becomes @@ -195,10 +195,8 @@ static unsigned long jiffies_min_age; static signed long jiffies_scan_wait; /* enables or disables the task stacks scanning */ static int kmemleak_stack_scan = 1; -/* mutex protecting the memory scanning */ +/* protects the memory scanning, parameters and debug/kmemleak file access */ static DEFINE_MUTEX(scan_mutex); -/* mutex protecting the access to the /sys/kernel/debug/kmemleak file */ -static DEFINE_MUTEX(kmemleak_mutex); /* number of leaks reported (for limitation purposes) */ static int reported_leaks; @@ -927,6 +925,7 @@ static void kmemleak_scan(void) struct kmemleak_object *object, *tmp; struct task_struct *task; int i; + int new_leaks = 0; /* prepare the kmemleak_object's */ rcu_read_lock(); @@ -1024,6 +1023,26 @@ static void kmemleak_scan(void) object = tmp; } WARN_ON(!list_empty(&gray_list)); + + /* + * Scanning result reporting. 
+ */ + rcu_read_lock(); + list_for_each_entry_rcu(object, &object_list, object_list) { + spin_lock_irqsave(&object->lock, flags); + if (unreferenced_object(object) && + !(object->flags & OBJECT_REPORTED)) { + object->flags |= OBJECT_REPORTED; + new_leaks++; + } + spin_unlock_irqrestore(&object->lock, flags); + } + rcu_read_unlock(); + + if (new_leaks) + pr_info("%d new suspected memory leaks (see " + "/sys/kernel/debug/kmemleak)\n", new_leaks); + } /* @@ -1045,33 +1064,12 @@ static int kmemleak_scan_thread(void *arg) } while (!kthread_should_stop()) { - struct kmemleak_object *object; signed long timeout = jiffies_scan_wait; - int new_leaks = 0; mutex_lock(&scan_mutex); - kmemleak_scan(); - - rcu_read_lock(); - list_for_each_entry_rcu(object, &object_list, object_list) { - unsigned long flags; - - spin_lock_irqsave(&object->lock, flags); - if (unreferenced_object(object) && - !(object->flags & OBJECT_REPORTED)) { - object->flags |= OBJECT_REPORTED; - new_leaks++; - } - spin_unlock_irqrestore(&object->lock, flags); - } - rcu_read_unlock(); - - if (new_leaks) - pr_info("%d new suspected memory leaks (see " - "/sys/kernel/debug/kmemleak)\n", new_leaks); - mutex_unlock(&scan_mutex); + /* wait before the next scan */ while (timeout && !kthread_should_stop()) timeout = schedule_timeout_interruptible(timeout); @@ -1084,7 +1082,7 @@ static int kmemleak_scan_thread(void *arg) /* * Start the automatic memory scanning thread. This function must be called - * with the kmemleak_mutex held. + * with the scan_mutex held. */ void start_scan_thread(void) { @@ -1099,7 +1097,7 @@ void start_scan_thread(void) /* * Stop the automatic memory scanning thread. This function must be called - * with the kmemleak_mutex held. + * with the scan_mutex held. 
*/ void stop_scan_thread(void) { @@ -1119,10 +1117,8 @@ static void *kmemleak_seq_start(struct seq_file *seq, loff_t *pos) struct kmemleak_object *object; loff_t n = *pos; - if (!n) { - kmemleak_scan(); + if (!n) reported_leaks = 0; - } if (reported_leaks >= REPORTS_NR) return NULL; @@ -1206,13 +1202,10 @@ static int kmemleak_open(struct inode *inode, struct file *file) if (!atomic_read(&kmemleak_enabled)) return -EBUSY; - ret = mutex_lock_interruptible(&kmemleak_mutex); + ret = mutex_lock_interruptible(&scan_mutex); if (ret < 0) goto out; if (file->f_mode & FMODE_READ) { - ret = mutex_lock_interruptible(&scan_mutex); - if (ret < 0) - goto kmemleak_unlock; ret = seq_open(file, &kmemleak_seq_ops); if (ret < 0) goto scan_unlock; @@ -1221,8 +1214,6 @@ static int kmemleak_open(struct inode *inode, struct file *file) scan_unlock: mutex_unlock(&scan_mutex); -kmemleak_unlock: - mutex_unlock(&kmemleak_mutex); out: return ret; } @@ -1231,11 +1222,9 @@ static int kmemleak_release(struct inode *inode, struct file *file) { int ret = 0; - if (file->f_mode & FMODE_READ) { + if (file->f_mode & FMODE_READ) seq_release(inode, file); - mutex_unlock(&scan_mutex); - } - mutex_unlock(&kmemleak_mutex); + mutex_unlock(&scan_mutex); return ret; } @@ -1250,6 +1239,7 @@ static int kmemleak_release(struct inode *inode, struct file *file) * scan=off - stop the automatic memory scanning thread * scan=... 
- set the automatic memory scanning period in seconds (0 to * disable it) + * scan - trigger a memory scan */ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf, size_t size, loff_t *ppos) @@ -1287,7 +1277,9 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf, jiffies_scan_wait = msecs_to_jiffies(secs * 1000); start_scan_thread(); } - } else + } else if (strncmp(buf, "scan", 4) == 0) + kmemleak_scan(); + else return -EINVAL; /* ignore the rest of the buffer, only one command at a time */ @@ -1312,11 +1304,9 @@ static int kmemleak_cleanup_thread(void *arg) { struct kmemleak_object *object; - mutex_lock(&kmemleak_mutex); - stop_scan_thread(); - mutex_unlock(&kmemleak_mutex); - mutex_lock(&scan_mutex); + stop_scan_thread(); + rcu_read_lock(); list_for_each_entry_rcu(object, &object_list, object_list) delete_object(object->pointer); @@ -1458,9 +1448,9 @@ static int __init kmemleak_late_init(void) &kmemleak_fops); if (!dentry) pr_warning("Failed to create the debugfs kmemleak file\n"); - mutex_lock(&kmemleak_mutex); + mutex_lock(&scan_mutex); start_scan_thread(); - mutex_unlock(&kmemleak_mutex); + mutex_unlock(&scan_mutex); pr_info("Kernel memory leak detector initialized\n"); From acf4968ec9dea49387ca8b3d36dfaa0850bdb2d5 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 26 Jun 2009 17:38:29 +0100 Subject: [PATCH 096/741] kmemleak: Slightly change the policy on newly allocated objects Newly allocated objects are more likely to be reported as false positives. Kmemleak ignores the reporting of objects younger than 5 seconds. However, this age was calculated after the memory scanning completed which usually takes longer than 5 seconds. This patch makes the minimum object age calculation in relation to the start of the memory scanning.
Signed-off-by: Catalin Marinas --- mm/kmemleak.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e96e0ec6a56e..c37e8e50e4de 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -190,7 +190,9 @@ static unsigned long max_addr; static unsigned long next_scan_yield; static struct task_struct *scan_thread; static unsigned long jiffies_scan_yield; +/* used to avoid reporting of recently allocated objects */ static unsigned long jiffies_min_age; +static unsigned long jiffies_last_scan; /* delay between automatic memory scannings */ static signed long jiffies_scan_wait; /* enables or disables the task stacks scanning */ @@ -284,7 +286,8 @@ static int color_gray(const struct kmemleak_object *object) static int unreferenced_object(struct kmemleak_object *object) { return (object->flags & OBJECT_ALLOCATED) && color_white(object) && - time_is_before_eq_jiffies(object->jiffies + jiffies_min_age); + time_before_eq(object->jiffies + jiffies_min_age, + jiffies_last_scan); } /* @@ -927,6 +930,8 @@ static void kmemleak_scan(void) int i; int new_leaks = 0; + jiffies_last_scan = jiffies; + /* prepare the kmemleak_object's */ rcu_read_lock(); list_for_each_entry_rcu(object, &object_list, object_list) { From 89bb871e96cdc3d78b7f69f0bacc94b21bbaccfd Mon Sep 17 00:00:00 2001 From: "Steven A. Falco" Date: Fri, 26 Jun 2009 12:42:47 -0400 Subject: [PATCH 097/741] mtd: m25p80 timeout too short for worst-case m25p16 devices The m25p16 data sheet from numonyx lists the worst-case bulk erase time (tBE) as 40 seconds. Signed-off-by: Steven A. 
Falco Signed-off-by: David Woodhouse --- drivers/mtd/devices/m25p80.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index 59c46126a5ce..ae5fe91867e1 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -54,7 +54,7 @@ #define SR_SRWD 0x80 /* SR write protect */ /* Define max times to check status register before we give up. */ -#define MAX_READY_WAIT_JIFFIES (10 * HZ) /* eg. M25P128 specs 6s max sector erase */ +#define MAX_READY_WAIT_JIFFIES (40 * HZ) /* M25P16 specs 40s max chip erase */ #define CMD_SIZE 4 #ifdef CONFIG_M25PXX_USE_FAST_READ From 9c72ebef5aabf3532469d602a9d87beceea268b1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 26 Jun 2009 11:22:37 -0700 Subject: [PATCH 098/741] ide-cd: handle fragmented packet commands gracefully There are some devices in the wild that clear the DRQ bit during the last word of a packet command and therefore could use a "second chance" for that last word of data to be xferred instead of simply failing the request. Do that by attempting to suck in those last bytes in PIO mode. In addition, the ATA_ERR bit has to be cleared for we cannot be sure the data is valid otherwise. See http://bugzilla.kernel.org/show_bug.cgi?id=13399 for details. Signed-off-by: Borislav Petkov Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. Miller --- drivers/ide/ide-cd.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index f0ede5953af8..6a9a769bffc1 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -592,9 +592,19 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) } } else if (!blk_pc_request(rq)) { ide_cd_request_sense_fixup(drive, cmd); - /* complain if we still have data left to transfer */ + uptodate = cmd->nleft ? 
0 : 1; - if (uptodate == 0) + + /* + * suck out the remaining bytes from the drive in an + * attempt to complete the data xfer. (see BZ#13399) + */ + if (!(stat & ATA_ERR) && !uptodate && thislen) { + ide_pio_bytes(drive, cmd, write, thislen); + uptodate = cmd->nleft ? 0 : 1; + } + + if (!uptodate) rq->cmd_flags |= REQ_FAILED; } goto out_end; From a32c7765e2796395aec49f699bd25c407155e9c5 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 26 Jun 2009 16:55:51 +0800 Subject: [PATCH 099/741] tracing: Fix stack tracer sysctl handling This made my machine completely frozen: # echo 1 > /proc/sys/kernel/stack_tracer_enabled # echo 2 > /proc/sys/kernel/stack_tracer_enabled The cause is register_ftrace_function() was called twice. Also fix ftrace_enabled sysctl, though seems nothing bad happened as I tested it. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4A448D17.9010305@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 4 ++-- kernel/trace/trace_stack.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f3716bf04df6..bce9e01a29c8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3160,10 +3160,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, ret = proc_dointvec(table, write, file, buffer, lenp, ppos); - if (ret || !write || (last_ftrace_enabled == ftrace_enabled)) + if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) goto out; - last_ftrace_enabled = ftrace_enabled; + last_ftrace_enabled = !!ftrace_enabled; if (ftrace_enabled) { diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 2d7aebd71dbd..e644af910124 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -326,10 +326,10 @@ stack_trace_sysctl(struct ctl_table *table, int write, ret = proc_dointvec(table, write, file, buffer, lenp, ppos); if (ret || !write || - (last_stack_tracer_enabled == 
stack_tracer_enabled)) + (last_stack_tracer_enabled == !!stack_tracer_enabled)) goto out; - last_stack_tracer_enabled = stack_tracer_enabled; + last_stack_tracer_enabled = !!stack_tracer_enabled; if (stack_tracer_enabled) register_ftrace_function(&trace_ops); From 82d5308127c3e3404ffbf41e503853c68660b18b Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 26 Jun 2009 17:07:02 +0800 Subject: [PATCH 100/741] trace_export: Repair missed fields Some fields for struct ftrace_graph_ret are missed when they are exported to user. Signed-off-by: Lai Jiangshan Cc: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <4A448FB6.5000302@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_event_types.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h index 5e32e375134d..6db005e12487 100644 --- a/kernel/trace/trace_event_types.h +++ b/kernel/trace/trace_event_types.h @@ -26,6 +26,9 @@ TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET, ftrace_graph_ret_entry, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned long, ret.func, func) + TRACE_FIELD(unsigned long long, ret.calltime, calltime) + TRACE_FIELD(unsigned long long, ret.rettime, rettime) + TRACE_FIELD(unsigned long, ret.overrun, overrun) TRACE_FIELD(int, ret.depth, depth) ), TP_RAW_FMT("<-- %lx (%d)") From 73f1d9391a6aa72efdcea2f302ee7bfcd313c631 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 24 Jun 2009 01:04:36 +0900 Subject: [PATCH 101/741] asm-generic/vmlinux.lds.h: Fix up RW_DATA_SECTION definition. RW_DATA_SECTION is defined to take 4 different alignment parameters, while NOSAVE_DATA currently uses a fixed PAGE_SIZE alignment as noted in the comments. There are presently no in-tree users of this at present, and I just stumbled across this while implementing the simplified script on a new architecture port, which subsequently resulted in a syntax error. 
Signed-off-by: Paul Mundt Signed-off-by: Sam Ravnborg --- include/asm-generic/vmlinux.lds.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 92b73b6140ff..f92e730695c8 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -704,7 +704,7 @@ * matches the requirment of PAGE_ALIGNED_DATA. * * use 0 as page_align if page_aligned data is not used */ -#define RW_DATA_SECTION(cacheline, nosave, pagealigned, inittask) \ +#define RW_DATA_SECTION(cacheline, pagealigned, inittask) \ . = ALIGN(PAGE_SIZE); \ .data : AT(ADDR(.data) - LOAD_OFFSET) { \ INIT_TASK(inittask) \ @@ -712,7 +712,7 @@ READ_MOSTLY_DATA(cacheline) \ DATA_DATA \ CONSTRUCTORS \ - NOSAVE_DATA(nosave) \ + NOSAVE_DATA \ PAGE_ALIGNED_DATA(pagealigned) \ } From d2af12aeadaedf657c9fb9c3df984d2c5ab25f4c Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Tue, 23 Jun 2009 19:59:35 -0400 Subject: [PATCH 102/741] Add new macros for page-aligned data and bss sections. This patch is preparation for replacing most uses of ".bss.page_aligned" and ".data.page_aligned" in the kernel with macros, so that the section name can later be changed without having to touch a lot of the kernel. The long-term goal here is to be able to change the kernel's magic section names to those that are compatible with -ffunction-sections -fdata-sections. This requires renaming all magic sections with names of the form ".data.foo". 
Signed-off-by: Tim Abbott Acked-by: David Howells Signed-off-by: Sam Ravnborg --- include/linux/linkage.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/linux/linkage.h b/include/linux/linkage.h index fee9e59649c1..691f59171c6c 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -21,6 +21,15 @@ #define __page_aligned_data __section(.data.page_aligned) __aligned(PAGE_SIZE) #define __page_aligned_bss __section(.bss.page_aligned) __aligned(PAGE_SIZE) +/* + * For assembly routines. + * + * Note when using these that you must specify the appropriate + * alignment directives yourself + */ +#define __PAGE_ALIGNED_DATA .section ".data.page_aligned", "aw" +#define __PAGE_ALIGNED_BSS .section ".bss.page_aligned", "aw" + /* * This is used by architectures to keep arguments on the stack * untouched by the compiler by keeping them live until the end. From 39a449d96ac3db9b6d498b6ffbf4c763746d5e8b Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Tue, 23 Jun 2009 18:53:15 -0400 Subject: [PATCH 103/741] asm-generic/vmlinux.lds.h: shuffle INIT_TASK* macro names in vmlinux.lds.h We recently added a INIT_TASK(align) in include/asm-generic/vmlinux.lds.h, but there is already a macro INIT_TASK in include/linux/init_task.h, which is quite confusing. We should switch the macro in the linker script to INIT_TASK_DATA. (Sorry that I missed this in reviewing the patch). Since the macros are new, there is only one user of the INIT_TASK in vmlinux.lds.h, arch/mn10300/kernel/vmlinux.lds.S. However, we are currently using INIT_TASK_DATA for laying down an entire .data.init_task section. So rename that to INIT_TASK_DATA_SECTION. I would be worried about changing the meaning of INIT_TASK_DATA, but the old INIT_TASK_DATA implementation had no users, and in fact if anyone had tried to use it, it would have failed to compile because it didn't pass the alignment to the old INIT_TASK. 
Signed-off-by: Tim Abbott Cc: David Howells Cc: Jesper Nilsson --- arch/mn10300/kernel/vmlinux.lds.S | 2 +- include/asm-generic/vmlinux.lds.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S index bcebcefb4ad7..c96ba3da95ac 100644 --- a/arch/mn10300/kernel/vmlinux.lds.S +++ b/arch/mn10300/kernel/vmlinux.lds.S @@ -61,7 +61,7 @@ SECTIONS _edata = .; /* End of data section */ } - .data.init_task : { INIT_TASK(THREAD_SIZE); } + .data.init_task : { INIT_TASK_DATA(THREAD_SIZE); } /* might get freed after init */ . = ALIGN(PAGE_SIZE); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f92e730695c8..720af4c72206 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -191,7 +191,7 @@ . = ALIGN(align); \ *(.data.cacheline_aligned) -#define INIT_TASK(align) \ +#define INIT_TASK_DATA(align) \ . = ALIGN(align); \ *(.data.init_task) @@ -434,10 +434,10 @@ /* * Init task */ -#define INIT_TASK_DATA(align) \ +#define INIT_TASK_DATA_SECTION(align) \ . = ALIGN(align); \ .data.init_task : { \ - INIT_TASK \ + INIT_TASK_DATA(align) \ } #ifdef CONFIG_CONSTRUCTORS @@ -707,7 +707,7 @@ #define RW_DATA_SECTION(cacheline, pagealigned, inittask) \ . = ALIGN(PAGE_SIZE); \ .data : AT(ADDR(.data) - LOAD_OFFSET) { \ - INIT_TASK(inittask) \ + INIT_TASK_DATA(inittask) \ CACHELINE_ALIGNED_DATA(cacheline) \ READ_MOSTLY_DATA(cacheline) \ DATA_DATA \ From 857eceebd2803c9a3459f784acf45e5266921e4d Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Tue, 23 Jun 2009 19:59:36 -0400 Subject: [PATCH 104/741] Add new __init_task_data macro to be used in arch init_task.c files. This patch is preparation for replacing most ".data.init_task" in the kernel with macros, so that the section name can later be changed without having to touch a lot of the kernel. 
The long-term goal here is to be able to change the kernel's magic section names to those that are compatible with -ffunction-sections -fdata-sections. This requires renaming all magic sections with names of the form ".data.foo". Signed-off-by: Tim Abbott Signed-off-by: Sam Ravnborg --- include/linux/init_task.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 5368fbdc7801..7fc01b13be43 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -183,5 +183,8 @@ extern struct cred init_cred; LIST_HEAD_INIT(cpu_timers[2]), \ } +/* Attach to the init_task data structure for proper alignment */ +#define __init_task_data __attribute__((__section__(".data.init_task"))) + #endif From 1ab18486e4e8bf9554d8439207b97422d7466d77 Mon Sep 17 00:00:00 2001 From: maximilian attems Date: Fri, 26 Jun 2009 20:04:36 +0200 Subject: [PATCH 105/741] kbuild: deb-pkg ship changelog In the series for 2.6.31 it was noticed to ship the copyright, but the generated changelog got lost somehow. As bonus the generated linux-image deb packages are Lenny lintian clean. Cc: Frans Pop Cc: Andres Salomon Signed-off-by: maximilian attems Signed-off-by: Sam Ravnborg --- scripts/package/builddeb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/package/builddeb b/scripts/package/builddeb index 01c2d13dd020..b19f1f4962e3 100644 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -16,6 +16,8 @@ create_package() { local pname="$1" pdir="$2" cp debian/copyright "$pdir/usr/share/doc/$pname/" + cp debian/changelog "$pdir/usr/share/doc/$pname/changelog.Debian" + gzip -9 "$pdir/usr/share/doc/$pname/changelog.Debian" # Fix ownership and permissions chown -R root:root "$pdir" From 7a6b1f1c0c492a6bb6f778dff0f9f5facb90d1a1 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 22 Jun 2009 17:18:32 +0800 Subject: [PATCH 106/741] gitignore: ignore gcov output files Ignore *.gcno files which are generated by gcov. 
Signed-off-by: WANG Cong Signed-off-by: Sam Ravnborg --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index cecb3b040cc1..b93fb7eff942 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ *.gz *.lzma *.patch +*.gcno # # Top-level generic files From a8735821d198675dd326cc5847e79df79c735119 Mon Sep 17 00:00:00 2001 From: Floris Kraak Date: Mon, 15 Jun 2009 08:54:02 +0300 Subject: [PATCH 107/741] Kbuild: Disable the -Wformat-security gcc flag Some distributions have enabled the gcc flag -Wformat-security by default. This results in a number of warnings about format arguments to functions, sometimes in cases where fixing the warning is not likely to actually fix a bug. Instead of hand patching dozens of places (possibly more) that produce warnings that get ignored anyway we just turn off the flag in the Makefile. Signed-off-by: Floris Kraak Signed-off-by: Pekka Enberg Signed-off-by: Sam Ravnborg --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d1216fea0c92..8fb9bfce212c 100644 --- a/Makefile +++ b/Makefile @@ -344,7 +344,8 @@ KBUILD_CPPFLAGS := -D__KERNEL__ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common \ - -Werror-implicit-function-declaration + -Werror-implicit-function-declaration \ + -Wno-format-security KBUILD_AFLAGS := -D__ASSEMBLY__ # Read KERNELRELEASE from include/config/kernel.release (if it exists) From c512d2544c688ff1fab18a530860a9c7440a71b7 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 20 Jun 2009 18:25:25 +0530 Subject: [PATCH 108/741] gitignore: ignore scripts/ihex2fw scripts/ihex2fw is a generated binary and should be ignored Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Sam Ravnborg --- scripts/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/.gitignore b/scripts/.gitignore index b939fbd01195..52cab46ae35a 100644 --- a/scripts/.gitignore +++
b/scripts/.gitignore @@ -7,3 +7,4 @@ pnmtologo bin2c unifdef binoffset +ihex2fw From 112942353992d95099fb5b71c679ff1046fccfcf Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 19 Jun 2009 03:40:26 -0400 Subject: [PATCH 109/741] kbuild: finally remove the obsolete variable $TOPDIR TOPDIR is obsolete, it can be finally removed now. Signed-off-by: WANG Cong Signed-off-by: Sam Ravnborg --- Makefile | 4 +--- drivers/scsi/cxgb3i/Kbuild | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 8fb9bfce212c..b4c7ef5ab431 100644 --- a/Makefile +++ b/Makefile @@ -140,15 +140,13 @@ _all: modules endif srctree := $(if $(KBUILD_SRC),$(KBUILD_SRC),$(CURDIR)) -TOPDIR := $(srctree) -# FIXME - TOPDIR is obsolete, use srctree/objtree objtree := $(CURDIR) src := $(srctree) obj := $(objtree) VPATH := $(srctree)$(if $(KBUILD_EXTMOD),:$(KBUILD_EXTMOD)) -export srctree objtree VPATH TOPDIR +export srctree objtree VPATH # SUBARCH tells the usermode build what the underlying arch is. That is set diff --git a/drivers/scsi/cxgb3i/Kbuild b/drivers/scsi/cxgb3i/Kbuild index 25a2032bfa26..70d060b7ff4f 100644 --- a/drivers/scsi/cxgb3i/Kbuild +++ b/drivers/scsi/cxgb3i/Kbuild @@ -1,4 +1,4 @@ -EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/cxgb3 +EXTRA_CFLAGS += -I$(srctree)/drivers/net/cxgb3 cxgb3i-y := cxgb3i_init.o cxgb3i_iscsi.o cxgb3i_pdu.o cxgb3i_offload.o cxgb3i_ddp.o obj-$(CONFIG_SCSI_CXGB3_ISCSI) += cxgb3i.o From fde953c1c67986e1c381fa50d8207b1578b5cefa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 27 Jun 2009 06:06:39 +0200 Subject: [PATCH 110/741] perf_counter tools: Remove dead code Vince Weaver reported that there's a handful of #ifdef __MINGW32__ sections in the code. Remove them as they are in essence dead code - as unlike upstream Git, the perf tool is unlikely to be ported to Windows. 
Reported-by: Vince Weaver Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/util/help.c | 15 ------ tools/perf/util/pager.c | 5 +- tools/perf/util/run-command.c | 95 +---------------------------------- tools/perf/util/run-command.h | 5 -- tools/perf/util/util.h | 15 ------ 5 files changed, 3 insertions(+), 132 deletions(-) diff --git a/tools/perf/util/help.c b/tools/perf/util/help.c index 6653f7dd1d78..17a00e0df2c4 100644 --- a/tools/perf/util/help.c +++ b/tools/perf/util/help.c @@ -126,21 +126,6 @@ static int is_executable(const char *name) !S_ISREG(st.st_mode)) return 0; -#ifdef __MINGW32__ - /* cannot trust the executable bit, peek into the file instead */ - char buf[3] = { 0 }; - int n; - int fd = open(name, O_RDONLY); - st.st_mode &= ~S_IXUSR; - if (fd >= 0) { - n = read(fd, buf, 2); - if (n == 2) - /* DOS executables start with "MZ" */ - if (!strcmp(buf, "#!") || !strcmp(buf, "MZ")) - st.st_mode |= S_IXUSR; - close(fd); - } -#endif return st.st_mode & S_IXUSR; } diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c index a28bccae5458..1915de20dcac 100644 --- a/tools/perf/util/pager.c +++ b/tools/perf/util/pager.c @@ -9,7 +9,6 @@ static int spawned_pager; -#ifndef __MINGW32__ static void pager_preexec(void) { /* @@ -24,7 +23,6 @@ static void pager_preexec(void) setenv("LESS", "FRSX", 0); } -#endif static const char *pager_argv[] = { "sh", "-c", NULL, NULL }; static struct child_process pager_process; @@ -70,9 +68,8 @@ void setup_pager(void) pager_argv[2] = pager; pager_process.argv = pager_argv; pager_process.in = -1; -#ifndef __MINGW32__ pager_process.preexec_cb = pager_preexec; -#endif + if (start_command(&pager_process)) return; diff --git a/tools/perf/util/run-command.c b/tools/perf/util/run-command.c index b2f5e854f40a..a3935343091a 100644 --- a/tools/perf/util/run-command.c +++ b/tools/perf/util/run-command.c @@ -65,7 +65,6 @@ int start_command(struct 
child_process *cmd) cmd->err = fderr[0]; } -#ifndef __MINGW32__ fflush(NULL); cmd->pid = fork(); if (!cmd->pid) { @@ -118,71 +117,6 @@ int start_command(struct child_process *cmd) } exit(127); } -#else - int s0 = -1, s1 = -1, s2 = -1; /* backups of stdin, stdout, stderr */ - const char **sargv = cmd->argv; - char **env = environ; - - if (cmd->no_stdin) { - s0 = dup(0); - dup_devnull(0); - } else if (need_in) { - s0 = dup(0); - dup2(fdin[0], 0); - } else if (cmd->in) { - s0 = dup(0); - dup2(cmd->in, 0); - } - - if (cmd->no_stderr) { - s2 = dup(2); - dup_devnull(2); - } else if (need_err) { - s2 = dup(2); - dup2(fderr[1], 2); - } - - if (cmd->no_stdout) { - s1 = dup(1); - dup_devnull(1); - } else if (cmd->stdout_to_stderr) { - s1 = dup(1); - dup2(2, 1); - } else if (need_out) { - s1 = dup(1); - dup2(fdout[1], 1); - } else if (cmd->out > 1) { - s1 = dup(1); - dup2(cmd->out, 1); - } - - if (cmd->dir) - die("chdir in start_command() not implemented"); - if (cmd->env) { - env = copy_environ(); - for (; *cmd->env; cmd->env++) - env = env_setenv(env, *cmd->env); - } - - if (cmd->perf_cmd) { - cmd->argv = prepare_perf_cmd(cmd->argv); - } - - cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env); - - if (cmd->env) - free_environ(env); - if (cmd->perf_cmd) - free(cmd->argv); - - cmd->argv = sargv; - if (s0 >= 0) - dup2(s0, 0), close(s0); - if (s1 >= 0) - dup2(s1, 1), close(s1); - if (s2 >= 0) - dup2(s2, 2), close(s2); -#endif if (cmd->pid < 0) { int err = errno; @@ -288,14 +222,6 @@ int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const return run_command(&cmd); } -#ifdef __MINGW32__ -static __stdcall unsigned run_thread(void *data) -{ - struct async *async = data; - return async->proc(async->fd_for_proc, async->data); -} -#endif - int start_async(struct async *async) { int pipe_out[2]; @@ -304,7 +230,6 @@ int start_async(struct async *async) return error("cannot create pipe: %s", strerror(errno)); async->out = pipe_out[0]; -#ifndef __MINGW32__ /* 
Flush stdio before fork() to avoid cloning buffers */ fflush(NULL); @@ -319,33 +244,17 @@ int start_async(struct async *async) exit(!!async->proc(pipe_out[1], async->data)); } close(pipe_out[1]); -#else - async->fd_for_proc = pipe_out[1]; - async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL); - if (!async->tid) { - error("cannot create thread: %s", strerror(errno)); - close_pair(pipe_out); - return -1; - } -#endif + return 0; } int finish_async(struct async *async) { -#ifndef __MINGW32__ int ret = 0; if (wait_or_whine(async->pid)) ret = error("waitpid (async) failed"); -#else - DWORD ret = 0; - if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0) - ret = error("waiting for thread failed: %lu", GetLastError()); - else if (!GetExitCodeThread(async->tid, &ret)) - ret = error("cannot get thread exit code: %lu", GetLastError()); - CloseHandle(async->tid); -#endif + return ret; } diff --git a/tools/perf/util/run-command.h b/tools/perf/util/run-command.h index 328289f23669..cc1837deba88 100644 --- a/tools/perf/util/run-command.h +++ b/tools/perf/util/run-command.h @@ -79,12 +79,7 @@ struct async { int (*proc)(int fd, void *data); void *data; int out; /* caller reads from here and closes it */ -#ifndef __MINGW32__ pid_t pid; -#else - HANDLE tid; - int fd_for_proc; -#endif }; int start_async(struct async *async); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index b8cfed776d81..b4be6071c105 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -67,7 +67,6 @@ #include #include #include -#ifndef __MINGW32__ #include #include #include @@ -81,20 +80,6 @@ #include #include #include -#if defined(__CYGWIN__) -#undef _XOPEN_SOURCE -#include -#define _XOPEN_SOURCE 600 -#include "compat/cygwin.h" -#else -#undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. 
*/ -#include -#define _ALL_SOURCE 1 -#endif -#else /* __MINGW32__ */ -/* pull in Windows compatibility stuff */ -#include "compat/mingw.h" -#endif /* __MINGW32__ */ #ifndef NO_ICONV #include From 0cfb7a13b8e4e0afd4b856156ab16a182de7505b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 27 Jun 2009 06:10:30 +0200 Subject: [PATCH 111/741] perf stat: Add -n/--null option to run without counters Allow a no-counters run. This can be useful to measure just elapsed wall-clock time - or to assess the raw overhead of perf stat itself, without running any counters. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 8420ec589506..cdcd058fac08 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -70,6 +70,7 @@ static int run_count = 1; static int inherit = 1; static int scale = 1; static int target_pid = -1; +static int null_run = 0; static int fd[MAX_NR_CPUS][MAX_COUNTERS]; @@ -461,6 +462,8 @@ static const struct option options[] = { "be more verbose (show counter open errors, etc)"), OPT_INTEGER('r', "repeat", &run_count, "repeat command and print average + stddev (max: 100)"), + OPT_BOOLEAN('n', "null", &null_run, + "null run - dont start any counters"), OPT_END() }; @@ -476,7 +479,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix) if (run_count <= 0 || run_count > MAX_RUN) usage_with_options(stat_usage, options); - if (!nr_counters) + if (!null_run && !nr_counters) nr_counters = 8; nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); From 566747e6298289c5cb02d4939cb3abf1c4fe7e5a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 27 Jun 2009 06:24:32 +0200 Subject: [PATCH 112/741] perf stat: Fix multi-run stats In multi-run (-r/--repeat) printouts, print out the noise of the wall-clock average as well. 
Also, fix a bug in printing out scaled counters: if it was not scaled then we should not update the average with -1. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index cdcd058fac08..52c176cc683e 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -353,8 +353,11 @@ static void calc_avg(void) event_res_avg[j]+1, event_res[i][j]+1); update_avg("counter/2", j, event_res_avg[j]+2, event_res[i][j]+2); - update_avg("scaled", j, - event_scaled_avg + j, event_scaled[i]+j); + if (event_scaled[i][j] != -1) + update_avg("scaled", j, + event_scaled_avg + j, event_scaled[i]+j); + else + event_scaled_avg[j] = -1; } } runtime_nsecs_avg /= run_count; @@ -420,9 +423,13 @@ static void print_stat(int argc, const char **argv) fprintf(stderr, "\n"); - fprintf(stderr, " %14.9f seconds time elapsed.\n", + fprintf(stderr, " %14.9f seconds time elapsed", (double)walltime_nsecs_avg/1e9); - fprintf(stderr, "\n"); + if (run_count > 1) { + fprintf(stderr, " ( +- %7.3f%% )", + 100.0*(double)walltime_nsecs_noise/(double)walltime_nsecs_avg); + } + fprintf(stderr, "\n\n"); } static volatile int signr = -1; From 70ec3bb8ea3f8c55b255f41d122c7d4d8c0d00b4 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 27 Jun 2009 09:55:32 +0200 Subject: [PATCH 113/741] mtd: Use BLOCK_NIL consistently in NFTL/INFTL Use BLOCK_NIL consistently rather than sometimes 0xffff and sometimes BLOCK_NIL. The semantic patch that finds this issue is below (http://www.emn.fr/x-info/coccinelle/). On the other hand, the changes were made by hand, in part because drivers/mtd/inftlcore.c contains dead code that causes spatch to ignore a relevant function. 
Specifically, the function INFTL_findwriteunit contains a do-while loop, but always takes a return that leaves the loop on the first iteration. // @r exists@ identifier f,C; @@ f(...) { ... return C; } @s@ identifier r.C; expression E; @@ @@ identifier r.f,r.C,I; expression s.E; @@ f(...) { <... ( I | - E + C ) ...> } // Signed-off-by: Julia Lawall Signed-off-by: David Woodhouse --- drivers/mtd/inftlcore.c | 11 ++++++----- drivers/mtd/nftlcore.c | 16 ++++++++-------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c index 73f05227dc8c..d8cf29c01cc4 100644 --- a/drivers/mtd/inftlcore.c +++ b/drivers/mtd/inftlcore.c @@ -226,7 +226,7 @@ static u16 INFTL_findfreeblock(struct INFTLrecord *inftl, int desperate) if (!desperate && inftl->numfreeEUNs < 2) { DEBUG(MTD_DEBUG_LEVEL1, "INFTL: there are too few free " "EUNs (%d)\n", inftl->numfreeEUNs); - return 0xffff; + return BLOCK_NIL; } /* Scan for a free block */ @@ -281,7 +281,8 @@ static u16 INFTL_foldchain(struct INFTLrecord *inftl, unsigned thisVUC, unsigned silly = MAX_LOOPS; while (thisEUN < inftl->nb_blocks) { for (block = 0; block < inftl->EraseSize/SECTORSIZE; block ++) { - if ((BlockMap[block] != 0xffff) || BlockDeleted[block]) + if ((BlockMap[block] != BLOCK_NIL) || + BlockDeleted[block]) continue; if (inftl_read_oob(mtd, (thisEUN * inftl->EraseSize) @@ -525,7 +526,7 @@ static inline u16 INFTL_findwriteunit(struct INFTLrecord *inftl, unsigned block) if (!silly--) { printk(KERN_WARNING "INFTL: infinite loop in " "Virtual Unit Chain 0x%x\n", thisVUC); - return 0xffff; + return BLOCK_NIL; } /* Skip to next block in chain */ @@ -549,7 +550,7 @@ hitused: * waiting to be picked up. We're going to have to fold * a chain to make room. */ - thisEUN = INFTL_makefreeblock(inftl, 0xffff); + thisEUN = INFTL_makefreeblock(inftl, BLOCK_NIL); /* * Hopefully we free something, lets try again. 
@@ -631,7 +632,7 @@ hitused: printk(KERN_WARNING "INFTL: error folding to make room for Virtual " "Unit Chain 0x%x\n", thisVUC); - return 0xffff; + return BLOCK_NIL; } /* diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c index e3f8495a94c2..fb86cacd5bdb 100644 --- a/drivers/mtd/nftlcore.c +++ b/drivers/mtd/nftlcore.c @@ -208,7 +208,7 @@ static u16 NFTL_findfreeblock(struct NFTLrecord *nftl, int desperate ) /* Normally, we force a fold to happen before we run out of free blocks completely */ if (!desperate && nftl->numfreeEUNs < 2) { DEBUG(MTD_DEBUG_LEVEL1, "NFTL_findfreeblock: there are too few free EUNs\n"); - return 0xffff; + return BLOCK_NIL; } /* Scan for a free block */ @@ -230,11 +230,11 @@ static u16 NFTL_findfreeblock(struct NFTLrecord *nftl, int desperate ) printk("Argh! No free blocks found! LastFreeEUN = %d, " "FirstEUN = %d\n", nftl->LastFreeEUN, le16_to_cpu(nftl->MediaHdr.FirstPhysicalEUN)); - return 0xffff; + return BLOCK_NIL; } } while (pot != nftl->LastFreeEUN); - return 0xffff; + return BLOCK_NIL; } static u16 NFTL_foldchain (struct NFTLrecord *nftl, unsigned thisVUC, unsigned pendingblock ) @@ -431,7 +431,7 @@ static u16 NFTL_foldchain (struct NFTLrecord *nftl, unsigned thisVUC, unsigned p /* add the header so that it is now a valid chain */ oob.u.a.VirtUnitNum = oob.u.a.SpareVirtUnitNum = cpu_to_le16(thisVUC); - oob.u.a.ReplUnitNum = oob.u.a.SpareReplUnitNum = 0xffff; + oob.u.a.ReplUnitNum = oob.u.a.SpareReplUnitNum = BLOCK_NIL; nftl_write_oob(mtd, (nftl->EraseSize * targetEUN) + 8, 8, &retlen, (char *)&oob.u); @@ -515,7 +515,7 @@ static u16 NFTL_makefreeblock( struct NFTLrecord *nftl , unsigned pendingblock) if (ChainLength < 2) { printk(KERN_WARNING "No Virtual Unit Chains available for folding. 
" "Failing request\n"); - return 0xffff; + return BLOCK_NIL; } return NFTL_foldchain (nftl, LongestChain, pendingblock); @@ -578,7 +578,7 @@ static inline u16 NFTL_findwriteunit(struct NFTLrecord *nftl, unsigned block) printk(KERN_WARNING "Infinite loop in Virtual Unit Chain 0x%x\n", thisVUC); - return 0xffff; + return BLOCK_NIL; } /* Skip to next block in chain */ @@ -601,7 +601,7 @@ static inline u16 NFTL_findwriteunit(struct NFTLrecord *nftl, unsigned block) //u16 startEUN = nftl->EUNtable[thisVUC]; //printk("Write to VirtualUnitChain %d, calling makefreeblock()\n", thisVUC); - writeEUN = NFTL_makefreeblock(nftl, 0xffff); + writeEUN = NFTL_makefreeblock(nftl, BLOCK_NIL); if (writeEUN == BLOCK_NIL) { /* OK, we accept that the above comment is @@ -673,7 +673,7 @@ static inline u16 NFTL_findwriteunit(struct NFTLrecord *nftl, unsigned block) printk(KERN_WARNING "Error folding to make room for Virtual Unit Chain 0x%x\n", thisVUC); - return 0xffff; + return BLOCK_NIL; } static int nftl_writeblock(struct mtd_blktrans_dev *mbd, unsigned long block, From 9a24ee03aebc55cec00cc388b6727bff24ed433f Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Wed, 24 Jun 2009 17:13:48 +0100 Subject: [PATCH 114/741] [ARM] 5563/1: at91: at91sam9rlek lcd interface correction Here is a little update to the at91sam9rlek lcd interface. This will correct the power pin of the LCD. It will also add precision to the struct atmel_lcdfb_info structure: backlight enabling and wiring mode correction: RGB wiring on the -EK board. 
Signed-off-by: Nicolas Ferre Acked-by: Andrew Victor Signed-off-by: Russell King --- arch/arm/mach-at91/board-sam9rlek.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-at91/board-sam9rlek.c b/arch/arm/mach-at91/board-sam9rlek.c index 35e12a49d1a6..f6b5672cabd6 100644 --- a/arch/arm/mach-at91/board-sam9rlek.c +++ b/arch/arm/mach-at91/board-sam9rlek.c @@ -186,19 +186,21 @@ static struct fb_monspecs at91fb_default_monspecs = { static void at91_lcdc_power_control(int on) { if (on) - at91_set_gpio_value(AT91_PIN_PA30, 0); /* power up */ + at91_set_gpio_value(AT91_PIN_PC1, 0); /* power up */ else - at91_set_gpio_value(AT91_PIN_PA30, 1); /* power down */ + at91_set_gpio_value(AT91_PIN_PC1, 1); /* power down */ } /* Driver datas */ static struct atmel_lcdfb_info __initdata ek_lcdc_data = { + .lcdcon_is_backlight = true, .default_bpp = 16, .default_dmacon = ATMEL_LCDC_DMAEN, .default_lcdcon2 = AT91SAM9RL_DEFAULT_LCDCON2, .default_monspecs = &at91fb_default_monspecs, .atmel_lcdfb_power_control = at91_lcdc_power_control, .guard_time = 1, + .lcd_wiring_mode = ATMEL_LCDC_WIRING_RGB, }; #else From 6e750a8fc009fd0ae98704525d1d8e80d60e8cc9 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 27 Jun 2009 03:02:07 +0530 Subject: [PATCH 115/741] perf stat: Improve output Increase size for event name to handle bigger names like 'L1-d$-prefetch-misses' Changed scaled counters from percentage to a multiplicative factor because the latter is more expressive. 
Also aligned the scaling factor, otherwise sometimes it looks like: 384 iTLB-load-misses (4.74x scaled) 452029 branch-loads (8.00x scaled) 5892 branch-load-misses (20.39x scaled) 972315 iTLB-loads (3.24x scaled) Before: 150708 L1-d$-stores (scaled from 23.57%) 428804 L1-d$-prefetches (scaled from 23.47%) 314446 L1-d$-prefetch-misses (scaled from 23.42%) 252626137 L1-i$-loads (scaled from 23.24%) 5297550 dTLB-load-misses (scaled from 23.96%) 106992392 branch-loads (scaled from 23.67%) 5239561 branch-load-misses (scaled from 23.43%) After: 1731713 L1-d$-loads ( 14.25x scaled) 44241 L1-d$-prefetches ( 3.88x scaled) 21076 L1-d$-prefetch-misses ( 3.40x scaled) 5789421 L1-i$-loads ( 3.78x scaled) 29645 dTLB-load-misses ( 2.95x scaled) 461474 branch-loads ( 6.52x scaled) 7493 branch-load-misses ( 26.57x scaled) Reported-by: Ingo Molnar Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1246051927.2988.10.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 52c176cc683e..3840a70f05b7 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -32,6 +32,7 @@ * Wu Fengguang * Mike Galbraith * Paul Mackerras + * Jaswinder Singh Rajput * * Released under the GPL v2. 
(and only v2, not any later version) */ @@ -251,7 +252,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise) { double msecs = (double)count[0] / 1000000; - fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); + fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter)); if (attrs[counter].type == PERF_TYPE_SOFTWARE && attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { @@ -265,7 +266,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise) static void abs_printout(int counter, u64 *count, u64 *noise) { - fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); + fprintf(stderr, " %14Ld %-24s", count[0], event_name(counter)); if (runtime_cycles_avg && attrs[counter].type == PERF_TYPE_HARDWARE && @@ -295,7 +296,7 @@ static void print_counter(int counter) scaled = event_scaled_avg[counter]; if (scaled == -1) { - fprintf(stderr, " %14s %-20s\n", + fprintf(stderr, " %14s %-24s\n", "", event_name(counter)); return; } @@ -306,8 +307,7 @@ static void print_counter(int counter) abs_printout(counter, count, noise); if (scaled) - fprintf(stderr, " (scaled from %.2f%%)", - (double) count[2] / count[1] * 100); + fprintf(stderr, " (%7.2fx scaled)", (double)count[1]/count[2]); fprintf(stderr, "\n"); } @@ -421,7 +421,6 @@ static void print_stat(int argc, const char **argv) for (counter = 0; counter < nr_counters; counter++) print_counter(counter); - fprintf(stderr, "\n"); fprintf(stderr, " %14.9f seconds time elapsed", (double)walltime_nsecs_avg/1e9); From a222ad1a4b2e3ca177a538482c99c519c1ce94d1 Mon Sep 17 00:00:00 2001 From: Karen Xie Date: Fri, 26 Jun 2009 15:17:29 -0700 Subject: [PATCH 116/741] [SCSI] cxgb3i: fix connection error when vlan is enabled There is a bug when VLAN is configured on the cxgb3 interface, the iscsi conn. would be denied with message "cxgb3i: NOT going through cxgbi device." This patch adds code to get the real egress net_device when vlan is configured. 
Signed-off-by: Karen Xie Reviewed-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/cxgb3i/cxgb3i_iscsi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c index 74369a3f963b..c399f485aa7d 100644 --- a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c +++ b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -184,6 +185,9 @@ static struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *ndev) struct cxgb3i_adapter *snic; int i; + if (ndev->priv_flags & IFF_802_1Q_VLAN) + ndev = vlan_dev_real_dev(ndev); + read_lock(&cxgb3i_snic_rwlock); list_for_each_entry(snic, &cxgb3i_snic_list, list_head) { for (i = 0; i < snic->hba_cnt; i++) { From c276aca46d26aa2347320096f8ecdf5016795c14 Mon Sep 17 00:00:00 2001 From: vimal singh Date: Sat, 27 Jun 2009 11:07:06 +0530 Subject: [PATCH 117/741] mtd: nand: fix build failure and incorrect return from omap_wait() We need to include jiffies.h manually in some cases, and the status returned from omap_wait() was broken in two separate ways. Also add cond_resched() to the loop. 
Signed-off-by: Vimal Singh Signed-off-by: David Woodhouse --- drivers/mtd/nand/omap2.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index 0cd76f89f4b0..ebd07e95b814 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -541,7 +543,7 @@ static int omap_wait(struct mtd_info *mtd, struct nand_chip *chip) struct omap_nand_info *info = container_of(mtd, struct omap_nand_info, mtd); unsigned long timeo = jiffies; - int status, state = this->state; + int status = NAND_STATUS_FAIL, state = this->state; if (state == FL_ERASING) timeo += (HZ * 400) / 1000; @@ -556,8 +558,9 @@ static int omap_wait(struct mtd_info *mtd, struct nand_chip *chip) while (time_before(jiffies, timeo)) { status = __raw_readb(this->IO_ADDR_R); - if (!(status & 0x40)) + if (status & NAND_STATUS_READY) break; + cond_resched(); } return status; } From c3043569dc8fbe9228b76174f15d1a7152c48a20 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 27 Jun 2009 23:49:09 +0530 Subject: [PATCH 118/741] perf stat: Micro-optimize the code: memcpy is only required if no event is selected and !null_run Set attrs and nr_counters if no event is selected and !null_run. Setting of attrs should depend on number of counters, so we need to memcpy only for sizeof(default_attrs) Also set nr_counters as ARRAY_SIZE(default_attrs) in place of hardcoded value. 
Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1246126749.32198.16.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3840a70f05b7..3e5ea4e2e5fd 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -46,7 +46,7 @@ #include #include -static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { +static struct perf_counter_attr default_attrs[] = { { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, @@ -477,16 +477,17 @@ int cmd_stat(int argc, const char **argv, const char *prefix) { int status; - memcpy(attrs, default_attrs, sizeof(attrs)); - argc = parse_options(argc, argv, options, stat_usage, 0); if (!argc) usage_with_options(stat_usage, options); if (run_count <= 0 || run_count > MAX_RUN) usage_with_options(stat_usage, options); - if (!null_run && !nr_counters) - nr_counters = 8; + /* Set attrs and nr_counters if no event is selected and !null_run */ + if (!null_run && !nr_counters) { + memcpy(attrs, default_attrs, sizeof(default_attrs)); + nr_counters = ARRAY_SIZE(default_attrs); + } nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); assert(nr_cpus <= MAX_NR_CPUS); From ec5a36f94e7ca4b1f28ae4dd135cd415a704e772 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 29 Jun 2009 09:57:10 +0300 Subject: [PATCH 119/741] SLAB: Fix lockdep annotations Commit 8429db5... 
("slab: setup cpu caches later on when interrupts are enabled") broke mm/slab.c lockdep annotations: [ 11.554715] ============================================= [ 11.555249] [ INFO: possible recursive locking detected ] [ 11.555560] 2.6.31-rc1 #896 [ 11.555861] --------------------------------------------- [ 11.556127] udevd/1899 is trying to acquire lock: [ 11.556436] (&nc->lock){-.-...}, at: [] kmem_cache_free+0xcd/0x25b [ 11.557101] [ 11.557102] but task is already holding lock: [ 11.557706] (&nc->lock){-.-...}, at: [] kfree+0x137/0x292 [ 11.558109] [ 11.558109] other info that might help us debug this: [ 11.558720] 2 locks held by udevd/1899: [ 11.558983] #0: (&nc->lock){-.-...}, at: [] kfree+0x137/0x292 [ 11.559734] #1: (&parent->list_lock){-.-...}, at: [] __drain_alien_cache+0x3b/0xbd [ 11.560442] [ 11.560443] stack backtrace: [ 11.561009] Pid: 1899, comm: udevd Not tainted 2.6.31-rc1 #896 [ 11.561276] Call Trace: [ 11.561632] [] __lock_acquire+0x15ec/0x168f [ 11.561901] [] ? __lock_acquire+0x1676/0x168f [ 11.562171] [] ? trace_hardirqs_on_caller+0x113/0x13e [ 11.562490] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [ 11.562807] [] lock_acquire+0xc1/0xe5 [ 11.563073] [] ? kmem_cache_free+0xcd/0x25b [ 11.563385] [] _spin_lock+0x31/0x66 [ 11.563696] [] ? kmem_cache_free+0xcd/0x25b [ 11.563964] [] kmem_cache_free+0xcd/0x25b [ 11.564235] [] ? __free_pages+0x1b/0x24 [ 11.564551] [] slab_destroy+0x57/0x5c [ 11.564860] [] free_block+0xd8/0x123 [ 11.565126] [] __drain_alien_cache+0xa2/0xbd [ 11.565441] [] kfree+0x14c/0x292 [ 11.565752] [] skb_release_data+0xc6/0xcb [ 11.566020] [] __kfree_skb+0x19/0x86 [ 11.566286] [] consume_skb+0x2b/0x2d [ 11.566631] [] skb_free_datagram+0x14/0x3a [ 11.566901] [] netlink_recvmsg+0x164/0x258 [ 11.567170] [] sock_recvmsg+0xe5/0xfe [ 11.567486] [] ? might_fault+0xaf/0xb1 [ 11.567802] [] ? autoremove_wake_function+0x0/0x38 [ 11.568073] [] ? core_sys_select+0x3d/0x2b4 [ 11.568378] [] ? __lock_acquire+0x1676/0x168f [ 11.568693] [] ? 
sockfd_lookup_light+0x1b/0x54 [ 11.568961] [] sys_recvfrom+0xa3/0xf8 [ 11.569228] [] ? trace_hardirqs_on+0xd/0xf [ 11.569546] [] system_call_fastpath+0x16/0x1b# Fix that up. Closes-bug: http://bugzilla.kernel.org/show_bug.cgi?id=13654 Tested-by: Venkatesh Pallipadi Signed-off-by: Pekka Enberg --- mm/slab.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 5241b6598ba3..7b5d4deacfcd 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1544,9 +1544,6 @@ void __init kmem_cache_init(void) } g_cpucache_up = EARLY; - - /* Annotate slab for lockdep -- annotate the malloc caches */ - init_lock_keys(); } void __init kmem_cache_init_late(void) @@ -1563,6 +1560,9 @@ void __init kmem_cache_init_late(void) /* Done! */ g_cpucache_up = FULL; + /* Annotate slab for lockdep -- annotate the malloc caches */ + init_lock_keys(); + /* * Register a cpu startup notifier callback that initializes * cpu_cache_get for all new cpus From 9230ccb1071d2d7e4ecb6314e67203b9f7f08140 Mon Sep 17 00:00:00 2001 From: Yan Li Date: Sun, 28 Jun 2009 22:30:56 -0700 Subject: [PATCH 120/741] Input: i8042 - more reset quirks for MSI Wind-clone netbooks When testing Moblin on various netbooks, we've got reports that many MSI Wind clones need the i8042 reset quirks for the keyboard and/or touchpad's proper function. 
Signed-off-by: Yan Li Signed-off-by: Andrew Morton Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-x86ia64io.h | 28 +++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index fb8a3cd3ffd0..924e8ed7f2cf 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -392,6 +392,34 @@ static struct dmi_system_id __initdata i8042_dmi_reset_table[] = { DMI_MATCH(DMI_BOARD_VENDOR, "LG Electronics Inc."), }, }, + { + .ident = "Acer Aspire One 150", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "AOA150"), + }, + }, + { + .ident = "Advent 4211", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "DIXONSXP"), + DMI_MATCH(DMI_PRODUCT_NAME, "Advent 4211"), + }, + }, + { + .ident = "Medion Akoya Mini E1210", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MEDION"), + DMI_MATCH(DMI_PRODUCT_NAME, "E1210"), + }, + }, + { + .ident = "Mivvy M310", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "VIOOO"), + DMI_MATCH(DMI_PRODUCT_NAME, "N10"), + }, + }, { } }; From c413ec446188ae53276eb60a60311b430448c6b0 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Sun, 28 Jun 2009 22:50:58 -0700 Subject: [PATCH 121/741] Input: wacom - add DTF720a support and fix rotation on Intuos3 This patch adds DTF720a support and fixes an Intuos3 rotation pen out-proximity bug. 
Signed-off-by: Ping Cheng Signed-off-by: Dmitry Torokhov --- drivers/input/tablet/wacom_wac.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index 38bf86384aeb..c896d6a21b7e 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -384,6 +384,8 @@ static int wacom_intuos_inout(struct wacom_wac *wacom, void *wcombo) wacom_report_key(wcombo, BTN_STYLUS2, 0); wacom_report_key(wcombo, BTN_TOUCH, 0); wacom_report_abs(wcombo, ABS_WHEEL, 0); + if (wacom->features->type >= INTUOS3S) + wacom_report_abs(wcombo, ABS_Z, 0); } wacom_report_key(wcombo, wacom->tool[idx], 0); wacom_report_abs(wcombo, ABS_MISC, 0); /* reset tool id */ @@ -836,6 +838,7 @@ static struct wacom_features wacom_features[] = { { "Wacom DTU710", 8, 34080, 27660, 511, 0, PL }, { "Wacom DTF521", 8, 6282, 4762, 511, 0, PL }, { "Wacom DTF720", 8, 6858, 5506, 511, 0, PL }, + { "Wacom DTF720a", 8, 6858, 5506, 511, 0, PL }, { "Wacom Cintiq Partner",8, 20480, 15360, 511, 0, PTU }, { "Wacom Intuos2 4x5", 10, 12700, 10600, 1023, 31, INTUOS }, { "Wacom Intuos2 6x8", 10, 20320, 16240, 1023, 31, INTUOS }, @@ -897,8 +900,9 @@ static struct usb_device_id wacom_ids[] = { { USB_DEVICE(USB_VENDOR_ID_WACOM, 0x37) }, { USB_DEVICE(USB_VENDOR_ID_WACOM, 0x38) }, { USB_DEVICE(USB_VENDOR_ID_WACOM, 0x39) }, - { USB_DEVICE(USB_VENDOR_ID_WACOM, 0xC0) }, { USB_DEVICE(USB_VENDOR_ID_WACOM, 0xC4) }, + { USB_DEVICE(USB_VENDOR_ID_WACOM, 0xC0) }, + { USB_DEVICE(USB_VENDOR_ID_WACOM, 0xC2) }, { USB_DEVICE(USB_VENDOR_ID_WACOM, 0x03) }, { USB_DEVICE(USB_VENDOR_ID_WACOM, 0x41) }, { USB_DEVICE(USB_VENDOR_ID_WACOM, 0x42) }, From 00b8ac409cad653137f087e3ff69c020174cbc15 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Sun, 28 Jun 2009 22:30:56 -0700 Subject: [PATCH 122/741] Input: dm355evm_keys - fix kconfig symbol names The keypad driver for the DM355 EVM got slightly broken as it merged, since it moved from input/keyboard 
to input/misc and its Kconfig symbol changed. This patch copes with the changed Kconfig symbol. Signed-off-by: David Brownell Signed-off-by: Dmitry Torokhov --- drivers/mfd/dm355evm_msp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mfd/dm355evm_msp.c b/drivers/mfd/dm355evm_msp.c index 7ac12cb0be4a..5b6e58a3ba46 100644 --- a/drivers/mfd/dm355evm_msp.c +++ b/drivers/mfd/dm355evm_msp.c @@ -32,8 +32,7 @@ * This driver was tested with firmware revision A4. */ -#if defined(CONFIG_KEYBOARD_DM355EVM) \ - || defined(CONFIG_KEYBOARD_DM355EVM_MODULE) +#if defined(CONFIG_INPUT_DM355EVM) || defined(CONFIG_INPUT_DM355EVM_MODULE) #define msp_has_keyboard() true #else #define msp_has_keyboard() false From ca865a77b5949f5c403e0f13de5a5a9cd499a11e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Sun, 28 Jun 2009 22:38:44 -0700 Subject: [PATCH 123/741] Input: gpio-keys - revert 'change timer to workqueue' This reverts commit 0b346838c5862bfe911432956a106d602535d030. This commit breaks GPIO debouncing by replacing the original mod_timer with schedule_delayed_work in the interrupt handler. The latter does not kick the timer further on GPIO line changes as it should to perform debouncing. 
Signed-off-by: Jani Nikula Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/gpio_keys.c | 32 +++++++++++++++++++----------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 2157cd7de00c..9767213b6c8f 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -22,14 +22,13 @@ #include #include #include -#include #include struct gpio_button_data { struct gpio_keys_button *button; struct input_dev *input; - struct delayed_work work; + struct timer_list timer; }; struct gpio_keys_drvdata { @@ -37,10 +36,8 @@ struct gpio_keys_drvdata { struct gpio_button_data data[0]; }; -static void gpio_keys_report_event(struct work_struct *work) +static void gpio_keys_report_event(struct gpio_button_data *bdata) { - struct gpio_button_data *bdata = - container_of(work, struct gpio_button_data, work.work); struct gpio_keys_button *button = bdata->button; struct input_dev *input = bdata->input; unsigned int type = button->type ?: EV_KEY; @@ -50,17 +47,25 @@ static void gpio_keys_report_event(struct work_struct *work) input_sync(input); } +static void gpio_check_button(unsigned long _data) +{ + struct gpio_button_data *data = (struct gpio_button_data *)_data; + + gpio_keys_report_event(data); +} + static irqreturn_t gpio_keys_isr(int irq, void *dev_id) { struct gpio_button_data *bdata = dev_id; struct gpio_keys_button *button = bdata->button; - unsigned long delay; BUG_ON(irq != gpio_to_irq(button->gpio)); - delay = button->debounce_interval ? 
- msecs_to_jiffies(button->debounce_interval) : 0; - schedule_delayed_work(&bdata->work, delay); + if (button->debounce_interval) + mod_timer(&bdata->timer, + jiffies + msecs_to_jiffies(button->debounce_interval)); + else + gpio_keys_report_event(bdata); return IRQ_HANDLED; } @@ -107,7 +112,8 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) bdata->input = input; bdata->button = button; - INIT_DELAYED_WORK(&bdata->work, gpio_keys_report_event); + setup_timer(&bdata->timer, + gpio_check_button, (unsigned long)bdata); error = gpio_request(button->gpio, button->desc ?: "gpio_keys"); if (error < 0) { @@ -166,7 +172,8 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) fail2: while (--i >= 0) { free_irq(gpio_to_irq(pdata->buttons[i].gpio), &ddata->data[i]); - cancel_delayed_work_sync(&ddata->data[i].work); + if (pdata->buttons[i].debounce_interval) + del_timer_sync(&ddata->data[i].timer); gpio_free(pdata->buttons[i].gpio); } @@ -190,7 +197,8 @@ static int __devexit gpio_keys_remove(struct platform_device *pdev) for (i = 0; i < pdata->nbuttons; i++) { int irq = gpio_to_irq(pdata->buttons[i].gpio); free_irq(irq, &ddata->data[i]); - cancel_delayed_work_sync(&ddata->data[i].work); + if (pdata->buttons[i].debounce_interval) + del_timer_sync(&ddata->data[i].timer); gpio_free(pdata->buttons[i].gpio); } From da0d03fe6cecde837f113a8a587f5a872d0fade0 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Sun, 28 Jun 2009 22:38:56 -0700 Subject: [PATCH 124/741] Input: gpio-keys - avoid possibility of sleeping in timer function The gpio_get_value function may sleep, so it should not be called in a timer function. Move gpio_get_value calls to workqueue. 
Signed-off-by: Jani Nikula Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/gpio_keys.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 9767213b6c8f..efed0c9e242e 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -29,6 +30,7 @@ struct gpio_button_data { struct gpio_keys_button *button; struct input_dev *input; struct timer_list timer; + struct work_struct work; }; struct gpio_keys_drvdata { @@ -36,8 +38,10 @@ struct gpio_keys_drvdata { struct gpio_button_data data[0]; }; -static void gpio_keys_report_event(struct gpio_button_data *bdata) +static void gpio_keys_report_event(struct work_struct *work) { + struct gpio_button_data *bdata = + container_of(work, struct gpio_button_data, work); struct gpio_keys_button *button = bdata->button; struct input_dev *input = bdata->input; unsigned int type = button->type ?: EV_KEY; @@ -47,11 +51,11 @@ static void gpio_keys_report_event(struct gpio_button_data *bdata) input_sync(input); } -static void gpio_check_button(unsigned long _data) +static void gpio_keys_timer(unsigned long _data) { struct gpio_button_data *data = (struct gpio_button_data *)_data; - gpio_keys_report_event(data); + schedule_work(&data->work); } static irqreturn_t gpio_keys_isr(int irq, void *dev_id) @@ -65,7 +69,7 @@ static irqreturn_t gpio_keys_isr(int irq, void *dev_id) mod_timer(&bdata->timer, jiffies + msecs_to_jiffies(button->debounce_interval)); else - gpio_keys_report_event(bdata); + schedule_work(&bdata->work); return IRQ_HANDLED; } @@ -113,7 +117,8 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) bdata->input = input; bdata->button = button; setup_timer(&bdata->timer, - gpio_check_button, (unsigned long)bdata); + gpio_keys_timer, (unsigned long)bdata); + INIT_WORK(&bdata->work, gpio_keys_report_event); 
error = gpio_request(button->gpio, button->desc ?: "gpio_keys"); if (error < 0) { @@ -174,6 +179,7 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) free_irq(gpio_to_irq(pdata->buttons[i].gpio), &ddata->data[i]); if (pdata->buttons[i].debounce_interval) del_timer_sync(&ddata->data[i].timer); + cancel_work_sync(&ddata->data[i].work); gpio_free(pdata->buttons[i].gpio); } @@ -199,6 +205,7 @@ static int __devexit gpio_keys_remove(struct platform_device *pdev) free_irq(irq, &ddata->data[i]); if (pdata->buttons[i].debounce_interval) del_timer_sync(&ddata->data[i].timer); + cancel_work_sync(&ddata->data[i].work); gpio_free(pdata->buttons[i].gpio); } From cb589529f74d69abc111887b45308f333f950ade Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 29 Jun 2009 00:00:52 -0700 Subject: [PATCH 125/741] Input: arrange keyboards alphabetically Hopefully it will reduce conflicts when merging patches. Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/Kconfig | 299 ++++++++++++++++---------------- drivers/input/keyboard/Makefile | 32 ++-- 2 files changed, 165 insertions(+), 166 deletions(-) diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 9d8f796c6745..d2df1030675a 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -12,6 +12,42 @@ menuconfig INPUT_KEYBOARD if INPUT_KEYBOARD +config KEYBOARD_AAED2000 + tristate "AAED-2000 keyboard" + depends on MACH_AAED2000 + select INPUT_POLLDEV + default y + help + Say Y here to enable the keyboard on the Agilent AAED-2000 + development board. + + To compile this driver as a module, choose M here: the + module will be called aaed2000_kbd. + +config KEYBOARD_AMIGA + tristate "Amiga keyboard" + depends on AMIGA + help + Say Y here if you are running Linux on any AMIGA and have a keyboard + attached. + + To compile this driver as a module, choose M here: the + module will be called amikbd. 
+ +config ATARI_KBD_CORE + bool + +config KEYBOARD_ATARI + tristate "Atari keyboard" + depends on ATARI + select ATARI_KBD_CORE + help + Say Y here if you are running Linux on any Atari and have a keyboard + attached. + + To compile this driver as a module, choose M here: the + module will be called atakbd. + config KEYBOARD_ATKBD tristate "AT keyboard" if EMBEDDED || !X86 default y @@ -68,16 +104,25 @@ config KEYBOARD_ATKBD_RDI_KEYCODES right-hand column will be interpreted as the key shown in the left-hand column. -config KEYBOARD_SUNKBD - tristate "Sun Type 4 and Type 5 keyboard" - select SERIO +config KEYBOARD_BFIN + tristate "Blackfin BF54x keypad support" + depends on (BF54x && !BF544) help - Say Y here if you want to use a Sun Type 4 or Type 5 keyboard, - connected either to the Sun keyboard connector or to an serial - (RS-232) port via a simple adapter. + Say Y here if you want to use the BF54x keypad. To compile this driver as a module, choose M here: the - module will be called sunkbd. + module will be called bf54x-keys. + +config KEYBOARD_CORGI + tristate "Corgi keyboard" + depends on PXA_SHARPSL + default y + help + Say Y here to enable the keyboard on the Sharp Zaurus SL-C7xx + series of PDAs. + + To compile this driver as a module, choose M here: the + module will be called corgikbd. config KEYBOARD_LKKBD tristate "DECstation/VAXstation LK201/LK401 keyboard" @@ -91,113 +136,29 @@ config KEYBOARD_LKKBD To compile this driver as a module, choose M here: the module will be called lkkbd. -config KEYBOARD_LOCOMO - tristate "LoCoMo Keyboard Support" - depends on SHARP_LOCOMO && INPUT_KEYBOARD +config KEYBOARD_EP93XX + tristate "EP93xx Matrix Keypad support" + depends on ARCH_EP93XX help - Say Y here if you are running Linux on a Sharp Zaurus Collie or Poodle based PDA + Say Y here to enable the matrix keypad on the Cirrus EP93XX. To compile this driver as a module, choose M here: the - module will be called locomokbd. + module will be called ep93xx_keypad. 
-config KEYBOARD_XTKBD - tristate "XT keyboard" - select SERIO +config KEYBOARD_GPIO + tristate "GPIO Buttons" + depends on GENERIC_GPIO help - Say Y here if you want to use the old IBM PC/XT keyboard (or - compatible) on your system. This is only possible with a - parallel port keyboard adapter, you cannot connect it to the - keyboard port on a PC that runs Linux. + This driver implements support for buttons connected + to GPIO pins of various CPUs (and some other chips). + + Say Y here if your device has buttons connected + directly to such GPIO pins. Your board-specific + setup logic must also provide a platform device, + with configuration data saying which GPIOs are used. To compile this driver as a module, choose M here: the - module will be called xtkbd. - -config KEYBOARD_NEWTON - tristate "Newton keyboard" - select SERIO - help - Say Y here if you have a Newton keyboard on a serial port. - - To compile this driver as a module, choose M here: the - module will be called newtonkbd. - -config KEYBOARD_STOWAWAY - tristate "Stowaway keyboard" - select SERIO - help - Say Y here if you have a Stowaway keyboard on a serial port. - Stowaway compatible keyboards like Dicota Input-PDA keyboard - are also supported by this driver. - - To compile this driver as a module, choose M here: the - module will be called stowaway. - -config KEYBOARD_CORGI - tristate "Corgi keyboard" - depends on PXA_SHARPSL - default y - help - Say Y here to enable the keyboard on the Sharp Zaurus SL-C7xx - series of PDAs. - - To compile this driver as a module, choose M here: the - module will be called corgikbd. - -config KEYBOARD_SPITZ - tristate "Spitz keyboard" - depends on PXA_SHARPSL - default y - help - Say Y here to enable the keyboard on the Sharp Zaurus SL-C1000, - SL-C3000 and Sl-C3100 series of PDAs. - - To compile this driver as a module, choose M here: the - module will be called spitzkbd. 
- -config KEYBOARD_TOSA - tristate "Tosa keyboard" - depends on MACH_TOSA - default y - help - Say Y here to enable the keyboard on the Sharp Zaurus SL-6000x (Tosa) - - To compile this driver as a module, choose M here: the - module will be called tosakbd. - -config KEYBOARD_TOSA_USE_EXT_KEYCODES - bool "Tosa keyboard: use extended keycodes" - depends on KEYBOARD_TOSA - default n - help - Say Y here to enable the tosa keyboard driver to generate extended - (>= 127) keycodes. Be aware, that they can't be correctly interpreted - by either console keyboard driver or by Kdrive keybd driver. - - Say Y only if you know, what you are doing! - -config KEYBOARD_AMIGA - tristate "Amiga keyboard" - depends on AMIGA - help - Say Y here if you are running Linux on any AMIGA and have a keyboard - attached. - - To compile this driver as a module, choose M here: the - module will be called amikbd. - -config ATARI_KBD_CORE - bool - -config KEYBOARD_ATARI - tristate "Atari keyboard" - depends on ATARI - select ATARI_KBD_CORE - help - Say Y here if you are running Linux on any Atari and have a keyboard - attached. - - To compile this driver as a module, choose M here: the - module will be called atakbd. + module will be called gpio-keys. config KEYBOARD_HIL_OLD tristate "HP HIL keyboard support (simple driver)" @@ -261,14 +222,33 @@ config KEYBOARD_LM8323 To compile this driver as a module, choose M here: the module will be called lm8323. -config KEYBOARD_OMAP - tristate "TI OMAP keypad support" - depends on (ARCH_OMAP1 || ARCH_OMAP2) +config KEYBOARD_LOCOMO + tristate "LoCoMo Keyboard Support" + depends on SHARP_LOCOMO help - Say Y here if you want to use the OMAP keypad. + Say Y here if you are running Linux on a Sharp Zaurus Collie or Poodle based PDA To compile this driver as a module, choose M here: the - module will be called omap-keypad. + module will be called locomokbd. 
+ +config KEYBOARD_MAPLE + tristate "Maple bus keyboard" + depends on SH_DREAMCAST && MAPLE + help + Say Y here if you have a Dreamcast console running Linux and have + a keyboard attached to its Maple bus. + + To compile this driver as a module, choose M here: the + module will be called maple_keyb. + +config KEYBOARD_NEWTON + tristate "Newton keyboard" + select SERIO + help + Say Y here if you have a Newton keyboard on a serial port. + + To compile this driver as a module, choose M here: the + module will be called newtonkbd. config KEYBOARD_PXA27x tristate "PXA27x/PXA3xx keypad support" @@ -288,51 +268,38 @@ config KEYBOARD_PXA930_ROTARY To compile this driver as a module, choose M here: the module will be called pxa930_rotary. -config KEYBOARD_AAED2000 - tristate "AAED-2000 keyboard" - depends on MACH_AAED2000 - select INPUT_POLLDEV +config KEYBOARD_SPITZ + tristate "Spitz keyboard" + depends on PXA_SHARPSL default y help - Say Y here to enable the keyboard on the Agilent AAED-2000 - development board. + Say Y here to enable the keyboard on the Sharp Zaurus SL-C1000, + SL-C3000 and Sl-C3100 series of PDAs. To compile this driver as a module, choose M here: the - module will be called aaed2000_kbd. + module will be called spitzkbd. -config KEYBOARD_GPIO - tristate "GPIO Buttons" - depends on GENERIC_GPIO +config KEYBOARD_STOWAWAY + tristate "Stowaway keyboard" + select SERIO help - This driver implements support for buttons connected - to GPIO pins of various CPUs (and some other chips). - - Say Y here if your device has buttons connected - directly to such GPIO pins. Your board-specific - setup logic must also provide a platform device, - with configuration data saying which GPIOs are used. + Say Y here if you have a Stowaway keyboard on a serial port. + Stowaway compatible keyboards like Dicota Input-PDA keyboard + are also supported by this driver. To compile this driver as a module, choose M here: the - module will be called gpio-keys. 
+ module will be called stowaway. -config KEYBOARD_MAPLE - tristate "Maple bus keyboard" - depends on SH_DREAMCAST && MAPLE +config KEYBOARD_SUNKBD + tristate "Sun Type 4 and Type 5 keyboard" + select SERIO help - Say Y here if you have a Dreamcast console running Linux and have - a keyboard attached to its Maple bus. + Say Y here if you want to use a Sun Type 4 or Type 5 keyboard, + connected either to the Sun keyboard connector or to an serial + (RS-232) port via a simple adapter. To compile this driver as a module, choose M here: the - module will be called maple_keyb. - -config KEYBOARD_BFIN - tristate "Blackfin BF54x keypad support" - depends on (BF54x && !BF544) - help - Say Y here if you want to use the BF54x keypad. - - To compile this driver as a module, choose M here: the - module will be called bf54x-keys. + module will be called sunkbd. config KEYBOARD_SH_KEYSC tristate "SuperH KEYSC keypad support" @@ -344,13 +311,45 @@ config KEYBOARD_SH_KEYSC To compile this driver as a module, choose M here: the module will be called sh_keysc. -config KEYBOARD_EP93XX - tristate "EP93xx Matrix Keypad support" - depends on ARCH_EP93XX +config KEYBOARD_OMAP + tristate "TI OMAP keypad support" + depends on (ARCH_OMAP1 || ARCH_OMAP2) help - Say Y here to enable the matrix keypad on the Cirrus EP93XX. + Say Y here if you want to use the OMAP keypad. To compile this driver as a module, choose M here: the - module will be called ep93xx_keypad. + module will be called omap-keypad. + +config KEYBOARD_TOSA + tristate "Tosa keyboard" + depends on MACH_TOSA + default y + help + Say Y here to enable the keyboard on the Sharp Zaurus SL-6000x (Tosa) + + To compile this driver as a module, choose M here: the + module will be called tosakbd. + +config KEYBOARD_TOSA_USE_EXT_KEYCODES + bool "Tosa keyboard: use extended keycodes" + depends on KEYBOARD_TOSA + help + Say Y here to enable the tosa keyboard driver to generate extended + (>= 127) keycodes. 
Be aware, that they can't be correctly interpreted + by either console keyboard driver or by Kdrive keybd driver. + + Say Y only if you know, what you are doing! + +config KEYBOARD_XTKBD + tristate "XT keyboard" + select SERIO + help + Say Y here if you want to use the old IBM PC/XT keyboard (or + compatible) on your system. This is only possible with a + parallel port keyboard adapter, you cannot connect it to the + keyboard port on a PC that runs Linux. + + To compile this driver as a module, choose M here: the + module will be called xtkbd. endif diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile index 156b647a259b..632efbc18c44 100644 --- a/drivers/input/keyboard/Makefile +++ b/drivers/input/keyboard/Makefile @@ -4,29 +4,29 @@ # Each configuration option enables a list of files. -obj-$(CONFIG_KEYBOARD_ATKBD) += atkbd.o -obj-$(CONFIG_KEYBOARD_SUNKBD) += sunkbd.o -obj-$(CONFIG_KEYBOARD_LKKBD) += lkkbd.o -obj-$(CONFIG_KEYBOARD_XTKBD) += xtkbd.o +obj-$(CONFIG_KEYBOARD_AAED2000) += aaed2000_kbd.o obj-$(CONFIG_KEYBOARD_AMIGA) += amikbd.o obj-$(CONFIG_KEYBOARD_ATARI) += atakbd.o -obj-$(CONFIG_KEYBOARD_LOCOMO) += locomokbd.o -obj-$(CONFIG_KEYBOARD_NEWTON) += newtonkbd.o -obj-$(CONFIG_KEYBOARD_STOWAWAY) += stowaway.o +obj-$(CONFIG_KEYBOARD_ATKBD) += atkbd.o +obj-$(CONFIG_KEYBOARD_BFIN) += bf54x-keys.o obj-$(CONFIG_KEYBOARD_CORGI) += corgikbd.o -obj-$(CONFIG_KEYBOARD_SPITZ) += spitzkbd.o -obj-$(CONFIG_KEYBOARD_TOSA) += tosakbd.o +obj-$(CONFIG_KEYBOARD_EP93XX) += ep93xx_keypad.o +obj-$(CONFIG_KEYBOARD_GPIO) += gpio_keys.o obj-$(CONFIG_KEYBOARD_HIL) += hil_kbd.o obj-$(CONFIG_KEYBOARD_HIL_OLD) += hilkbd.o +obj-$(CONFIG_KEYBOARD_HP6XX) += jornada680_kbd.o +obj-$(CONFIG_KEYBOARD_HP7XX) += jornada720_kbd.o +obj-$(CONFIG_KEYBOARD_LKKBD) += lkkbd.o obj-$(CONFIG_KEYBOARD_LM8323) += lm8323.o +obj-$(CONFIG_KEYBOARD_LOCOMO) += locomokbd.o +obj-$(CONFIG_KEYBOARD_MAPLE) += maple_keyb.o +obj-$(CONFIG_KEYBOARD_NEWTON) += newtonkbd.o 
obj-$(CONFIG_KEYBOARD_OMAP) += omap-keypad.o obj-$(CONFIG_KEYBOARD_PXA27x) += pxa27x_keypad.o obj-$(CONFIG_KEYBOARD_PXA930_ROTARY) += pxa930_rotary.o -obj-$(CONFIG_KEYBOARD_AAED2000) += aaed2000_kbd.o -obj-$(CONFIG_KEYBOARD_GPIO) += gpio_keys.o -obj-$(CONFIG_KEYBOARD_HP6XX) += jornada680_kbd.o -obj-$(CONFIG_KEYBOARD_HP7XX) += jornada720_kbd.o -obj-$(CONFIG_KEYBOARD_MAPLE) += maple_keyb.o -obj-$(CONFIG_KEYBOARD_BFIN) += bf54x-keys.o obj-$(CONFIG_KEYBOARD_SH_KEYSC) += sh_keysc.o -obj-$(CONFIG_KEYBOARD_EP93XX) += ep93xx_keypad.o +obj-$(CONFIG_KEYBOARD_SPITZ) += spitzkbd.o +obj-$(CONFIG_KEYBOARD_STOWAWAY) += stowaway.o +obj-$(CONFIG_KEYBOARD_SUNKBD) += sunkbd.o +obj-$(CONFIG_KEYBOARD_TOSA) += tosakbd.o +obj-$(CONFIG_KEYBOARD_XTKBD) += xtkbd.o From bab7614d6d1b1fc96ec6c5a7ca34c8641060e659 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Mon, 29 Jun 2009 00:20:52 -0700 Subject: [PATCH 126/741] Input: add support for generic GPIO-based matrix keypad Original patch by Marek Vasut, modified by Eric in: 1. use delayed work to simplify the debouncing 2. combine col_polarity/row_polarity into a single active_low field 3. use a generic bit array based XOR algorithm to detect key press/release, which should make the column assertion time shorter and code a bit cleaner 4. remove the ALT_FN handling, which is no way generic, the ALT_FN key should be treated as no different from other keys, and translation will be done by user space by commands like 'loadkeys'. 5. explicitly disable row IRQs and flush potential pending work, and schedule an immediate scan after resuming as suggested by Uli Luckas 6. incorporate review comments from many others Patch tested on Littleton/PXA310 (though PXA310 has a dedicate keypad controller, I have to configure those pins as generic GPIO to use this driver, works quite well, though), and Sharp Zaurus model SL-C7x0 and SL-C1000. 
[dtor@mail.ru: fix error unwinding path, support changing keymap from userspace] Signed-off-by: Marek Vasut Reviewed-by: Trilok Soni Reviewed-by: Uli Luckas Reviewed-by: Russell King Reviewed-by: Robert Jarzmik Signed-off-by: Eric Miao Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/Kconfig | 13 +- drivers/input/keyboard/Makefile | 1 + drivers/input/keyboard/matrix_keypad.c | 453 +++++++++++++++++++++++++ include/linux/input/matrix_keypad.h | 65 ++++ 4 files changed, 530 insertions(+), 2 deletions(-) create mode 100644 drivers/input/keyboard/matrix_keypad.c create mode 100644 include/linux/input/matrix_keypad.h diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index d2df1030675a..a6b989a9dc07 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -158,7 +158,16 @@ config KEYBOARD_GPIO with configuration data saying which GPIOs are used. To compile this driver as a module, choose M here: the - module will be called gpio-keys. + module will be called gpio_keys. + +config KEYBOARD_MATRIX + tristate "GPIO driven matrix keypad support" + depends on GENERIC_GPIO + help + Enable support for GPIO driven matrix keypad. + + To compile this driver as a module, choose M here: the + module will be called matrix_keypad. config KEYBOARD_HIL_OLD tristate "HP HIL keyboard support (simple driver)" @@ -254,7 +263,7 @@ config KEYBOARD_PXA27x tristate "PXA27x/PXA3xx keypad support" depends on PXA27x || PXA3xx help - Enable support for PXA27x/PXA3xx keypad controller + Enable support for PXA27x/PXA3xx keypad controller. To compile this driver as a module, choose M here: the module will be called pxa27x_keypad. 
diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile index 632efbc18c44..b5b5eae9724f 100644 --- a/drivers/input/keyboard/Makefile +++ b/drivers/input/keyboard/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_KEYBOARD_LKKBD) += lkkbd.o obj-$(CONFIG_KEYBOARD_LM8323) += lm8323.o obj-$(CONFIG_KEYBOARD_LOCOMO) += locomokbd.o obj-$(CONFIG_KEYBOARD_MAPLE) += maple_keyb.o +obj-$(CONFIG_KEYBOARD_MATRIX) += matrix_keypad.o obj-$(CONFIG_KEYBOARD_NEWTON) += newtonkbd.o obj-$(CONFIG_KEYBOARD_OMAP) += omap-keypad.o obj-$(CONFIG_KEYBOARD_PXA27x) += pxa27x_keypad.o diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c new file mode 100644 index 000000000000..e9b2e7cb05be --- /dev/null +++ b/drivers/input/keyboard/matrix_keypad.c @@ -0,0 +1,453 @@ +/* + * GPIO driven matrix keyboard driver + * + * Copyright (c) 2008 Marek Vasut + * + * Based on corgikbd.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct matrix_keypad { + const struct matrix_keypad_platform_data *pdata; + struct input_dev *input_dev; + unsigned short *keycodes; + + uint32_t last_key_state[MATRIX_MAX_COLS]; + struct delayed_work work; + bool scan_pending; + bool stopped; + spinlock_t lock; +}; + +/* + * NOTE: normally the GPIO has to be put into HiZ when de-activated to cause + * minimal side effect when scanning other columns, here it is configured to + * be input, and it should work on most platforms.
+ */ +static void __activate_col(const struct matrix_keypad_platform_data *pdata, + int col, bool on) +{ + bool level_on = !pdata->active_low; + + if (on) { + gpio_direction_output(pdata->col_gpios[col], level_on); + } else { + gpio_set_value_cansleep(pdata->col_gpios[col], !level_on); + gpio_direction_input(pdata->col_gpios[col]); + } +} + +static void activate_col(const struct matrix_keypad_platform_data *pdata, + int col, bool on) +{ + __activate_col(pdata, col, on); + + if (on && pdata->col_scan_delay_us) + udelay(pdata->col_scan_delay_us); +} + +static void activate_all_cols(const struct matrix_keypad_platform_data *pdata, + bool on) +{ + int col; + + for (col = 0; col < pdata->num_col_gpios; col++) + __activate_col(pdata, col, on); +} + +static bool row_asserted(const struct matrix_keypad_platform_data *pdata, + int row) +{ + return gpio_get_value_cansleep(pdata->row_gpios[row]) ? + !pdata->active_low : pdata->active_low; +} + +static void enable_row_irqs(struct matrix_keypad *keypad) +{ + const struct matrix_keypad_platform_data *pdata = keypad->pdata; + int i; + + for (i = 0; i < pdata->num_row_gpios; i++) + enable_irq(gpio_to_irq(pdata->row_gpios[i])); +} + +static void disable_row_irqs(struct matrix_keypad *keypad) +{ + const struct matrix_keypad_platform_data *pdata = keypad->pdata; + int i; + + for (i = 0; i < pdata->num_row_gpios; i++) + disable_irq_nosync(gpio_to_irq(pdata->row_gpios[i])); +} + +/* + * This gets the keys from keyboard and reports it to input subsystem + */ +static void matrix_keypad_scan(struct work_struct *work) +{ + struct matrix_keypad *keypad = + container_of(work, struct matrix_keypad, work.work); + struct input_dev *input_dev = keypad->input_dev; + const struct matrix_keypad_platform_data *pdata = keypad->pdata; + uint32_t new_state[MATRIX_MAX_COLS]; + int row, col, code; + + /* de-activate all columns for scanning */ + activate_all_cols(pdata, false); + + memset(new_state, 0, sizeof(new_state)); + + /* assert each column and 
read the row status out */ + for (col = 0; col < pdata->num_col_gpios; col++) { + + activate_col(pdata, col, true); + + for (row = 0; row < pdata->num_row_gpios; row++) + new_state[col] |= + row_asserted(pdata, row) ? (1 << row) : 0; + + activate_col(pdata, col, false); + } + + for (col = 0; col < pdata->num_col_gpios; col++) { + uint32_t bits_changed; + + bits_changed = keypad->last_key_state[col] ^ new_state[col]; + if (bits_changed == 0) + continue; + + for (row = 0; row < pdata->num_row_gpios; row++) { + if ((bits_changed & (1 << row)) == 0) + continue; + + code = (row << 4) + col; + input_event(input_dev, EV_MSC, MSC_SCAN, code); + input_report_key(input_dev, + keypad->keycodes[code], + new_state[col] & (1 << row)); + } + } + input_sync(input_dev); + + memcpy(keypad->last_key_state, new_state, sizeof(new_state)); + + activate_all_cols(pdata, true); + + /* Enable IRQs again */ + spin_lock_irq(&keypad->lock); + keypad->scan_pending = false; + enable_row_irqs(keypad); + spin_unlock_irq(&keypad->lock); +} + +static irqreturn_t matrix_keypad_interrupt(int irq, void *id) +{ + struct matrix_keypad *keypad = id; + unsigned long flags; + + spin_lock_irqsave(&keypad->lock, flags); + + /* + * See if another IRQ beaten us to it and scheduled the + * scan already. In that case we should not try to + * disable IRQs again. + */ + if (unlikely(keypad->scan_pending || keypad->stopped)) + goto out; + + disable_row_irqs(keypad); + keypad->scan_pending = true; + schedule_delayed_work(&keypad->work, + msecs_to_jiffies(keypad->pdata->debounce_ms)); + +out: + spin_unlock_irqrestore(&keypad->lock, flags); + return IRQ_HANDLED; +} + +static int matrix_keypad_start(struct input_dev *dev) +{ + struct matrix_keypad *keypad = input_get_drvdata(dev); + + keypad->stopped = false; + mb(); + + /* + * Schedule an immediate key scan to capture current key state; + * columns will be activated and IRQs be enabled after the scan. 
+ */ + schedule_delayed_work(&keypad->work, 0); + + return 0; +} + +static void matrix_keypad_stop(struct input_dev *dev) +{ + struct matrix_keypad *keypad = input_get_drvdata(dev); + + keypad->stopped = true; + mb(); + flush_work(&keypad->work.work); + /* + * matrix_keypad_scan() will leave IRQs enabled; + * we should disable them now. + */ + disable_row_irqs(keypad); +} + +#ifdef CONFIG_PM +static int matrix_keypad_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct matrix_keypad *keypad = platform_get_drvdata(pdev); + const struct matrix_keypad_platform_data *pdata = keypad->pdata; + int i; + + matrix_keypad_stop(keypad->input_dev); + + if (device_may_wakeup(&pdev->dev)) + for (i = 0; i < pdata->num_row_gpios; i++) + enable_irq_wake(gpio_to_irq(pdata->row_gpios[i])); + + return 0; +} + +static int matrix_keypad_resume(struct platform_device *pdev) +{ + struct matrix_keypad *keypad = platform_get_drvdata(pdev); + const struct matrix_keypad_platform_data *pdata = keypad->pdata; + int i; + + if (device_may_wakeup(&pdev->dev)) + for (i = 0; i < pdata->num_row_gpios; i++) + disable_irq_wake(gpio_to_irq(pdata->row_gpios[i])); + + matrix_keypad_start(keypad->input_dev); + + return 0; +} +#else +#define matrix_keypad_suspend NULL +#define matrix_keypad_resume NULL +#endif + +static int __devinit init_matrix_gpio(struct platform_device *pdev, + struct matrix_keypad *keypad) +{ + const struct matrix_keypad_platform_data *pdata = keypad->pdata; + int i, err = -EINVAL; + + /* initialized strobe lines as outputs, activated */ + for (i = 0; i < pdata->num_col_gpios; i++) { + err = gpio_request(pdata->col_gpios[i], "matrix_kbd_col"); + if (err) { + dev_err(&pdev->dev, + "failed to request GPIO%d for COL%d\n", + pdata->col_gpios[i], i); + goto err_free_cols; + } + + gpio_direction_output(pdata->col_gpios[i], !pdata->active_low); + } + + for (i = 0; i < pdata->num_row_gpios; i++) { + err = gpio_request(pdata->row_gpios[i], "matrix_kbd_row"); + if (err) { + 
dev_err(&pdev->dev, + "failed to request GPIO%d for ROW%d\n", + pdata->row_gpios[i], i); + goto err_free_rows; + } + + gpio_direction_input(pdata->row_gpios[i]); + } + + for (i = 0; i < pdata->num_row_gpios; i++) { + err = request_irq(gpio_to_irq(pdata->row_gpios[i]), + matrix_keypad_interrupt, + IRQF_DISABLED | + IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, + "matrix-keypad", keypad); + if (err) { + dev_err(&pdev->dev, + "Unable to acquire interrupt for GPIO line %i\n", + pdata->row_gpios[i]); + goto err_free_irqs; + } + } + + /* initialized as disabled - enabled by input->open */ + disable_row_irqs(keypad); + return 0; + +err_free_irqs: + while (--i >= 0) + free_irq(gpio_to_irq(pdata->row_gpios[i]), keypad); + i = pdata->num_row_gpios; +err_free_rows: + while (--i >= 0) + gpio_free(pdata->row_gpios[i]); + i = pdata->num_col_gpios; +err_free_cols: + while (--i >= 0) + gpio_free(pdata->col_gpios[i]); + + return err; +} + +static int __devinit matrix_keypad_probe(struct platform_device *pdev) +{ + const struct matrix_keypad_platform_data *pdata; + const struct matrix_keymap_data *keymap_data; + struct matrix_keypad *keypad; + struct input_dev *input_dev; + unsigned short *keycodes; + int i; + int err; + + pdata = pdev->dev.platform_data; + if (!pdata) { + dev_err(&pdev->dev, "no platform data defined\n"); + return -EINVAL; + } + + keymap_data = pdata->keymap_data; + if (!keymap_data) { + dev_err(&pdev->dev, "no keymap data defined\n"); + return -EINVAL; + } + + if (!keymap_data->max_keymap_size) { + dev_err(&pdev->dev, "invalid keymap data supplied\n"); + return -EINVAL; + } + + keypad = kzalloc(sizeof(struct matrix_keypad), GFP_KERNEL); + keycodes = kzalloc(keymap_data->max_keymap_size * + sizeof(keypad->keycodes), + GFP_KERNEL); + input_dev = input_allocate_device(); + if (!keypad || !keycodes || !input_dev) { + err = -ENOMEM; + goto err_free_mem; + } + + keypad->input_dev = input_dev; + keypad->pdata = pdata; + keypad->keycodes = keycodes; + keypad->stopped = 
true; + INIT_DELAYED_WORK(&keypad->work, matrix_keypad_scan); + spin_lock_init(&keypad->lock); + + input_dev->name = pdev->name; + input_dev->id.bustype = BUS_HOST; + input_dev->dev.parent = &pdev->dev; + input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP); + input_dev->open = matrix_keypad_start; + input_dev->close = matrix_keypad_stop; + + input_dev->keycode = keycodes; + input_dev->keycodesize = sizeof(*keycodes); + input_dev->keycodemax = keymap_data->max_keymap_size; + + for (i = 0; i < keymap_data->keymap_size; i++) { + unsigned int key = keymap_data->keymap[i]; + unsigned int row = KEY_ROW(key); + unsigned int col = KEY_COL(key); + unsigned short code = KEY_VAL(key); + + keycodes[(row << 4) + col] = code; + __set_bit(code, input_dev->keybit); + } + __clear_bit(KEY_RESERVED, input_dev->keybit); + + input_set_capability(input_dev, EV_MSC, MSC_SCAN); + input_set_drvdata(input_dev, keypad); + + err = init_matrix_gpio(pdev, keypad); + if (err) + goto err_free_mem; + + err = input_register_device(keypad->input_dev); + if (err) + goto err_free_mem; + + device_init_wakeup(&pdev->dev, pdata->wakeup); + platform_set_drvdata(pdev, keypad); + + return 0; + +err_free_mem: + input_free_device(input_dev); + kfree(keycodes); + kfree(keypad); + return err; +} + +static int __devexit matrix_keypad_remove(struct platform_device *pdev) +{ + struct matrix_keypad *keypad = platform_get_drvdata(pdev); + const struct matrix_keypad_platform_data *pdata = keypad->pdata; + int i; + + device_init_wakeup(&pdev->dev, 0); + + for (i = 0; i < pdata->num_row_gpios; i++) { + free_irq(gpio_to_irq(pdata->row_gpios[i]), keypad); + gpio_free(pdata->row_gpios[i]); + } + + for (i = 0; i < pdata->num_col_gpios; i++) + gpio_free(pdata->col_gpios[i]); + + input_unregister_device(keypad->input_dev); + platform_set_drvdata(pdev, NULL); + kfree(keypad->keycodes); + kfree(keypad); + + return 0; +} + +static struct platform_driver matrix_keypad_driver = { + .probe = matrix_keypad_probe, + .remove = 
__devexit_p(matrix_keypad_remove), + .suspend = matrix_keypad_suspend, + .resume = matrix_keypad_resume, + .driver = { + .name = "matrix-keypad", + .owner = THIS_MODULE, + }, +}; + +static int __init matrix_keypad_init(void) +{ + return platform_driver_register(&matrix_keypad_driver); +} + +static void __exit matrix_keypad_exit(void) +{ + platform_driver_unregister(&matrix_keypad_driver); +} + +module_init(matrix_keypad_init); +module_exit(matrix_keypad_exit); + +MODULE_AUTHOR("Marek Vasut "); +MODULE_DESCRIPTION("GPIO Driven Matrix Keypad Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:matrix-keypad"); diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h new file mode 100644 index 000000000000..7964516c6954 --- /dev/null +++ b/include/linux/input/matrix_keypad.h @@ -0,0 +1,65 @@ +#ifndef _MATRIX_KEYPAD_H +#define _MATRIX_KEYPAD_H + +#include +#include + +#define MATRIX_MAX_ROWS 16 +#define MATRIX_MAX_COLS 16 + +#define KEY(row, col, val) ((((row) & (MATRIX_MAX_ROWS - 1)) << 24) |\ + (((col) & (MATRIX_MAX_COLS - 1)) << 16) |\ + (val & 0xffff)) + +#define KEY_ROW(k) (((k) >> 24) & 0xff) +#define KEY_COL(k) (((k) >> 16) & 0xff) +#define KEY_VAL(k) ((k) & 0xffff) + +/** + * struct matrix_keymap_data - keymap for matrix keyboards + * @keymap: pointer to array of uint32 values encoded with KEY() macro + * representing keymap + * @keymap_size: number of entries (initialized) in this keymap + * @max_keymap_size: maximum size of keymap supported by the device + * + * This structure is supposed to be used by platform code to supply + * keymaps to drivers that implement matrix-like keypads/keyboards. 
+ */ +struct matrix_keymap_data { + const uint32_t *keymap; + unsigned int keymap_size; + unsigned int max_keymap_size; +}; + +/** + * struct matrix_keypad_platform_data - platform-dependent keypad data + * @keymap_data: pointer to &matrix_keymap_data + * @row_gpios: array of gpio numbers representing rows + * @col_gpios: array of gpio numbers representing columns + * @num_row_gpios: actual number of row gpios used by device + * @num_col_gpios: actual number of col gpios used by device + * @col_scan_delay_us: delay, measured in microseconds, that is + * needed before we can scan keypad after activating column gpio + * @debounce_ms: debounce interval in milliseconds + * + * This structure represents platform-specific data that is used by + * matrix_keypad driver to perform proper initialization. + */ +struct matrix_keypad_platform_data { + const struct matrix_keymap_data *keymap_data; + + unsigned int row_gpios[MATRIX_MAX_ROWS]; + unsigned int col_gpios[MATRIX_MAX_COLS]; + unsigned int num_row_gpios; + unsigned int num_col_gpios; + + unsigned int col_scan_delay_us; + + /* key debounce interval in milliseconds */ + unsigned int debounce_ms; + + bool active_low; + bool wakeup; +}; + +#endif /* _MATRIX_KEYPAD_H */ From 9a660a6e1a0ebef72cf792583777234483a40022 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 28 Jun 2009 16:26:13 +0000 Subject: [PATCH 127/741] arch/sh/boards/mach-se/7206/io.c: Remove unnecessary semicolons Signed-off-by: Joe Perches Signed-off-by: Paul Mundt --- arch/sh/boards/mach-se/7206/io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/boards/mach-se/7206/io.c b/arch/sh/boards/mach-se/7206/io.c index 9c3a33210d61..180455642a43 100644 --- a/arch/sh/boards/mach-se/7206/io.c +++ b/arch/sh/boards/mach-se/7206/io.c @@ -50,7 +50,7 @@ unsigned char se7206_inb_p(unsigned long port) unsigned short se7206_inw(unsigned long port) { - return *port2adr(port);; + return *port2adr(port); } void se7206_outb(unsigned char value, unsigned
long port) From ec87805c63a96e8863385d79c864801a572a4ab8 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 29 Jun 2009 07:40:00 +0000 Subject: [PATCH 128/741] sh: ms7724se: Enable sh_eth in defconfig. Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/sh/configs/se7724_defconfig | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig index 3840270283e4..3ee783a0a075 100644 --- a/arch/sh/configs/se7724_defconfig +++ b/arch/sh/configs/se7724_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.30 -# Thu Jun 18 16:09:05 2009 +# Mon Jun 29 16:28:43 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y @@ -14,6 +14,7 @@ CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_IRQ_PER_CPU=y CONFIG_GENERIC_GPIO=y CONFIG_GENERIC_TIME=y CONFIG_GENERIC_CLOCKEVENTS=y @@ -28,7 +29,9 @@ CONFIG_HAVE_LATENCYTOP_SUPPORT=y # CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_ARCH_NO_VIRT_TO_BUS=y CONFIG_ARCH_HAS_DEFAULT_IDLE=y +CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" +CONFIG_CONSTRUCTORS=y # # General setup @@ -88,10 +91,12 @@ CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y CONFIG_AIO=y +CONFIG_HAVE_PERF_COUNTERS=y # # Performance Counters # +# CONFIG_PERF_COUNTERS is not set CONFIG_VM_EVENT_COUNTERS=y # CONFIG_STRIP_ASM_SYMS is not set CONFIG_COMPAT_BRK=y @@ -107,6 +112,10 @@ CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y CONFIG_HAVE_DMA_API_DEBUG=y + +# +# GCOV-based kernel profiling +# # CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y @@ -119,7 +128,7 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y -# CONFIG_LBD is not set +CONFIG_LBDAF=y # CONFIG_BLK_DEV_BSG is not set # 
CONFIG_BLK_DEV_INTEGRITY is not set @@ -584,7 +593,6 @@ CONFIG_SCSI_WAIT_SCAN=m # CONFIG_SCSI_SRP_ATTRS is not set CONFIG_SCSI_LOWLEVEL=y # CONFIG_ISCSI_TCP is not set -# CONFIG_SCSI_BNX2_ISCSI is not set # CONFIG_LIBFC is not set # CONFIG_LIBFCOE is not set # CONFIG_SCSI_DEBUG is not set @@ -624,7 +632,7 @@ CONFIG_NET_ETHERNET=y CONFIG_MII=y # CONFIG_AX88796 is not set # CONFIG_STNIC is not set -# CONFIG_SH_ETH is not set +CONFIG_SH_ETH=y CONFIG_SMC91X=y # CONFIG_ENC28J60 is not set # CONFIG_ETHOC is not set @@ -801,6 +809,11 @@ CONFIG_SPI_BITBANG=y # # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set + +# +# PPS support +# +# CONFIG_PPS is not set CONFIG_ARCH_REQUIRE_GPIOLIB=y CONFIG_GPIOLIB=y # CONFIG_GPIO_SYSFS is not set @@ -851,6 +864,8 @@ CONFIG_SSB_POSSIBLE=y # CONFIG_MFD_WM8400 is not set # CONFIG_MFD_WM8350_I2C is not set # CONFIG_MFD_PCF50633 is not set +# CONFIG_AB3100_CORE is not set +# CONFIG_EZX_PCAP is not set # CONFIG_REGULATOR is not set CONFIG_MEDIA_SUPPORT=y @@ -1196,6 +1211,7 @@ CONFIG_RTC_DRV_PCF8563=y # CONFIG_RTC_DRV_S35390A is not set # CONFIG_RTC_DRV_FM3130 is not set # CONFIG_RTC_DRV_RX8581 is not set +# CONFIG_RTC_DRV_RX8025 is not set # # SPI RTC drivers @@ -1260,6 +1276,7 @@ CONFIG_FS_MBCACHE=y # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y # CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set # CONFIG_BTRFS_FS is not set CONFIG_FILE_LOCKING=y From 4078c444cf667f018c3fc7ebf141131a2b7c9480 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 29 Jun 2009 00:41:11 -0700 Subject: [PATCH 129/741] perf_counter, x86: Update x86_pmu after WARN() The print out should read the value before changing the value. 
Signed-off-by: Yinghai Lu Cc: Peter Zijlstra LKML-Reference: <4A487017.4090007@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index b83474b6021a..d4cf4ce19aac 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -1496,17 +1496,17 @@ void __init init_hw_perf_counters(void) pr_cont("%s PMU driver.\n", x86_pmu.name); if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { - x86_pmu.num_counters = X86_PMC_MAX_GENERIC; WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", x86_pmu.num_counters, X86_PMC_MAX_GENERIC); + x86_pmu.num_counters = X86_PMC_MAX_GENERIC; } perf_counter_mask = (1 << x86_pmu.num_counters) - 1; perf_max_counters = x86_pmu.num_counters; if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { - x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); + x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; } perf_counter_mask |= From 238a24f626628cb16a3015f332d649f08246ca89 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 29 Jun 2009 15:55:10 +0800 Subject: [PATCH 130/741] tracing/fastboot: Document the need of initcall_debug To use boot tracer, one should pass initcall_debug as well as ftrace=initcall to the command line. 
Signed-off-by: Li Zefan Cc: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <4A48735E.9050002@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 1551f47e7669..019f380fd764 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -226,13 +226,13 @@ config BOOT_TRACER the timings of the initcalls and traces key events and the identity of tasks that can cause boot delays, such as context-switches. - Its aim is to be parsed by the /scripts/bootgraph.pl tool to + Its aim is to be parsed by the scripts/bootgraph.pl tool to produce pretty graphics about boot inefficiencies, giving a visual representation of the delays during initcalls - but the raw /debug/tracing/trace text output is readable too. - You must pass in ftrace=initcall to the kernel command line - to enable this on bootup. + You must pass in initcall_debug and ftrace=initcall to the kernel + command line to enable this on bootup. config TRACE_BRANCH_PROFILING bool From 59c5fe6d848cae8fd51498d17532f5aad3866f98 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Mon, 29 Jun 2009 12:11:50 +0100 Subject: [PATCH 131/741] [ARM] 5562/2: at91: add gpio button support for at91sam9g20ek This adds input keyboard gpio support on at91sam9g20ek board. It adds button 3 and 4. 
Signed-off-by: Nicolas Ferre Acked-by: Andrew Victor Signed-off-by: Russell King --- arch/arm/mach-at91/board-sam9g20ek.c | 54 ++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/arch/arm/mach-at91/board-sam9g20ek.c b/arch/arm/mach-at91/board-sam9g20ek.c index cc270beadd5d..a55398ed1211 100644 --- a/arch/arm/mach-at91/board-sam9g20ek.c +++ b/arch/arm/mach-at91/board-sam9g20ek.c @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include #include @@ -218,6 +220,56 @@ static struct gpio_led ek_leds[] = { } }; + +/* + * GPIO Buttons + */ +#if defined(CONFIG_KEYBOARD_GPIO) || defined(CONFIG_KEYBOARD_GPIO_MODULE) +static struct gpio_keys_button ek_buttons[] = { + { + .gpio = AT91_PIN_PA30, + .code = BTN_3, + .desc = "Button 3", + .active_low = 1, + .wakeup = 1, + }, + { + .gpio = AT91_PIN_PA31, + .code = BTN_4, + .desc = "Button 4", + .active_low = 1, + .wakeup = 1, + } +}; + +static struct gpio_keys_platform_data ek_button_data = { + .buttons = ek_buttons, + .nbuttons = ARRAY_SIZE(ek_buttons), +}; + +static struct platform_device ek_button_device = { + .name = "gpio-keys", + .id = -1, + .num_resources = 0, + .dev = { + .platform_data = &ek_button_data, + } +}; + +static void __init ek_add_device_buttons(void) +{ + at91_set_gpio_input(AT91_PIN_PA30, 1); /* btn3 */ + at91_set_deglitch(AT91_PIN_PA30, 1); + at91_set_gpio_input(AT91_PIN_PA31, 1); /* btn4 */ + at91_set_deglitch(AT91_PIN_PA31, 1); + + platform_device_register(&ek_button_device); +} +#else +static void __init ek_add_device_buttons(void) {} +#endif + + static struct i2c_board_info __initdata ek_i2c_devices[] = { { I2C_BOARD_INFO("24c512", 0x50), @@ -245,6 +297,8 @@ static void __init ek_board_init(void) at91_add_device_i2c(ek_i2c_devices, ARRAY_SIZE(ek_i2c_devices)); /* LEDs */ at91_gpio_leds(ek_leds, ARRAY_SIZE(ek_leds)); + /* Push Buttons */ + ek_add_device_buttons(); /* PCK0 provides MCLK to the WM8731 */ at91_set_B_periph(AT91_PIN_PC1, 0); /* SSC (for WM8731) */ From 
bf92df30df909710c498d05620e2df1be1ef779b Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 29 Jun 2009 11:31:45 +0800 Subject: [PATCH 132/741] intel-iommu: Only avoid flushing device IOTLB for domain ID 0 in caching mode In caching mode, domain ID 0 is reserved for non-present to present mapping flush. Device IOTLB doesn't need to be flushed in this case. Previously we were avoiding the flush for domain zero, even if the IOMMU wasn't in caching mode and domain zero wasn't special. Signed-off-by: Yu Zhao Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 420afa887283..3cad7006ed8e 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1054,7 +1054,12 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, else iommu->flush.flush_iotlb(iommu, did, addr, mask, DMA_TLB_PSI_FLUSH); - if (did) + + /* + * In caching mode, domain ID 0 is reserved for non-present to present + * mapping flush. Device IOTLB doesn't need to be flushed in this case. + */ + if (!cap_caching_mode(iommu->cap) || did) iommu_flush_dev_iotlb(iommu->domains[did], addr, mask); } From b213203e475212a69ad6fedfb73464087e317148 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 26 Jun 2009 18:50:28 +0100 Subject: [PATCH 133/741] intel-iommu: Create new iommu_domain_identity_map() function We'll want to do this to a _domain_ (the si_domain) rather than a PCI device. 
Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 62 +++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 3cad7006ed8e..3a4f347e2f88 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1849,25 +1849,12 @@ error: static int iommu_identity_mapping; -static int iommu_prepare_identity_map(struct pci_dev *pdev, - unsigned long long start, - unsigned long long end) +static int iommu_domain_identity_map(struct dmar_domain *domain, + unsigned long long start, + unsigned long long end) { - struct dmar_domain *domain; unsigned long size; unsigned long long base; - int ret; - - printk(KERN_INFO - "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", - pci_name(pdev), start, end); - if (iommu_identity_mapping) - domain = si_domain; - else - /* page table init */ - domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); - if (!domain) - return -ENOMEM; /* The address might not be aligned */ base = start & PAGE_MASK; @@ -1876,31 +1863,54 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, if (!reserve_iova(&domain->iovad, IOVA_PFN(base), IOVA_PFN(base + size) - 1)) { printk(KERN_ERR "IOMMU: reserve iova failed\n"); - ret = -ENOMEM; - goto error; + return -ENOMEM; } - pr_debug("Mapping reserved region %lx@%llx for %s\n", - size, base, pci_name(pdev)); + pr_debug("Mapping reserved region %lx@%llx for domain %d\n", + size, base, domain->id); /* * RMRR range might have overlap with physical memory range, * clear it first */ dma_pte_clear_range(domain, base, base + size); - ret = domain_page_mapping(domain, base, base, size, - DMA_PTE_READ|DMA_PTE_WRITE); + return domain_page_mapping(domain, base, base, size, + DMA_PTE_READ|DMA_PTE_WRITE); +} + +static int iommu_prepare_identity_map(struct pci_dev *pdev, + unsigned long long start, + unsigned long long end) +{ + struct dmar_domain *domain; + int ret; + + 
printk(KERN_INFO + "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", + pci_name(pdev), start, end); + + if (iommu_identity_mapping) + domain = si_domain; + else + /* page table init */ + domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + if (!domain) + return -ENOMEM; + + ret = iommu_domain_identity_map(domain, start, end); if (ret) goto error; /* context entry init */ ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL); - if (!ret) - return 0; -error: + if (ret) + goto error; + + return 0; + + error: domain_exit(domain); return ret; - } static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, From c7ab48d2acaf959e4d59c3f55d12fdb7ca9afd7c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 26 Jun 2009 19:10:36 +0100 Subject: [PATCH 134/741] intel-iommu: Clean up identity mapping code, remove CONFIG_DMAR_GFX_WA There's no need for the GFX workaround now we have 'iommu=pt' for the cases where people really care about performance. There's no need to have a special case for just one type of device. This also speeds up the iommu=pt path and reduces memory usage by setting up the si_domain _once_ and then using it for all devices, rather than giving each device its own private page tables. Signed-off-by: David Woodhouse --- arch/x86/Kconfig | 15 +----- drivers/pci/intel-iommu.c | 107 ++++++++++++-------------------------- 2 files changed, 34 insertions(+), 88 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d1430ef6b4f9..c07f72205909 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1913,25 +1913,14 @@ config DMAR_DEFAULT_ON recommended you say N here while the DMAR code remains experimental. -config DMAR_GFX_WA - def_bool y - prompt "Support for Graphics workaround" - depends on DMAR - ---help--- - Current Graphics drivers tend to use physical address - for DMA and avoid using DMA APIs. 
Setting this config - option permits the IOMMU driver to set a unity map for - all the OS-visible memory. Hence the driver can continue - to use physical addresses for DMA. - config DMAR_FLOPPY_WA def_bool y depends on DMAR ---help--- - Floppy disk drivers are know to bypass DMA API calls + Floppy disk drivers are known to bypass DMA API calls thereby failing to work when IOMMU is enabled. This workaround will setup a 1:1 mapping for the first - 16M to make floppy (an ISA device) work. + 16MiB to make floppy (an ISA device) work. config INTR_REMAP bool "Support for Interrupt Remapping (EXPERIMENTAL)" diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 3a4f347e2f88..fc121967cb5b 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1889,11 +1889,7 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", pci_name(pdev), start, end); - if (iommu_identity_mapping) - domain = si_domain; - else - /* page table init */ - domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) return -ENOMEM; @@ -1922,64 +1918,6 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, rmrr->end_address + 1); } -struct iommu_prepare_data { - struct pci_dev *pdev; - int ret; -}; - -static int __init iommu_prepare_work_fn(unsigned long start_pfn, - unsigned long end_pfn, void *datax) -{ - struct iommu_prepare_data *data; - - data = (struct iommu_prepare_data *)datax; - - data->ret = iommu_prepare_identity_map(data->pdev, - start_pfn<ret; - -} - -static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev) -{ - int nid; - struct iommu_prepare_data data; - - data.pdev = pdev; - data.ret = 0; - - for_each_online_node(nid) { - work_with_active_regions(nid, iommu_prepare_work_fn, &data); - if (data.ret) - return data.ret; - } - return data.ret; -} - -#ifdef 
CONFIG_DMAR_GFX_WA -static void __init iommu_prepare_gfx_mapping(void) -{ - struct pci_dev *pdev = NULL; - int ret; - - for_each_pci_dev(pdev) { - if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO || - !IS_GFX_DEVICE(pdev)) - continue; - printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n", - pci_name(pdev)); - ret = iommu_prepare_with_active_regions(pdev); - if (ret) - printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); - } -} -#else /* !CONFIG_DMAR_GFX_WA */ -static inline void iommu_prepare_gfx_mapping(void) -{ - return; -} -#endif - #ifdef CONFIG_DMAR_FLOPPY_WA static inline void iommu_prepare_isa(void) { @@ -1990,12 +1928,12 @@ static inline void iommu_prepare_isa(void) if (!pdev) return; - printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n"); + printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); if (ret) - printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, " - "floppy might not work\n"); + printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " + "floppy might not work\n"); } #else @@ -2023,16 +1961,30 @@ static int __init init_context_pass_through(void) } static int md_domain_init(struct dmar_domain *domain, int guest_width); + +static int __init si_domain_work_fn(unsigned long start_pfn, + unsigned long end_pfn, void *datax) +{ + int *ret = datax; + + *ret = iommu_domain_identity_map(si_domain, + (uint64_t)start_pfn << PAGE_SHIFT, + (uint64_t)end_pfn << PAGE_SHIFT); + return *ret; + +} + static int si_domain_init(void) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; - int ret = 0; + int nid, ret = 0; si_domain = alloc_domain(); if (!si_domain) return -EFAULT; + pr_debug("Identity mapping domain is domain %d\n", si_domain->id); for_each_active_iommu(iommu, drhd) { ret = iommu_attach_domain(si_domain, iommu); @@ -2049,6 +2001,12 @@ static int si_domain_init(void) si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; + 
for_each_online_node(nid) { + work_with_active_regions(nid, si_domain_work_fn, &ret); + if (ret) + return ret; + } + return 0; } @@ -2102,13 +2060,14 @@ static int iommu_prepare_static_identity_mapping(void) if (ret) return -EFAULT; - printk(KERN_INFO "IOMMU: Setting identity map:\n"); for_each_pci_dev(pdev) { - ret = iommu_prepare_with_active_regions(pdev); - if (ret) { - printk(KERN_INFO "1:1 mapping to one domain failed.\n"); - return -EFAULT; - } + printk(KERN_INFO "IOMMU: identity mapping for device %s\n", + pci_name(pdev)); + + ret = domain_context_mapping(si_domain, pdev, + CONTEXT_TT_MULTI_LEVEL); + if (ret) + return ret; ret = domain_add_dev_info(si_domain, pdev); if (ret) return ret; @@ -2299,8 +2258,6 @@ int __init init_dmars(void) } } - iommu_prepare_gfx_mapping(); - iommu_prepare_isa(); } From dd4e831960e4f0214480fa96a53ca9bb7dd04927 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jun 2009 16:21:20 +0100 Subject: [PATCH 135/741] intel-iommu: Change dma_set_pte_addr() to dma_set_pte_pfn() Add some helpers for converting between VT-d and normal system pfns, since system pages can be larger than VT-d pages. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index fc121967cb5b..852f40a913d4 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -65,6 +65,26 @@ #define PHYSICAL_PAGE_MASK PAGE_MASK #endif +/* VT-d pages must always be _smaller_ than MM pages. Otherwise things + are never going to work. 
*/ +static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn) +{ + return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT); +} + +static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn) +{ + return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT); +} +static inline unsigned long page_to_dma_pfn(struct page *pg) +{ + return mm_to_dma_pfn(page_to_pfn(pg)); +} +static inline unsigned long virt_to_dma_pfn(void *p) +{ + return page_to_dma_pfn(virt_to_page(p)); +} + /* global iommu list, set NULL for ignored DMAR units */ static struct intel_iommu **g_iommus; @@ -207,9 +227,9 @@ static inline u64 dma_pte_addr(struct dma_pte *pte) return (pte->val & VTD_PAGE_MASK); } -static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr) +static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn) { - pte->val |= (addr & VTD_PAGE_MASK); + pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT; } static inline bool dma_pte_present(struct dma_pte *pte) @@ -702,7 +722,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) return NULL; } domain_flush_cache(domain, tmp_page, PAGE_SIZE); - dma_set_pte_addr(pte, virt_to_phys(tmp_page)); + dma_set_pte_pfn(pte, virt_to_dma_pfn(tmp_page)); /* * high level table always sets r/w, last level page * table control read/write @@ -1648,7 +1668,7 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, * touches the iova range */ BUG_ON(dma_pte_addr(pte)); - dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT); + dma_set_pte_pfn(pte, start_pfn); dma_set_pte_prot(pte, prot); if (prot & DMA_PTE_SNP) dma_set_pte_snp(pte); From 77dfa56c94d2855a25ff552b74980a5538e129f8 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jun 2009 16:40:08 +0100 Subject: [PATCH 136/741] intel-iommu: Change address_level_offset() to pfn_level_offset() We're shifting the inputs for now, but that'll change... 
Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 852f40a913d4..529c1c13048f 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -671,9 +671,9 @@ static inline unsigned int level_to_offset_bits(int level) return (12 + (level - 1) * LEVEL_STRIDE); } -static inline int address_level_offset(u64 addr, int level) +static inline int pfn_level_offset(unsigned long pfn, int level) { - return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK); + return (pfn >> (level_to_offset_bits(level) - 12)) & LEVEL_MASK; } static inline u64 level_mask(int level) @@ -708,7 +708,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) while (level > 0) { void *tmp_page; - offset = address_level_offset(addr, level); + offset = pfn_level_offset(addr >> VTD_PAGE_SHIFT, level); pte = &parent[offset]; if (level == 1) break; @@ -749,7 +749,7 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, parent = domain->pgd; while (level <= total) { - offset = address_level_offset(addr, total); + offset = pfn_level_offset(addr >> VTD_PAGE_SHIFT, total); pte = &parent[offset]; if (level == total) return pte; From 90dcfb5eb2fd427b16135a14f176a6902750b6b4 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jun 2009 17:14:59 +0100 Subject: [PATCH 137/741] intel-iommu: Change dma_addr_level_pte() to dma_pfn_level_pte() Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 529c1c13048f..edd39d348a98 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -740,8 +740,9 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) } /* return address's pte at specific level */ -static struct dma_pte 
*dma_addr_level_pte(struct dmar_domain *domain, u64 addr, - int level) +static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, + unsigned long pfn, + int level) { struct dma_pte *parent, *pte = NULL; int total = agaw_to_level(domain->agaw); @@ -749,7 +750,7 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, parent = domain->pgd; while (level <= total) { - offset = pfn_level_offset(addr >> VTD_PAGE_SHIFT, total); + offset = pfn_level_offset(pfn, total); pte = &parent[offset]; if (level == total) return pte; @@ -768,7 +769,7 @@ static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr) struct dma_pte *pte = NULL; /* get last level pte */ - pte = dma_addr_level_pte(domain, addr, 1); + pte = dma_pfn_level_pte(domain, addr >> VTD_PAGE_SHIFT, 1); if (pte) { dma_clear_pte(pte); @@ -817,7 +818,8 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, return; while (tmp < end) { - pte = dma_addr_level_pte(domain, tmp, level); + pte = dma_pfn_level_pte(domain, tmp >> VTD_PAGE_SHIFT, + level); if (pte) { free_pgtable_page( phys_to_virt(dma_pte_addr(pte))); From a75f7cf94f01717c5103138319b96752ee2a2be9 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jun 2009 17:44:39 +0100 Subject: [PATCH 138/741] intel-iommu: Make dma_pte_clear_one() take pfn not address Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index edd39d348a98..40eae2097aca 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -764,12 +764,12 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, } /* clear one page's page table */ -static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr) +static void dma_pte_clear_one(struct dmar_domain *domain, unsigned long pfn) { struct dma_pte *pte = NULL; /* get last level pte */ - pte = dma_pfn_level_pte(domain, 
addr >> VTD_PAGE_SHIFT, 1); + pte = dma_pfn_level_pte(domain, pfn, 1); if (pte) { dma_clear_pte(pte); @@ -792,7 +792,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) /* we don't need lock here, nobody else touches the iova range */ while (npages--) { - dma_pte_clear_one(domain, start); + dma_pte_clear_one(domain, start >> VTD_PAGE_SHIFT); start += VTD_PAGE_SIZE; } } From 66eae8469e4e4ba6f4ca7ef82103c78f6d645583 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jun 2009 19:00:32 +0100 Subject: [PATCH 139/741] intel-iommu: Don't just mask out too-big physical addresses; BUG() instead Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 40eae2097aca..ad367f53a2bb 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -700,8 +700,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) unsigned long flags; BUG_ON(!domain->pgd); - - addr &= (((u64)1) << addr_width) - 1; + BUG_ON(addr >> addr_width); parent = domain->pgd; spin_lock_irqsave(&domain->mapping_lock, flags); @@ -783,8 +782,9 @@ static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) int addr_width = agaw_to_width(domain->agaw); int npages; - start &= (((u64)1) << addr_width) - 1; - end &= (((u64)1) << addr_width) - 1; + BUG_ON(start >> addr_width); + BUG_ON((end-1) >> addr_width); + /* in case it's partial page */ start &= PAGE_MASK; end = PAGE_ALIGN(end); @@ -807,8 +807,8 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, int level; u64 tmp; - start &= (((u64)1) << addr_width) - 1; - end &= (((u64)1) << addr_width) - 1; + BUG_ON(start >> addr_width); + BUG_ON(end >> addr_width); /* we don't need lock here, nobody else touches the iova range */ level = 2; @@ -1654,7 +1654,7 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t 
iova, int index; int addr_width = agaw_to_width(domain->agaw); - hpa &= (((u64)1) << addr_width) - 1; + BUG_ON(hpa >> addr_width); if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) return -EINVAL; From a3a9f79e361e864f0e9d75ebe2a0cb43d17c4272 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 29 Jun 2009 14:07:56 +0200 Subject: [PATCH 140/741] netfilter: tcp conntrack: fix unacknowledged data detection with NAT When NAT helpers change the TCP packet size, the highest seen sequence number needs to be corrected. This is currently only done upwards, when the packet size is reduced the sequence number is unchanged. This causes TCP conntrack to falsely detect unacknowledged data and decrease the timeout. Fix by updating the highest seen sequence number in both directions after packet mangling. Tested-by: Krzysztof Piotr Oledzki Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 4 ++-- net/ipv4/netfilter/nf_nat_helper.c | 17 +++++++++++------ net/netfilter/nf_conntrack_proto_tcp.c | 6 +++--- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index a632689b61b4..cbdd6284996d 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -258,8 +258,8 @@ static inline bool nf_ct_kill(struct nf_conn *ct) /* Update TCP window tracking data when NAT mangles the packet */ extern void nf_conntrack_tcp_update(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conn *ct, - int dir); + struct nf_conn *ct, int dir, + s16 offset); /* Fake conntrack entry for untracked connections */ extern struct nf_conn nf_conntrack_untracked; diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 155c008626c8..09172a65d9b6 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -191,7 +191,8 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, ct, ctinfo); /* Tell TCP window 
tracking about seq change */ nf_conntrack_tcp_update(skb, ip_hdrlen(skb), - ct, CTINFO2DIR(ctinfo)); + ct, CTINFO2DIR(ctinfo), + (int)rep_len - (int)match_len); nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); } @@ -377,6 +378,7 @@ nf_nat_seq_adjust(struct sk_buff *skb, struct tcphdr *tcph; int dir; __be32 newseq, newack; + s16 seqoff, ackoff; struct nf_conn_nat *nat = nfct_nat(ct); struct nf_nat_seq *this_way, *other_way; @@ -390,15 +392,18 @@ nf_nat_seq_adjust(struct sk_buff *skb, tcph = (void *)skb->data + ip_hdrlen(skb); if (after(ntohl(tcph->seq), this_way->correction_pos)) - newseq = htonl(ntohl(tcph->seq) + this_way->offset_after); + seqoff = this_way->offset_after; else - newseq = htonl(ntohl(tcph->seq) + this_way->offset_before); + seqoff = this_way->offset_before; if (after(ntohl(tcph->ack_seq) - other_way->offset_before, other_way->correction_pos)) - newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after); + ackoff = other_way->offset_after; else - newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before); + ackoff = other_way->offset_before; + + newseq = htonl(ntohl(tcph->seq) + seqoff); + newack = htonl(ntohl(tcph->ack_seq) - ackoff); inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); @@ -413,7 +418,7 @@ nf_nat_seq_adjust(struct sk_buff *skb, if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo)) return 0; - nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir); + nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir, seqoff); return 1; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 33fc0a443f3d..97a82ba75376 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -720,8 +720,8 @@ static bool tcp_in_window(const struct nf_conn *ct, /* Caller must linearize skb at tcp header. 
*/ void nf_conntrack_tcp_update(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conn *ct, - int dir) + struct nf_conn *ct, int dir, + s16 offset) { const struct tcphdr *tcph = (const void *)skb->data + dataoff; const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir]; @@ -734,7 +734,7 @@ void nf_conntrack_tcp_update(const struct sk_buff *skb, /* * We have to worry for the ack in the reply packet only... */ - if (after(end, ct->proto.tcp.seen[dir].td_end)) + if (ct->proto.tcp.seen[dir].td_end + offset == end) ct->proto.tcp.seen[dir].td_end = end; ct->proto.tcp.last_end = end; spin_unlock_bh(&ct->lock); From 04b18e65dd5a3e544f07f4bcfa8fb52704a1833b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jun 2009 19:15:01 +0100 Subject: [PATCH 141/741] intel-iommu: Make dma_pte_clear_range() use pfns Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index ad367f53a2bb..d4217f737159 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -779,21 +779,17 @@ static void dma_pte_clear_one(struct dmar_domain *domain, unsigned long pfn) /* clear last level pte, a tlb flush should be followed */ static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) { - int addr_width = agaw_to_width(domain->agaw); - int npages; + unsigned long start_pfn = IOVA_PFN(start); + unsigned long end_pfn = IOVA_PFN(end-1); + int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - BUG_ON(start >> addr_width); - BUG_ON((end-1) >> addr_width); + BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); + BUG_ON(addr_width < BITS_PER_LONG && end_pfn >> addr_width); - /* in case it's partial page */ - start &= PAGE_MASK; - end = PAGE_ALIGN(end); - npages = (end - start) / VTD_PAGE_SIZE; - - /* we don't need lock here, nobody else touches the iova range */ - while 
(npages--) { - dma_pte_clear_one(domain, start >> VTD_PAGE_SHIFT); - start += VTD_PAGE_SIZE; + /* we don't need lock here; nobody else touches the iova range */ + while (start_pfn <= end_pfn) { + dma_pte_clear_one(domain, start_pfn); + start_pfn++; } } From 595badf5d65d50300319e6178e6df005ea501f70 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jun 2009 22:09:11 +0100 Subject: [PATCH 142/741] intel-iommu: Make dma_pte_clear_range() take pfns as argument Noting that this is now an _inclusive_ range. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index d4217f737159..ff8b7ce4a013 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -56,6 +56,7 @@ #define MAX_AGAW_WIDTH 64 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) +#define DOMAIN_MAX_PFN(gaw) ((((u64)1) << (gaw-VTD_PAGE_SHIFT)) - 1) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) #define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) @@ -777,17 +778,17 @@ static void dma_pte_clear_one(struct dmar_domain *domain, unsigned long pfn) } /* clear last level pte, a tlb flush should be followed */ -static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) +static void dma_pte_clear_range(struct dmar_domain *domain, + unsigned long start_pfn, + unsigned long last_pfn) { - unsigned long start_pfn = IOVA_PFN(start); - unsigned long end_pfn = IOVA_PFN(end-1); int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); - BUG_ON(addr_width < BITS_PER_LONG && end_pfn >> addr_width); + BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); /* we don't need lock here; nobody else touches the iova range */ - while (start_pfn <= end_pfn) { + while (start_pfn <= last_pfn) { dma_pte_clear_one(domain, start_pfn); start_pfn++; } @@ -1424,7 
+1425,7 @@ static void domain_exit(struct dmar_domain *domain) end = end & (~PAGE_MASK); /* clear ptes */ - dma_pte_clear_range(domain, 0, end); + dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); /* free page tables */ dma_pte_free_pagetable(domain, 0, end); @@ -1890,7 +1891,8 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, * RMRR range might have overlap with physical memory range, * clear it first */ - dma_pte_clear_range(domain, base, base + size); + dma_pte_clear_range(domain, base >> VTD_PAGE_SHIFT, + (base + size - 1) >> VTD_PAGE_SHIFT); return domain_page_mapping(domain, base, base, size, DMA_PTE_READ|DMA_PTE_WRITE); @@ -2618,7 +2620,8 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, pci_name(pdev), size, (unsigned long long)start_addr); /* clear the whole page */ - dma_pte_clear_range(domain, start_addr, start_addr + size); + dma_pte_clear_range(domain, start_addr >> VTD_PAGE_SHIFT, + (start_addr + size - 1) >> VTD_PAGE_SHIFT); /* free page tables */ dma_pte_free_pagetable(domain, start_addr, start_addr + size); if (intel_iommu_strict) { @@ -2710,7 +2713,8 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, start_addr = iova->pfn_lo << PAGE_SHIFT; /* clear the whole page */ - dma_pte_clear_range(domain, start_addr, start_addr + size); + dma_pte_clear_range(domain, start_addr >> VTD_PAGE_SHIFT, + (start_addr + size - 1) >> VTD_PAGE_SHIFT); /* free page tables */ dma_pte_free_pagetable(domain, start_addr, start_addr + size); @@ -2792,8 +2796,9 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne size, prot); if (ret) { /* clear the page */ - dma_pte_clear_range(domain, start_addr, - start_addr + offset); + dma_pte_clear_range(domain, + start_addr >> VTD_PAGE_SHIFT, + (start_addr + offset - 1) >> VTD_PAGE_SHIFT); /* free page tables */ dma_pte_free_pagetable(domain, start_addr, start_addr + offset); @@ -3382,7 +3387,7 @@ static void 
vm_domain_exit(struct dmar_domain *domain) end = end & (~VTD_PAGE_MASK); /* clear ptes */ - dma_pte_clear_range(domain, 0, end); + dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); /* free page tables */ dma_pte_free_pagetable(domain, 0, end); @@ -3526,7 +3531,8 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain, /* The address might not be aligned */ base = iova & VTD_PAGE_MASK; size = VTD_PAGE_ALIGN(size); - dma_pte_clear_range(dmar_domain, base, base + size); + dma_pte_clear_range(dmar_domain, base >> VTD_PAGE_SHIFT, + (base + size - 1) >> VTD_PAGE_SHIFT); if (dmar_domain->max_addr == base + size) dmar_domain->max_addr = base; From 8a3af79361e85db6fec4173ef1916322471c19e3 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 29 Jun 2009 14:28:27 +0200 Subject: [PATCH 143/741] netfilter: headers_check fix: linux/netfilter/xt_osf.h fix the following 'make headers_check' warnings: usr/include/linux/netfilter/xt_osf.h:40: found __[us]{8,16,32,64} type without #include <linux/types.h> Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_osf.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/netfilter/xt_osf.h b/include/linux/netfilter/xt_osf.h index fd2272e0959a..18afa495f973 100644 --- a/include/linux/netfilter/xt_osf.h +++ b/include/linux/netfilter/xt_osf.h @@ -20,6 +20,8 @@ #ifndef _XT_OSF_H #define _XT_OSF_H +#include <linux/types.h> + #define MAXGENRELEN 32 #define XT_OSF_GENRE (1<<0) From 6660c63a79a639b86e3a709e25a8c4fc3ab24770 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jun 2009 22:41:00 +0100 Subject: [PATCH 144/741] intel-iommu: Make dma_pte_free_pagetable() use pfns Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 40 +++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index ff8b7ce4a013..1526864a9d6f 100644 --- a/drivers/pci/intel-iommu.c +++
b/drivers/pci/intel-iommu.c @@ -669,27 +669,27 @@ static inline int width_to_agaw(int width) static inline unsigned int level_to_offset_bits(int level) { - return (12 + (level - 1) * LEVEL_STRIDE); + return (level - 1) * LEVEL_STRIDE; } static inline int pfn_level_offset(unsigned long pfn, int level) { - return (pfn >> (level_to_offset_bits(level) - 12)) & LEVEL_MASK; + return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK; } -static inline u64 level_mask(int level) +static inline unsigned long level_mask(int level) { - return ((u64)-1 << level_to_offset_bits(level)); + return -1UL << level_to_offset_bits(level); } -static inline u64 level_size(int level) +static inline unsigned long level_size(int level) { - return ((u64)1 << level_to_offset_bits(level)); + return 1UL << level_to_offset_bits(level); } -static inline u64 align_to_level(u64 addr, int level) +static inline unsigned long align_to_level(unsigned long pfn, int level) { - return ((addr + level_size(level) - 1) & level_mask(level)); + return (pfn + level_size(level) - 1) & level_mask(level); } static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) @@ -798,25 +798,29 @@ static void dma_pte_clear_range(struct dmar_domain *domain, static void dma_pte_free_pagetable(struct dmar_domain *domain, u64 start, u64 end) { - int addr_width = agaw_to_width(domain->agaw); + int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + unsigned long start_pfn = start >> VTD_PAGE_SHIFT; + unsigned long last_pfn = (end-1) >> VTD_PAGE_SHIFT; struct dma_pte *pte; int total = agaw_to_level(domain->agaw); int level; - u64 tmp; + unsigned long tmp; - BUG_ON(start >> addr_width); - BUG_ON(end >> addr_width); + BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); + BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); /* we don't need lock here, nobody else touches the iova range */ level = 2; while (level <= total) { - tmp = align_to_level(start, level); - if (tmp >= end || (tmp + 
level_size(level) > end)) + tmp = align_to_level(start_pfn, level); + + /* Only clear this pte/pmd if we're asked to clear its + _whole_ range */ + if (tmp + level_size(level) - 1 > last_pfn) return; - while (tmp < end) { - pte = dma_pfn_level_pte(domain, tmp >> VTD_PAGE_SHIFT, - level); + while (tmp <= last_pfn) { + pte = dma_pfn_level_pte(domain, tmp, level); if (pte) { free_pgtable_page( phys_to_virt(dma_pte_addr(pte))); @@ -828,7 +832,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, level++; } /* free pgd */ - if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) { + if (start == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { free_pgtable_page(domain->pgd); domain->pgd = NULL; } From d794dc9b302c2781c571c10dedb8094e223d31b8 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 00:27:49 +0100 Subject: [PATCH 145/741] intel-iommu: Make dma_pte_free_pagetable() take pfns as argument With some cleanup of intel_unmap_page(), intel_unmap_sg() and vm_domain_exit() to no longer play with 64-bit addresses. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 70 ++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 41 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 1526864a9d6f..fc593adb049a 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -796,11 +796,10 @@ static void dma_pte_clear_range(struct dmar_domain *domain, /* free page table pages. 
last level pte should already be cleared */ static void dma_pte_free_pagetable(struct dmar_domain *domain, - u64 start, u64 end) + unsigned long start_pfn, + unsigned long last_pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - unsigned long start_pfn = start >> VTD_PAGE_SHIFT; - unsigned long last_pfn = (end-1) >> VTD_PAGE_SHIFT; struct dma_pte *pte; int total = agaw_to_level(domain->agaw); int level; @@ -832,7 +831,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, level++; } /* free pgd */ - if (start == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { + if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { free_pgtable_page(domain->pgd); domain->pgd = NULL; } @@ -1416,7 +1415,6 @@ static void domain_exit(struct dmar_domain *domain) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; - u64 end; /* Domain 0 is reserved, so dont process it */ if (!domain) @@ -1425,14 +1423,12 @@ static void domain_exit(struct dmar_domain *domain) domain_remove_dev_info(domain); /* destroy iovas */ put_iova_domain(&domain->iovad); - end = DOMAIN_MAX_ADDR(domain->gaw); - end = end & (~PAGE_MASK); /* clear ptes */ dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); /* free page tables */ - dma_pte_free_pagetable(domain, 0, end); + dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); for_each_active_iommu(iommu, drhd) if (test_bit(iommu->seq_id, &domain->iommu_bmp)) @@ -2601,7 +2597,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, { struct pci_dev *pdev = to_pci_dev(dev); struct dmar_domain *domain; - unsigned long start_addr; + unsigned long start_pfn, last_pfn; struct iova *iova; struct intel_iommu *iommu; @@ -2617,20 +2613,22 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, if (!iova) return; - start_addr = iova->pfn_lo << PAGE_SHIFT; - size = aligned_size((u64)dev_addr, size); + start_pfn = mm_to_dma_pfn(iova->pfn_lo); + last_pfn = mm_to_dma_pfn(iova->pfn_hi 
+ 1) - 1; - pr_debug("Device %s unmapping: %zx@%llx\n", - pci_name(pdev), size, (unsigned long long)start_addr); + pr_debug("Device %s unmapping: pfn %lx-%lx\n", + pci_name(pdev), start_pfn, last_pfn); /* clear the whole page */ - dma_pte_clear_range(domain, start_addr >> VTD_PAGE_SHIFT, - (start_addr + size - 1) >> VTD_PAGE_SHIFT); + dma_pte_clear_range(domain, start_pfn, last_pfn); + /* free page tables */ - dma_pte_free_pagetable(domain, start_addr, start_addr + size); + dma_pte_free_pagetable(domain, start_pfn, last_pfn); + if (intel_iommu_strict) { - iommu_flush_iotlb_psi(iommu, domain->id, start_addr, - size >> VTD_PAGE_SHIFT); + iommu_flush_iotlb_psi(iommu, domain->id, + start_pfn << VTD_PAGE_SHIFT, + last_pfn - start_pfn + 1); /* free iova */ __free_iova(&domain->iovad, iova); } else { @@ -2688,14 +2686,10 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { - int i; struct pci_dev *pdev = to_pci_dev(hwdev); struct dmar_domain *domain; - unsigned long start_addr; + unsigned long start_pfn, last_pfn; struct iova *iova; - size_t size = 0; - phys_addr_t addr; - struct scatterlist *sg; struct intel_iommu *iommu; if (iommu_no_mapping(pdev)) @@ -2709,21 +2703,19 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); if (!iova) return; - for_each_sg(sglist, sg, nelems, i) { - addr = page_to_phys(sg_page(sg)) + sg->offset; - size += aligned_size((u64)addr, sg->length); - } - start_addr = iova->pfn_lo << PAGE_SHIFT; + start_pfn = mm_to_dma_pfn(iova->pfn_lo); + last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; /* clear the whole page */ - dma_pte_clear_range(domain, start_addr >> VTD_PAGE_SHIFT, - (start_addr + size - 1) >> VTD_PAGE_SHIFT); - /* free page tables */ - dma_pte_free_pagetable(domain, start_addr, start_addr + size); + dma_pte_clear_range(domain, start_pfn, last_pfn); - 
iommu_flush_iotlb_psi(iommu, domain->id, start_addr, - size >> VTD_PAGE_SHIFT); + /* free page tables */ + dma_pte_free_pagetable(domain, start_pfn, last_pfn); + + iommu_flush_iotlb_psi(iommu, domain->id, + start_pfn << VTD_PAGE_SHIFT, + (last_pfn - start_pfn + 1)); /* free iova */ __free_iova(&domain->iovad, iova); @@ -2804,8 +2796,8 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne start_addr >> VTD_PAGE_SHIFT, (start_addr + offset - 1) >> VTD_PAGE_SHIFT); /* free page tables */ - dma_pte_free_pagetable(domain, start_addr, - start_addr + offset); + dma_pte_free_pagetable(domain, start_addr >> VTD_PAGE_SHIFT, + (start_addr + offset - 1) >> VTD_PAGE_SHIFT); /* free iova */ __free_iova(&domain->iovad, iova); return 0; @@ -3378,8 +3370,6 @@ static void iommu_free_vm_domain(struct dmar_domain *domain) static void vm_domain_exit(struct dmar_domain *domain) { - u64 end; - /* Domain 0 is reserved, so dont process it */ if (!domain) return; @@ -3387,14 +3377,12 @@ static void vm_domain_exit(struct dmar_domain *domain) vm_domain_remove_all_dev_info(domain); /* destroy iovas */ put_iova_domain(&domain->iovad); - end = DOMAIN_MAX_ADDR(domain->gaw); - end = end & (~VTD_PAGE_MASK); /* clear ptes */ dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); /* free page tables */ - dma_pte_free_pagetable(domain, 0, end); + dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); iommu_free_vm_domain(domain); free_domain_mem(domain); From 163cc52ccd2cc5c5ae4e1c886f6fde8547feed2a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 00:51:17 +0100 Subject: [PATCH 146/741] intel-iommu: Clean up intel_iommu_unmap_range() Use unaligned address for domain->max_addr. That algorithm isn't ideal anyway -- we should probably just look at the last iova in the tree. 
Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index fc593adb049a..21dc77311863 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3491,7 +3491,7 @@ static int intel_iommu_map_range(struct iommu_domain *domain, if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) prot |= DMA_PTE_SNP; - max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size); + max_addr = iova + size; if (dmar_domain->max_addr < max_addr) { int min_agaw; u64 end; @@ -3518,16 +3518,12 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain, unsigned long iova, size_t size) { struct dmar_domain *dmar_domain = domain->priv; - dma_addr_t base; - /* The address might not be aligned */ - base = iova & VTD_PAGE_MASK; - size = VTD_PAGE_ALIGN(size); - dma_pte_clear_range(dmar_domain, base >> VTD_PAGE_SHIFT, - (base + size - 1) >> VTD_PAGE_SHIFT); + dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, + (iova + size - 1) >> VTD_PAGE_SHIFT); - if (dmar_domain->max_addr == base + size) - dmar_domain->max_addr = base; + if (dmar_domain->max_addr == iova + size) + dmar_domain->max_addr = iova; } static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, From d6d3f08b0fd998b647a05540cedd11a067b72867 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 29 Jun 2009 14:31:46 +0200 Subject: [PATCH 147/741] netfilter: xtables: conntrack match revision 2 As reported by Philip, the UNTRACKED state bit does not fit within the 8-bit state_mask member. Enlarge state_mask and give status_mask a few more bits too. 
Reported-by: Philip Craig References: http://markmail.org/thread/b7eg6aovfh4agyz7 Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_conntrack.h | 13 +++++ net/netfilter/xt_conntrack.c | 66 +++++++++++++++++++++++--- 2 files changed, 73 insertions(+), 6 deletions(-) diff --git a/include/linux/netfilter/xt_conntrack.h b/include/linux/netfilter/xt_conntrack.h index 3430c7751948..7ae05338e94c 100644 --- a/include/linux/netfilter/xt_conntrack.h +++ b/include/linux/netfilter/xt_conntrack.h @@ -81,4 +81,17 @@ struct xt_conntrack_mtinfo1 { __u8 state_mask, status_mask; }; +struct xt_conntrack_mtinfo2 { + union nf_inet_addr origsrc_addr, origsrc_mask; + union nf_inet_addr origdst_addr, origdst_mask; + union nf_inet_addr replsrc_addr, replsrc_mask; + union nf_inet_addr repldst_addr, repldst_mask; + __u32 expires_min, expires_max; + __u16 l4proto; + __be16 origsrc_port, origdst_port; + __be16 replsrc_port, repldst_port; + __u16 match_flags, invert_flags; + __u16 state_mask, status_mask; +}; + #endif /*_XT_CONNTRACK_H*/ diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 0b7139f3dd78..fc581800698e 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -129,7 +129,7 @@ conntrack_addrcmp(const union nf_inet_addr *kaddr, static inline bool conntrack_mt_origsrc(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, @@ -138,7 +138,7 @@ conntrack_mt_origsrc(const struct nf_conn *ct, static inline bool conntrack_mt_origdst(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3, @@ -147,7 +147,7 @@ conntrack_mt_origdst(const struct nf_conn *ct, static inline bool conntrack_mt_replsrc(const struct 
nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3, @@ -156,7 +156,7 @@ conntrack_mt_replsrc(const struct nf_conn *ct, static inline bool conntrack_mt_repldst(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3, @@ -164,7 +164,7 @@ conntrack_mt_repldst(const struct nf_conn *ct, } static inline bool -ct_proto_port_check(const struct xt_conntrack_mtinfo1 *info, +ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info, const struct nf_conn *ct) { const struct nf_conntrack_tuple *tuple; @@ -204,7 +204,7 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo1 *info, static bool conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_conntrack_mtinfo1 *info = par->matchinfo; + const struct xt_conntrack_mtinfo2 *info = par->matchinfo; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int statebit; @@ -278,6 +278,16 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } +static bool +conntrack_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) +{ + const struct xt_conntrack_mtinfo2 *const *info = par->matchinfo; + struct xt_match_param newpar = *par; + + newpar.matchinfo = *info; + return conntrack_mt(skb, &newpar); +} + static bool conntrack_mt_check(const struct xt_mtchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { @@ -288,11 +298,45 @@ static bool conntrack_mt_check(const struct xt_mtchk_param *par) return true; } +static bool conntrack_mt_check_v1(const struct xt_mtchk_param *par) +{ + struct xt_conntrack_mtinfo1 *info = par->matchinfo; + struct xt_conntrack_mtinfo2 *up; + int ret = conntrack_mt_check(par); + + if (ret < 0) + return ret; + + up = 
kmalloc(sizeof(*up), GFP_KERNEL); + if (up == NULL) { + nf_ct_l3proto_module_put(par->family); + return -ENOMEM; + } + + /* + * The strategy here is to minimize the overhead of v1 matching, + * by prebuilding a v2 struct and putting the pointer into the + * v1 dataspace. + */ + memcpy(up, info, offsetof(typeof(*info), state_mask)); + up->state_mask = info->state_mask; + up->status_mask = info->status_mask; + *(void **)info = up; + return true; +} + static void conntrack_mt_destroy(const struct xt_mtdtor_param *par) { nf_ct_l3proto_module_put(par->family); } +static void conntrack_mt_destroy_v1(const struct xt_mtdtor_param *par) +{ + struct xt_conntrack_mtinfo2 **info = par->matchinfo; + kfree(*info); + conntrack_mt_destroy(par); +} + #ifdef CONFIG_COMPAT struct compat_xt_conntrack_info { @@ -363,6 +407,16 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = { .revision = 1, .family = NFPROTO_UNSPEC, .matchsize = sizeof(struct xt_conntrack_mtinfo1), + .match = conntrack_mt_v1, + .checkentry = conntrack_mt_check_v1, + .destroy = conntrack_mt_destroy_v1, + .me = THIS_MODULE, + }, + { + .name = "conntrack", + .revision = 2, + .family = NFPROTO_UNSPEC, + .matchsize = sizeof(struct xt_conntrack_mtinfo2), .match = conntrack_mt, .checkentry = conntrack_mt_check, .destroy = conntrack_mt_destroy, From b026fd28ea23af24a3eea6e5be3f3d0193a8e87d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 10:37:25 +0100 Subject: [PATCH 148/741] intel-iommu: Change addr_to_dma_pte() to pfn_to_dma_pte() Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 21dc77311863..dfbabd151a9c 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -692,23 +692,24 @@ static inline unsigned long align_to_level(unsigned long pfn, int level) return (pfn + level_size(level) - 1) & level_mask(level); } -static struct 
dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) +static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, + unsigned long pfn) { - int addr_width = agaw_to_width(domain->agaw); + int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; struct dma_pte *parent, *pte = NULL; int level = agaw_to_level(domain->agaw); int offset; unsigned long flags; BUG_ON(!domain->pgd); - BUG_ON(addr >> addr_width); + BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); parent = domain->pgd; spin_lock_irqsave(&domain->mapping_lock, flags); while (level > 0) { void *tmp_page; - offset = pfn_level_offset(addr >> VTD_PAGE_SHIFT, level); + offset = pfn_level_offset(pfn, level); pte = &parent[offset]; if (level == 1) break; @@ -1660,7 +1661,7 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT; index = 0; while (start_pfn < end_pfn) { - pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index); + pte = pfn_to_dma_pte(domain, (iova >> VTD_PAGE_SHIFT) + index); if (!pte) return -ENOMEM; /* We don't need lock here, nobody else @@ -3533,7 +3534,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, struct dma_pte *pte; u64 phys = 0; - pte = addr_to_dma_pte(dmar_domain, iova); + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT); if (pte) phys = dma_pte_addr(pte); From 1c5a46ed49e37f56f8aa9000bb1c2ac59670c372 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 10:53:37 +0100 Subject: [PATCH 149/741] intel-iommu: Clean up address handling in domain_page_mapping() No more masking and alignment; just use pfns. 
Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index dfbabd151a9c..f08d7865fe00 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1647,20 +1647,18 @@ static int domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, u64 hpa, size_t size, int prot) { - u64 start_pfn, end_pfn; + unsigned long start_pfn = hpa >> VTD_PAGE_SHIFT; + unsigned long last_pfn = (hpa + size - 1) >> VTD_PAGE_SHIFT; struct dma_pte *pte; - int index; - int addr_width = agaw_to_width(domain->agaw); + int index = 0; + int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - BUG_ON(hpa >> addr_width); + BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) return -EINVAL; - iova &= PAGE_MASK; - start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT; - end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT; - index = 0; - while (start_pfn < end_pfn) { + + while (start_pfn <= last_pfn) { pte = pfn_to_dma_pte(domain, (iova >> VTD_PAGE_SHIFT) + index); if (!pte) return -ENOMEM; From 61df744314079e8cb8cdec75f517cf0e704e41ef Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 11:55:58 +0100 Subject: [PATCH 150/741] intel-iommu: Introduce domain_pfn_mapping() ... and use it in the trivial cases; the other callers want individual (and bisectable) attention, since I screwed them up the first time... Make the BUG_ON() happen on too-large virtual address rather than physical address, too. That's the one we care about. 
Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index f08d7865fe00..7540ef91d5f7 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1643,40 +1643,48 @@ static int domain_context_mapped(struct pci_dev *pdev) tmp->devfn); } -static int -domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, - u64 hpa, size_t size, int prot) +static int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, + unsigned long phys_pfn, unsigned long nr_pages, + int prot) { - unsigned long start_pfn = hpa >> VTD_PAGE_SHIFT; - unsigned long last_pfn = (hpa + size - 1) >> VTD_PAGE_SHIFT; struct dma_pte *pte; - int index = 0; int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); + BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) return -EINVAL; - while (start_pfn <= last_pfn) { - pte = pfn_to_dma_pte(domain, (iova >> VTD_PAGE_SHIFT) + index); + while (nr_pages--) { + pte = pfn_to_dma_pte(domain, iov_pfn); if (!pte) return -ENOMEM; /* We don't need lock here, nobody else * touches the iova range */ BUG_ON(dma_pte_addr(pte)); - dma_set_pte_pfn(pte, start_pfn); + dma_set_pte_pfn(pte, phys_pfn); dma_set_pte_prot(pte, prot); if (prot & DMA_PTE_SNP) dma_set_pte_snp(pte); domain_flush_cache(domain, pte, sizeof(*pte)); - start_pfn++; - index++; + iov_pfn++; + phys_pfn++; } return 0; } +static int domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, + u64 hpa, size_t size, int prot) +{ + unsigned long first_pfn = hpa >> VTD_PAGE_SHIFT; + unsigned long last_pfn = (hpa + size - 1) >> VTD_PAGE_SHIFT; + + return domain_pfn_mapping(domain, iova >> VTD_PAGE_SHIFT, first_pfn, + last_pfn - first_pfn + 1, prot); + +} + static void 
iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) { if (!iommu) @@ -1893,8 +1901,10 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, dma_pte_clear_range(domain, base >> VTD_PAGE_SHIFT, (base + size - 1) >> VTD_PAGE_SHIFT); - return domain_page_mapping(domain, base, base, size, - DMA_PTE_READ|DMA_PTE_WRITE); + return domain_pfn_mapping(domain, base >> VTD_PAGE_SHIFT, + base >> VTD_PAGE_SHIFT, + size >> VTD_PAGE_SHIFT, + DMA_PTE_READ|DMA_PTE_WRITE); } static int iommu_prepare_identity_map(struct pci_dev *pdev, From 0ab36de274ab094c3992b50c9c48c5c89072ec94 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 14:01:43 +0100 Subject: [PATCH 151/741] intel-iommu: Use domain_pfn_mapping() in __intel_map_single() Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 7540ef91d5f7..dccd0a7b7a5f 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2477,14 +2477,12 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, return 0; iommu = domain_get_iommu(domain); - size = aligned_size((u64)paddr, size); + size = aligned_size(paddr, size) >> VTD_PAGE_SHIFT; - iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); + iova = __intel_alloc_iova(hwdev, domain, size << VTD_PAGE_SHIFT, pdev->dma_mask); if (!iova) goto error; - start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; - /* * Check if DMAR supports zero-length reads on write only * mappings.. 
@@ -2500,20 +2498,20 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, * might have two guest_addr mapping to the same host paddr, but this * is not a big problem */ - ret = domain_page_mapping(domain, start_paddr, - ((u64)paddr) & PHYSICAL_PAGE_MASK, - size, prot); + ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo), + paddr >> VTD_PAGE_SHIFT, size, prot); if (ret) goto error; + start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; + /* it's a non-present to present mapping. Only flush if caching mode */ if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, 0, start_paddr, - size >> VTD_PAGE_SHIFT); + iommu_flush_iotlb_psi(iommu, 0, start_paddr, size); else iommu_flush_write_buffer(iommu); - return start_paddr + ((u64)paddr & (~PAGE_MASK)); + return start_paddr + (paddr & (~PAGE_MASK)); error: if (iova) From ad05122162b67f64d5a1c6d35e001f7a88619b88 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 14:22:28 +0100 Subject: [PATCH 152/741] intel-iommu: Use domain_pfn_mapping() in intel_iommu_map_range() Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index dccd0a7b7a5f..a490b39ca3d5 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3516,8 +3516,11 @@ static int intel_iommu_map_range(struct iommu_domain *domain, } dmar_domain->max_addr = max_addr; } - - ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot); + /* Round up size to next multiple of PAGE_SIZE, if it and + the low bits of hpa would take us onto the next page */ + size = aligned_size(hpa, size) >> VTD_PAGE_SHIFT; + ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, + hpa >> VTD_PAGE_SHIFT, size, prot); return ret; } From b536d24d212c994a7d98469ea3a8891573d45fd4 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 14:49:31 +0100 Subject: 
[PATCH 153/741] intel-iommu: Clean up intel_map_sg(), remove domain_page_mapping() Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 54 ++++++++++++++------------------------- 1 file changed, 19 insertions(+), 35 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index a490b39ca3d5..bc49b121c667 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1674,17 +1674,6 @@ static int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, return 0; } -static int domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, - u64 hpa, size_t size, int prot) -{ - unsigned long first_pfn = hpa >> VTD_PAGE_SHIFT; - unsigned long last_pfn = (hpa + size - 1) >> VTD_PAGE_SHIFT; - - return domain_pfn_mapping(domain, iova >> VTD_PAGE_SHIFT, first_pfn, - last_pfn - first_pfn + 1, prot); - -} - static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) { if (!iommu) @@ -2745,17 +2734,16 @@ static int intel_nontranslate_map_sg(struct device *hddev, static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { - phys_addr_t addr; int i; struct pci_dev *pdev = to_pci_dev(hwdev); struct dmar_domain *domain; size_t size = 0; int prot = 0; - size_t offset = 0; + size_t offset_pfn = 0; struct iova *iova = NULL; int ret; struct scatterlist *sg; - unsigned long start_addr; + unsigned long start_vpfn; struct intel_iommu *iommu; BUG_ON(dir == DMA_NONE); @@ -2768,10 +2756,8 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne iommu = domain_get_iommu(domain); - for_each_sg(sglist, sg, nelems, i) { - addr = page_to_phys(sg_page(sg)) + sg->offset; - size += aligned_size((u64)addr, sg->length); - } + for_each_sg(sglist, sg, nelems, i) + size += aligned_size(sg->offset, sg->length); iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); if (!iova) { @@ -2789,36 +2775,34 @@ static int 
intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) prot |= DMA_PTE_WRITE; - start_addr = iova->pfn_lo << PAGE_SHIFT; - offset = 0; + start_vpfn = mm_to_dma_pfn(iova->pfn_lo); + offset_pfn = 0; for_each_sg(sglist, sg, nelems, i) { - addr = page_to_phys(sg_page(sg)) + sg->offset; - size = aligned_size((u64)addr, sg->length); - ret = domain_page_mapping(domain, start_addr + offset, - ((u64)addr) & PHYSICAL_PAGE_MASK, - size, prot); + int nr_pages = aligned_size(sg->offset, sg->length) >> VTD_PAGE_SHIFT; + ret = domain_pfn_mapping(domain, start_vpfn + offset_pfn, + page_to_dma_pfn(sg_page(sg)), + nr_pages, prot); if (ret) { /* clear the page */ - dma_pte_clear_range(domain, - start_addr >> VTD_PAGE_SHIFT, - (start_addr + offset - 1) >> VTD_PAGE_SHIFT); + dma_pte_clear_range(domain, start_vpfn, + start_vpfn + offset_pfn); /* free page tables */ - dma_pte_free_pagetable(domain, start_addr >> VTD_PAGE_SHIFT, - (start_addr + offset - 1) >> VTD_PAGE_SHIFT); + dma_pte_free_pagetable(domain, start_vpfn, + start_vpfn + offset_pfn); /* free iova */ __free_iova(&domain->iovad, iova); return 0; } - sg->dma_address = start_addr + offset + - ((u64)addr & (~PAGE_MASK)); + sg->dma_address = ((dma_addr_t)(start_vpfn + offset_pfn) + << VTD_PAGE_SHIFT) + sg->offset; sg->dma_length = sg->length; - offset += size; + offset_pfn += nr_pages; } /* it's a non-present to present mapping. 
Only flush if caching mode */ if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, 0, start_addr, - offset >> VTD_PAGE_SHIFT); + iommu_flush_iotlb_psi(iommu, 0, start_vpfn << VTD_PAGE_SHIFT, + offset_pfn); else iommu_flush_write_buffer(iommu); From 88cb6a7424d9465faf6caaaadff5af0766c93991 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 15:03:06 +0100 Subject: [PATCH 154/741] intel-iommu: Change aligned_size() to aligned_nrpages() Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index bc49b121c667..22add36fd731 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2320,11 +2320,13 @@ error: return ret; } -static inline u64 aligned_size(u64 host_addr, size_t size) +static inline unsigned long aligned_nrpages(unsigned long host_addr, + size_t size) { - u64 addr; - addr = (host_addr & (~PAGE_MASK)) + size; - return PAGE_ALIGN(addr); + host_addr &= ~PAGE_MASK; + host_addr += size + PAGE_SIZE - 1; + + return host_addr >> VTD_PAGE_SHIFT; } struct iova * @@ -2466,7 +2468,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, return 0; iommu = domain_get_iommu(domain); - size = aligned_size(paddr, size) >> VTD_PAGE_SHIFT; + size = aligned_nrpages(paddr, size); iova = __intel_alloc_iova(hwdev, domain, size << VTD_PAGE_SHIFT, pdev->dma_mask); if (!iova) @@ -2757,9 +2759,10 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne iommu = domain_get_iommu(domain); for_each_sg(sglist, sg, nelems, i) - size += aligned_size(sg->offset, sg->length); + size += aligned_nrpages(sg->offset, sg->length); - iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); + iova = __intel_alloc_iova(hwdev, domain, size << VTD_PAGE_SHIFT, + pdev->dma_mask); if (!iova) { sglist->dma_length = 0; return 0; @@ -2778,7 +2781,7 @@ 
static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne start_vpfn = mm_to_dma_pfn(iova->pfn_lo); offset_pfn = 0; for_each_sg(sglist, sg, nelems, i) { - int nr_pages = aligned_size(sg->offset, sg->length) >> VTD_PAGE_SHIFT; + int nr_pages = aligned_nrpages(sg->offset, sg->length); ret = domain_pfn_mapping(domain, start_vpfn + offset_pfn, page_to_dma_pfn(sg_page(sg)), nr_pages, prot); @@ -3502,7 +3505,7 @@ static int intel_iommu_map_range(struct iommu_domain *domain, } /* Round up size to next multiple of PAGE_SIZE, if it and the low bits of hpa would take us onto the next page */ - size = aligned_size(hpa, size) >> VTD_PAGE_SHIFT; + size = aligned_nrpages(hpa, size); ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, hpa >> VTD_PAGE_SHIFT, size, prot); return ret; From 03d6a2461ab1704c171ce21081c5022378ef7a91 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 15:33:46 +0100 Subject: [PATCH 155/741] intel-iommu: Make iommu_flush_iotlb_psi() take pfn as argument Most of its callers are having to shift for themselves anyway, so we might as well do it in iommu_flush_iotlb_psi(). 
Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 22add36fd731..6afe44cb6815 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1058,11 +1058,11 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, } static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, - u64 addr, unsigned int pages) + unsigned long pfn, unsigned int pages) { unsigned int mask = ilog2(__roundup_pow_of_two(pages)); + uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT; - BUG_ON(addr & (~VTD_PAGE_MASK)); BUG_ON(pages == 0); /* @@ -2494,15 +2494,15 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, if (ret) goto error; - start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; - /* it's a non-present to present mapping. Only flush if caching mode */ if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, 0, start_paddr, size); + iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size); else iommu_flush_write_buffer(iommu); - return start_paddr + (paddr & (~PAGE_MASK)); + start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; + start_paddr += paddr & ~PAGE_MASK; + return start_paddr; error: if (iova) @@ -2624,8 +2624,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, dma_pte_free_pagetable(domain, start_pfn, last_pfn); if (intel_iommu_strict) { - iommu_flush_iotlb_psi(iommu, domain->id, - start_pfn << VTD_PAGE_SHIFT, + iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, last_pfn - start_pfn + 1); /* free iova */ __free_iova(&domain->iovad, iova); @@ -2711,8 +2710,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, /* free page tables */ dma_pte_free_pagetable(domain, start_pfn, last_pfn); - iommu_flush_iotlb_psi(iommu, domain->id, - start_pfn << VTD_PAGE_SHIFT, + iommu_flush_iotlb_psi(iommu, 
domain->id, start_pfn, (last_pfn - start_pfn + 1)); /* free iova */ @@ -2804,8 +2802,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne /* it's a non-present to present mapping. Only flush if caching mode */ if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, 0, start_vpfn << VTD_PAGE_SHIFT, - offset_pfn); + iommu_flush_iotlb_psi(iommu, 0, start_vpfn, offset_pfn); else iommu_flush_write_buffer(iommu); From 1a4a45516d7a57de0691352d899d7008f2e090d1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 16:00:42 +0100 Subject: [PATCH 156/741] intel-iommu: Remove last use of PHYSICAL_PAGE_MASK, for reserving PCI BARs This is fairly broken anyway -- it doesn't take hotplug into account. We should probably be checking page_is_ram() instead. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 6afe44cb6815..a55f5fb06b14 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -62,9 +62,6 @@ #define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) #define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) -#ifndef PHYSICAL_PAGE_MASK -#define PHYSICAL_PAGE_MASK PAGE_MASK -#endif /* VT-d pages must always be _smaller_ than MM pages. Otherwise things are never going to work. 
*/ @@ -1307,7 +1304,6 @@ static void dmar_init_reserved_ranges(void) struct pci_dev *pdev = NULL; struct iova *iova; int i; - u64 addr, size; init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN); @@ -1330,12 +1326,9 @@ static void dmar_init_reserved_ranges(void) r = &pdev->resource[i]; if (!r->flags || !(r->flags & IORESOURCE_MEM)) continue; - addr = r->start; - addr &= PHYSICAL_PAGE_MASK; - size = r->end - addr; - size = PAGE_ALIGN(size); - iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr), - IOVA_PFN(size + addr) - 1); + iova = reserve_iova(&reserved_iova_list, + IOVA_PFN(r->start), + IOVA_PFN(r->end)); if (!iova) printk(KERN_ERR "Reserve iova failed\n"); } From c5395d5c4a82159889cb650de93b591ea51d8c56 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 16:35:56 +0100 Subject: [PATCH 157/741] intel-iommu: Clean up iommu_domain_identity_map() Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index a55f5fb06b14..c5caf7d63a0f 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1861,31 +1861,25 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, unsigned long long start, unsigned long long end) { - unsigned long size; - unsigned long long base; + unsigned long first_vpfn = start >> VTD_PAGE_SHIFT; + unsigned long last_vpfn = end >> VTD_PAGE_SHIFT; - /* The address might not be aligned */ - base = start & PAGE_MASK; - size = end - base; - size = PAGE_ALIGN(size); - if (!reserve_iova(&domain->iovad, IOVA_PFN(base), - IOVA_PFN(base + size) - 1)) { + if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn), + dma_to_mm_pfn(last_vpfn))) { printk(KERN_ERR "IOMMU: reserve iova failed\n"); return -ENOMEM; } - pr_debug("Mapping reserved region %lx@%llx for domain %d\n", - size, base, domain->id); + pr_debug("Mapping reserved region %llx-%llx for domain 
%d\n", + start, end, domain->id); /* * RMRR range might have overlap with physical memory range, * clear it first */ - dma_pte_clear_range(domain, base >> VTD_PAGE_SHIFT, - (base + size - 1) >> VTD_PAGE_SHIFT); + dma_pte_clear_range(domain, first_vpfn, last_vpfn); - return domain_pfn_mapping(domain, base >> VTD_PAGE_SHIFT, - base >> VTD_PAGE_SHIFT, - size >> VTD_PAGE_SHIFT, + return domain_pfn_mapping(domain, first_vpfn, first_vpfn, + last_vpfn - first_vpfn + 1, DMA_PTE_READ|DMA_PTE_WRITE); } From 310a5ab93cb4ce29367238f682affd9ac352f4d0 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 18:52:20 +0100 Subject: [PATCH 158/741] intel-iommu: Performance improvement for dma_pte_clear_range() It's a bit silly to repeatedly call domain_flush_cache() for each PTE individually, as we clear it. Instead, batch them up and flush a whole range at a time. We might as well refrain from recalculating the PTE address from scratch each time round the loop too. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index c5caf7d63a0f..ba7e37f7111a 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -761,34 +761,33 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, return NULL; } -/* clear one page's page table */ -static void dma_pte_clear_one(struct dmar_domain *domain, unsigned long pfn) -{ - struct dma_pte *pte = NULL; - - /* get last level pte */ - pte = dma_pfn_level_pte(domain, pfn, 1); - - if (pte) { - dma_clear_pte(pte); - domain_flush_cache(domain, pte, sizeof(*pte)); - } -} - /* clear last level pte, a tlb flush should be followed */ static void dma_pte_clear_range(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + struct dma_pte *first_pte, *pte; 
BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); /* we don't need lock here; nobody else touches the iova range */ while (start_pfn <= last_pfn) { - dma_pte_clear_one(domain, start_pfn); - start_pfn++; + first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1); + if (!pte) { + start_pfn = align_to_level(start_pfn + 1, 2); + continue; + } + while (start_pfn <= last_pfn && + (unsigned long)pte >> VTD_PAGE_SHIFT == + (unsigned long)first_pte >> VTD_PAGE_SHIFT) { + dma_clear_pte(pte); + start_pfn++; + pte++; + } + domain_flush_cache(domain, first_pte, + (void *)pte - (void *)first_pte); } } From 6f6a00e40aa3fdd3b29c30e3ef1fc9690506bc03 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 20:38:49 +0100 Subject: [PATCH 159/741] intel-iommu: Performance improvement for domain_pfn_mapping() As with dma_pte_clear_range(), don't keep flushing a single PTE at a time. And also micro-optimise the setting of PTE values rather than using the helper functions to do all the masking. 
Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index ba7e37f7111a..f8074236bcce 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1639,7 +1639,7 @@ static int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, unsigned long phys_pfn, unsigned long nr_pages, int prot) { - struct dma_pte *pte; + struct dma_pte *first_pte = NULL, *pte = NULL; int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); @@ -1647,19 +1647,27 @@ static int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) return -EINVAL; + prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP; + while (nr_pages--) { - pte = pfn_to_dma_pte(domain, iov_pfn); - if (!pte) - return -ENOMEM; + if (!pte) { + first_pte = pte = pfn_to_dma_pte(domain, iov_pfn); + if (!pte) + return -ENOMEM; + } /* We don't need lock here, nobody else * touches the iova range */ BUG_ON(dma_pte_addr(pte)); - dma_set_pte_pfn(pte, phys_pfn); - dma_set_pte_prot(pte, prot); - if (prot & DMA_PTE_SNP) - dma_set_pte_snp(pte); - domain_flush_cache(domain, pte, sizeof(*pte)); + pte->val = (phys_pfn << VTD_PAGE_SHIFT) | prot; + pte++; + if (!nr_pages || + (unsigned long)pte >> VTD_PAGE_SHIFT != + (unsigned long)first_pte >> VTD_PAGE_SHIFT) { + domain_flush_cache(domain, first_pte, + (void *)pte - (void *)first_pte); + pte = NULL; + } iov_pfn++; phys_pfn++; } From 875764de6f0ddb23d270c29357d5a339232a0488 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 28 Jun 2009 21:20:51 +0100 Subject: [PATCH 160/741] intel-iommu: Simplify __intel_alloc_iova() There's no need for the separate iommu_alloc_iova() function, and certainly not for it to be global. Remove the underscores while we're at it. 
Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 47 ++++++++++++++------------------------- 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index f8074236bcce..11a23201445a 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2323,43 +2323,31 @@ static inline unsigned long aligned_nrpages(unsigned long host_addr, return host_addr >> VTD_PAGE_SHIFT; } -struct iova * -iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end) -{ - struct iova *piova; - - /* Make sure it's in range */ - end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end); - if (!size || (IOVA_START_ADDR + size > end)) - return NULL; - - piova = alloc_iova(&domain->iovad, - size >> PAGE_SHIFT, IOVA_PFN(end), 1); - return piova; -} - -static struct iova * -__intel_alloc_iova(struct device *dev, struct dmar_domain *domain, - size_t size, u64 dma_mask) +static struct iova *intel_alloc_iova(struct device *dev, + struct dmar_domain *domain, + unsigned long nrpages, uint64_t dma_mask) { struct pci_dev *pdev = to_pci_dev(dev); struct iova *iova = NULL; - if (dma_mask <= DMA_BIT_MASK(32) || dmar_forcedac) - iova = iommu_alloc_iova(domain, size, dma_mask); - else { + /* Restrict dma_mask to the width that the iommu can handle */ + dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask); + + if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) { /* * First try to allocate an io virtual address in * DMA_BIT_MASK(32) and if that fails then try allocating * from higher range */ - iova = iommu_alloc_iova(domain, size, DMA_BIT_MASK(32)); - if (!iova) - iova = iommu_alloc_iova(domain, size, dma_mask); + iova = alloc_iova(&domain->iovad, nrpages, + IOVA_PFN(DMA_BIT_MASK(32)), 1); + if (iova) + return iova; } - - if (!iova) { - printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev)); + iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1); + if (unlikely(!iova)) { + 
printk(KERN_ERR "Allocating %ld-page iova for %s failed", + nrpages, pci_name(pdev)); return NULL; } @@ -2464,7 +2452,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, iommu = domain_get_iommu(domain); size = aligned_nrpages(paddr, size); - iova = __intel_alloc_iova(hwdev, domain, size << VTD_PAGE_SHIFT, pdev->dma_mask); + iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); if (!iova) goto error; @@ -2753,8 +2741,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne for_each_sg(sglist, sg, nelems, i) size += aligned_nrpages(sg->offset, sg->length); - iova = __intel_alloc_iova(hwdev, domain, size << VTD_PAGE_SHIFT, - pdev->dma_mask); + iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); if (!iova) { sglist->dma_length = 0; return 0; From e6ce3066010a21bde961d8f8cefe0b69cae78a0f Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 29 Jun 2009 14:31:58 +0800 Subject: [PATCH 161/741] fs: allow d_instantiate to be called with negative parent dentry The new fsnotify infrastructure (starting at 90586523) causes an oops in spufs, where we populate a directory with files before instantiating the directory itself. The new changes seem to have introduced an assumption that a dentry's parent will be positive when instantiating. This change makes it once again possible to d_instantiate a dentry with a negative parent, and brings __fsnotify_d_instantiate() into line with inotify_d_instantiate(), which already has this NULL check. 
Signed-off-by: Jeremy Kerr Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 44848aa830dc..6c3de999fb34 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -280,7 +280,7 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) assert_spin_locked(&dentry->d_lock); parent = dentry->d_parent; - if (fsnotify_inode_watches_children(parent->d_inode)) + if (parent->d_inode && fsnotify_inode_watches_children(parent->d_inode)) dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; else dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; From e0af6062aa4f89081afb8a1a4269605775d354de Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Thu, 18 Jun 2009 13:05:49 +0400 Subject: [PATCH 162/741] MAINTAINERS: ieee802154 lists are moderated for non-subscribers. Note that our mailing list is moderated for non-subscribers. Signed-off-by: Dmitry Eremin-Solenikov --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fa2a16def17a..28c150e916a2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2886,7 +2886,7 @@ P: Dmitry Eremin-Solenikov M: dbaryshkov@gmail.com P: Sergey Lapin M: slapin@ossfans.org -L: linux-zigbee-devel@lists.sourceforge.net +L: linux-zigbee-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://apps.sourceforge.net/trac/linux-zigbee T: git git://git.kernel.org/pub/scm/linux/kernel/git/lowpan/lowpan.git S: Maintained From 932c1329acebc03ef5efa3647c9c3a967b59d0c4 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 19 Jun 2009 17:00:08 +0400 Subject: [PATCH 163/741] nl802154: fix Oops in ieee802154_nl_get_dev ieee802154_nl_get_dev() lacks check for the existence of the device that was returned by dev_get_XXX, thus resulting in Oops for non-existing devices. Fix it.
Signed-off-by: Dmitry Eremin-Solenikov --- net/ieee802154/netlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 105ad10876af..332b947ae812 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -276,6 +276,9 @@ static struct net_device *ieee802154_nl_get_dev(struct genl_info *info) else return NULL; + if (!dev) + return NULL; + if (dev->type != ARPHRD_IEEE802154) { dev_put(dev); return NULL; From dfd06fe8246c0425f8d6850b8e2c872b0d691ec3 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 19 Jun 2009 17:02:09 +0400 Subject: [PATCH 164/741] nl802154: add module license and description Signed-off-by: Dmitry Eremin-Solenikov --- net/ieee802154/netlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 332b947ae812..27eda9fdf3c2 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -524,3 +524,6 @@ static void __exit ieee802154_nl_exit(void) } module_exit(ieee802154_nl_exit); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("ieee 802.15.4 configuration interface"); + From 17bb9e0d906b625f86e9d31740bb1c35bc0f63d7 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 29 Jun 2009 17:13:56 +0100 Subject: [PATCH 165/741] kmemleak: Do not report new leaked objects if the scanning was stopped If the scanning was stopped with a signal, it is possible that some objects are left with a white colour (potential leaks) and reported. Add a check to avoid reporting such objects. Signed-off-by: Catalin Marinas --- mm/kmemleak.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index c37e8e50e4de..e094c4dbdf55 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1029,6 +1029,12 @@ static void kmemleak_scan(void) } WARN_ON(!list_empty(&gray_list)); + /* + * If scanning was stopped do not report any new unreferenced objects. 
+ */ + if (scan_should_stop()) + return; + /* * Scanning result reporting. */ @@ -1184,11 +1190,10 @@ static int kmemleak_seq_show(struct seq_file *seq, void *v) unsigned long flags; spin_lock_irqsave(&object->lock, flags); - if (!unreferenced_object(object)) - goto out; - print_unreferenced(seq, object); - reported_leaks++; -out: + if ((object->flags & OBJECT_REPORTED) && unreferenced_object(object)) { + print_unreferenced(seq, object); + reported_leaks++; + } spin_unlock_irqrestore(&object->lock, flags); return 0; } From b6e687221eb840bacd4d4a991e5f8e7ed3ae910a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 29 Jun 2009 17:13:57 +0100 Subject: [PATCH 166/741] kmemleak: Do not warn if an unknown object is freed vmap'ed memory blocks are not tracked by kmemleak (yet) but they may be released with vfree() which is tracked. The corresponding kmemleak warning is only enabled in debug mode. Future patch will add support for ioremap and vmap. Signed-off-by: Catalin Marinas --- mm/kmemleak.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e094c4dbdf55..eeece2deace2 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -531,8 +531,10 @@ static void delete_object(unsigned long ptr) write_lock_irqsave(&kmemleak_lock, flags); object = lookup_object(ptr, 0); if (!object) { +#ifdef DEBUG kmemleak_warn("Freeing unknown object at 0x%08lx\n", ptr); +#endif write_unlock_irqrestore(&kmemleak_lock, flags); return; } From 12de38b186c2af97bf0b4a1f907f766df46b1def Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 29 Jun 2009 17:13:55 +0100 Subject: [PATCH 167/741] kmemleak: Inform kmemleak about pid_hash Kmemleak does not track alloc_bootmem calls but the pid_hash allocated in pidhash_init() would need to be scanned as it contains pointers to struct pid objects. 
Signed-off-by: Catalin Marinas --- kernel/pid.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/pid.c b/kernel/pid.c index 31310b5d3f50..5fa1db48d8b7 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -36,6 +36,7 @@ #include #include #include +#include #define pid_hashfn(nr, ns) \ hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) @@ -512,6 +513,12 @@ void __init pidhash_init(void) pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash))); if (!pid_hash) panic("Could not alloc pidhash!\n"); + /* + * pid_hash contains references to allocated struct pid objects and it + * must be scanned by kmemleak to avoid false positives. + */ + kmemleak_alloc(pid_hash, pidhash_size * sizeof(*(pid_hash)), 0, + GFP_KERNEL); for (i = 0; i < pidhash_size; i++) INIT_HLIST_HEAD(&pid_hash[i]); } From 0d07348931daef854aca8c834a89f1a99ba4ff2b Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Wed, 24 Jun 2009 12:08:27 +0900 Subject: [PATCH 168/741] PCI MSI: Return if alloc_msi_entry for MSI-X failed In current code it continues setup even if alloc_msi_entry() for MSI-X has failed due to lack of memory. It means arch_setup_msi_irqs() might be called with msi_desc entries less than its argument nvec. At least x86's arch_setup_msi_irqs() uses list_for_each_entry() for dev->msi_list that suspected to have entries same numbers as nvec, and it doesn't check the number of allocated vectors and passed arg nvec. Therefore it will result in success of pci_enable_msix(), with less vectors allocated than requested. This patch fixes the error route to return -ENOMEM, instead of continuing the setup (proposed by Matthew Wilcox). Note that there is no iounmap in msi_free_irqs() if no msi_desc is allocated.
Reviewed-by: Matthew Wilcox Signed-off-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index d9f06fbfa0bf..628c14150d49 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -439,8 +439,14 @@ static int msix_capability_init(struct pci_dev *dev, for (i = 0; i < nvec; i++) { entry = alloc_msi_entry(dev); - if (!entry) - break; + if (!entry) { + if (!i) + iounmap(base); + else + msi_free_irqs(dev); + /* No enough memory. Don't try again */ + return -ENOMEM; + } j = entries[i].entry; entry->msi_attrib.is_msix = 1; From 50e5628a4ac465a52f0d4ca6567343be029731a0 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 28 Jun 2009 09:26:40 -0700 Subject: [PATCH 169/741] PCI ECRC: Remove unnecessary semicolons Acked-by: Andrew Patterson Signed-off-by: Joe Perches Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/ecrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pcie/aer/ecrc.c b/drivers/pci/pcie/aer/ecrc.c index ece97df4df6d..a928d8ab6bda 100644 --- a/drivers/pci/pcie/aer/ecrc.c +++ b/drivers/pci/pcie/aer/ecrc.c @@ -106,7 +106,7 @@ void pcie_set_ecrc_checking(struct pci_dev *dev) disable_ecrc_checking(dev); break; case ECRC_POLICY_ON: - enable_ecrc_checking(dev);; + enable_ecrc_checking(dev); break; default: return; From 977badb2f385db14d8ba3fcf02fbd7ead8e63d0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20V=C3=A1zquez=20Cao?= Date: Fri, 26 Jun 2009 11:27:41 +0900 Subject: [PATCH 170/741] PCI: remove pci_dac_dma_... APIs on mn10300 It seems that mn10300 made it upstream after Jan Beulich's pci_dac_dma_* cleanup work and still defines pci_dac_dma_supported(). This API is not required by the PCI subsystem anymore, so remove it. 
Acked-by: David Howells Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Jesse Barnes --- arch/mn10300/include/asm/pci.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/mn10300/include/asm/pci.h b/arch/mn10300/include/asm/pci.h index e58b9a46e1b1..35d2ed6396f6 100644 --- a/arch/mn10300/include/asm/pci.h +++ b/arch/mn10300/include/asm/pci.h @@ -70,10 +70,6 @@ struct pci_dev; */ #define PCI_DMA_BUS_IS_PHYS (1) - -/* This is always fine. */ -#define pci_dac_dma_supported(pci_dev, mask) (0) - /* Return the index of the PCI controller for device. */ static inline int pci_controller_num(struct pci_dev *dev) { From 654b75e044119bf8e7d773bce41ea039281bbfbe Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 26 Jun 2009 14:04:46 +0800 Subject: [PATCH 171/741] PCI: check if bus has a proper bridge device before triggering SBR For devices attached to the root bus, we can't trigger Secondary Bus Reset because there is no bridge device associated with the bus. So we need to check bus->self against NULL first before using it. Reviewed-by: Kenji Kaneshige Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 6c93af5ced18..d5d6f5667d83 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2171,7 +2171,7 @@ static int pci_parent_bus_reset(struct pci_dev *dev, int probe) u16 ctrl; struct pci_dev *pdev; - if (dev->subordinate) + if (pci_is_root_bus(dev->bus) || dev->subordinate || !dev->bus->self) return -ENOTTY; list_for_each_entry(pdev, &dev->bus->devices, bus_list) From 503998ca4a295f7da233689850ba4b9d13cf41e7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 24 Jun 2009 09:18:14 -0700 Subject: [PATCH 172/741] PCI: fix kernel-doc warnings Add documentation for missing parameters in PCI hotplug code.
Signed-off-by: Randy Dunlap Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pci_hotplug_core.c | 2 ++ drivers/pci/slot.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index 844580489d4d..5c5043f239cf 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -555,6 +555,8 @@ static struct hotplug_slot *get_slot_from_name (const char *name) * @slot: pointer to the &struct hotplug_slot to register * @devnr: device number * @name: name registered with kobject core + * @owner: caller module owner + * @mod_name: caller module name * * Registers a hotplug slot with the pci hotplug subsystem, which will allow * userspace interaction to the slot. diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index eddb0748b0ea..8c02b6c53bdb 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -311,7 +311,7 @@ EXPORT_SYMBOL_GPL(pci_destroy_slot); #include /** * pci_hp_create_link - create symbolic link to the hotplug driver module. - * @slot: struct pci_slot + * @pci_slot: struct pci_slot * * Helper function for pci_hotplug_core.c to create symbolic link to * the hotplug driver module. @@ -334,7 +334,7 @@ EXPORT_SYMBOL_GPL(pci_hp_create_module_link); /** * pci_hp_remove_link - remove symbolic link to the hotplug driver module. - * @slot: struct pci_slot + * @pci_slot: struct pci_slot * * Helper function for pci_hotplug_core.c to remove symbolic link to * the hotplug driver module. From 7a661c6f1082693a7e9627e9ad2d1546a9337fdc Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 24 Jun 2009 16:02:27 +0100 Subject: [PATCH 173/741] PCI: More PATA quirks for not entering D3 The ALi loses some state if it goes into D3. Unfortunately even with the chipset documents I can't figure out how to restore some bits of it. 
The VIA one saves/restores apparently fine but the ACPI _GTM methods break on some platforms if we do this and this causes cable misdetections. These are both effectively regressions as historically nothing matched the devices and then decided not to bind to them. Nowadays something is binding to all sorts of devices and as a result they get dumped into D3. Signed-off-by: Alan Cox Acked-by: Jeff Garzik Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 56552d74abea..06b965623962 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1058,6 +1058,11 @@ static void __devinit quirk_no_ata_d3(struct pci_dev *pdev) } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_ANY_ID, quirk_no_ata_d3); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, PCI_ANY_ID, quirk_no_ata_d3); +/* ALi loses some register settings that we cannot then restore */ +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AL, PCI_ANY_ID, quirk_no_ata_d3); +/* VIA comes back fine but we need to keep it alive or ACPI GTM failures + occur when mode detecting */ +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_VIA, PCI_ANY_ID, quirk_no_ata_d3); /* This was originally an Alpha specific thing, but it really fits here. * The i82375 PCI/EISA bridge appears as non-classified. Fix that. From 2fc90f6133a87da8177636866557d4cc5f56e661 Mon Sep 17 00:00:00 2001 From: Alexey Zaytsev Date: Wed, 24 Jun 2009 16:22:30 +0400 Subject: [PATCH 174/741] PCI: make pci_name() take const argument Since this function should never modify it (saves warnings when called with const args too).
Signed-off-by: Alexey Zaytsev Signed-off-by: Jesse Barnes --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pci.h b/include/linux/pci.h index d304ddf412d0..115fb7ba5089 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1145,7 +1145,7 @@ static inline void pci_set_drvdata(struct pci_dev *pdev, void *data) /* If you want to know what to call your pci_dev, ask this function. * Again, it's a wrapper around the generic device. */ -static inline const char *pci_name(struct pci_dev *pdev) +static inline const char *pci_name(const struct pci_dev *pdev) { return dev_name(&pdev->dev); } From 2c21fd4b333e4c780a46edcd6d1e85bfc6cdf371 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Tue, 23 Jun 2009 17:40:04 +0900 Subject: [PATCH 175/741] PCI MSI: shorten PCI_MSIX_ENTRY_* symbol names These names are too long! Drop _OFFSET to save some bytes/lines. Reviewed-by: Matthew Wilcox Signed-off-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 18 ++++++++---------- drivers/pci/msi.h | 10 +++++----- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 628c14150d49..a088fc6f5838 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -151,7 +151,7 @@ static void msix_mask_irq(struct msi_desc *desc, u32 flag) { u32 mask_bits = desc->masked; unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; + PCI_MSIX_ENTRY_VECTOR_CTRL; mask_bits &= ~1; mask_bits |= flag; writel(mask_bits, desc->mask_base + offset); @@ -188,9 +188,9 @@ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) void __iomem *base = entry->mask_base + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; - msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); - msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); - msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET); + msg->address_lo = 
readl(base + PCI_MSIX_ENTRY_LOWER_ADDR); + msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR); + msg->data = readl(base + PCI_MSIX_ENTRY_DATA); } else { struct pci_dev *dev = entry->dev; int pos = entry->msi_attrib.pos; @@ -225,11 +225,9 @@ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) base = entry->mask_base + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; - writel(msg->address_lo, - base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); - writel(msg->address_hi, - base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); - writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET); + writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR); + writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR); + writel(msg->data, base + PCI_MSIX_ENTRY_DATA); } else { struct pci_dev *dev = entry->dev; int pos = entry->msi_attrib.pos; @@ -493,7 +491,7 @@ static int msix_capability_init(struct pci_dev *dev, set_irq_msi(entry->irq, entry); j = entries[i].entry; entry->masked = readl(base + j * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); + PCI_MSIX_ENTRY_VECTOR_CTRL); msix_mask_irq(entry, 1); i++; } diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h index a0662842550b..de27c1cb5a2b 100644 --- a/drivers/pci/msi.h +++ b/drivers/pci/msi.h @@ -6,11 +6,11 @@ #ifndef MSI_H #define MSI_H -#define PCI_MSIX_ENTRY_SIZE 16 -#define PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET 0 -#define PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET 4 -#define PCI_MSIX_ENTRY_DATA_OFFSET 8 -#define PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET 12 +#define PCI_MSIX_ENTRY_SIZE 16 +#define PCI_MSIX_ENTRY_LOWER_ADDR 0 +#define PCI_MSIX_ENTRY_UPPER_ADDR 4 +#define PCI_MSIX_ENTRY_DATA 8 +#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 #define msi_control_reg(base) (base + PCI_MSI_FLAGS) #define msi_lower_address_reg(base) (base + PCI_MSI_ADDRESS_LO) From 7ba1930db02fc3118165338ef4e562869f575583 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Tue, 23 Jun 2009 17:39:27 +0900 Subject: [PATCH 176/741] PCI MSI: Unmask MSI if setup failed The 
initial state of mask register of MSI is unmasked. We set it masked before calling arch_setup_msi_irqs(). If arch_setup_msi_irqs() fails, it is better to restore the state of the mask register. Reviewed-by: Matthew Wilcox Signed-off-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index a088fc6f5838..9ab4fe8f20af 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -383,6 +383,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec) /* Configure MSI capability structure */ ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI); if (ret) { + msi_mask_irq(entry, mask, ~mask); msi_free_irqs(dev); return ret; } From 12abb8ba8444f7c9b355bbdd44a6d0839f7a41b6 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Wed, 24 Jun 2009 12:08:09 +0900 Subject: [PATCH 177/741] PCI MSI: Fix restoration of MSI/MSI-X mask states in suspend/resume There are 2 problems with mask states in suspend/resume. [1]: It is better to restore the mask states of MSI/MSI-X to initial states (MSI is unmasked, MSI-X is masked) when we release the device. The pci_msi_shutdown() does the restoration of mask states for MSI, while the msi_free_irqs() does it for MSI-X. In other words, in the "disable" path both MSI and MSI-X are handled, but in the "shutdown" path only MSI is handled. MSI: pci_disable_msi() => pci_msi_shutdown() [ mask states for MSI restored ] => msi_set_enable(dev, pos, 0); => msi_free_irqs() MSI-X: pci_disable_msix() => pci_msix_shutdown() => msix_set_enable(dev, 0); => msix_free_all_irqs => msi_free_irqs() [ mask states for MSI-X restored ] This patch moves the masking for MSI-X from msi_free_irqs() to pci_msix_shutdown(). This change has some positive side effects: - It prevents the OS from touching mask states before reading preserved bits in the register, which can happen if msi_free_irqs() is called from the error path in msix_capability_init().
- It also prevents touching the register after turning off MSI-X in the "disable" path, which can be a problem on some devices. [2]: We have a cache of the mask state in msi_desc, which is automatically updated when msi/msix_mask_irq() is called. These cached states are used for the resume. But since what needs to be restored in the resume is the states before the shutdown on the suspend, calling msi/msix_mask_irq() from pci_msi/msix_shutdown() is not appropriate. This patch introduces __msi/msix_mask_irq(), which do the same masking as msi/msix_mask_irq() but do not update the cached state, for use in pci_msi/msix_shutdown(). [updated: get rid of msi/msix_mask_irq_nocache() (proposed by Matthew Wilcox)] Reviewed-by: Matthew Wilcox Signed-off-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 9ab4fe8f20af..d986afb7032b 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -127,17 +127,23 @@ static inline __attribute_const__ u32 msi_enabled_mask(u16 control) * reliably as devices without an INTx disable bit will then generate a * level IRQ which will never be cleared. */ -static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) +static u32 __msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) { u32 mask_bits = desc->masked; if (!desc->msi_attrib.maskbit) - return; + return 0; mask_bits &= ~mask; mask_bits |= flag; pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits); - desc->masked = mask_bits; + + return mask_bits; +} + +static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) +{ + desc->masked = __msi_mask_irq(desc, mask, flag); } /* @@ -147,7 +153,7 @@ static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) * file. This saves a few milliseconds when initialising devices with lots * of MSI-X interrupts.
*/ -static void msix_mask_irq(struct msi_desc *desc, u32 flag) +static u32 __msix_mask_irq(struct msi_desc *desc, u32 flag) { u32 mask_bits = desc->masked; unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + @@ -155,7 +161,13 @@ static void msix_mask_irq(struct msi_desc *desc, u32 flag) mask_bits &= ~1; mask_bits |= flag; writel(mask_bits, desc->mask_base + offset); - desc->masked = mask_bits; + + return mask_bits; +} + +static void msix_mask_irq(struct msi_desc *desc, u32 flag) +{ + desc->masked = __msix_mask_irq(desc, flag); } static void msi_set_mask_bit(unsigned irq, u32 flag) @@ -616,9 +628,11 @@ void pci_msi_shutdown(struct pci_dev *dev) pci_intx_for_msi(dev, 1); dev->msi_enabled = 0; + /* Return the device with MSI unmasked as initial states */ pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &ctrl); mask = msi_capable_mask(ctrl); - msi_mask_irq(desc, mask, ~mask); + /* Keep cached state to be restored */ + __msi_mask_irq(desc, mask, ~mask); /* Restore dev->irq to its default pin-assertion irq */ dev->irq = desc->msi_attrib.default_irq; @@ -658,7 +672,6 @@ static int msi_free_irqs(struct pci_dev* dev) list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) { if (entry->msi_attrib.is_msix) { - msix_mask_irq(entry, 1); if (list_is_last(&entry->list, &dev->msi_list)) iounmap(entry->mask_base); } @@ -746,9 +759,17 @@ static void msix_free_all_irqs(struct pci_dev *dev) void pci_msix_shutdown(struct pci_dev* dev) { + struct msi_desc *entry; + if (!pci_msi_enable || !dev || !dev->msix_enabled) return; + /* Return the device with MSI-X masked as initial states */ + list_for_each_entry(entry, &dev->msi_list, list) { + /* Keep cached states to be restored */ + __msix_mask_irq(entry, 1); + } + msix_set_enable(dev, 0); pci_intx_for_msi(dev, 1); dev->msix_enabled = 0; From 210ad39fb7ef0bc0494483f517f42524f16bb2a7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 29 Jun 2009 21:50:54 +0200 Subject: [PATCH 178/741] perf stat: Use percentages for 
scaling output Peter expressed a strong preference for percentage based display of scaled values - so revert to that from the recently introduced multiplication-factor unit. Reported-by: Peter Zijlstra Cc: Jaswinder Singh Rajput Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3e5ea4e2e5fd..c5a290727a92 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -307,7 +307,8 @@ static void print_counter(int counter) abs_printout(counter, count, noise); if (scaled) - fprintf(stderr, " (%7.2fx scaled)", (double)count[1]/count[2]); + fprintf(stderr, " (scaled from %.2f%%)", + (double) count[2] / count[1] * 100); fprintf(stderr, "\n"); } From 051ae7f7344f453616b6b10332d4d8e1d40ed823 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 29 Jun 2009 21:13:21 +1000 Subject: [PATCH 179/741] perf_counter tools: Reduce perf stat measurement overhead/skew Vince Weaver reported a 'perf stat' measurement overhead in the count of retired instructions, which can amount to a +6000 instructions inflated count in the reported count. At present, perf stat creates its counters on the perf process. Thus the counters count the fork and various other activity in both the parent and child, such as the resolver overhead for resolving PLT entries for any libc functions that haven't been called before, such as execvp. This reduces the overhead by creating the counters on the child process after the fork, using a couple of pipes to synchronize so that the child process waits until the parent has created the counters before doing the exec. To eliminate the PLT resolution overhead on calling execvp, this does a dummy execvp first which will always fail. 
With this, the overhead of executing a program goes down from over 4800 instructions to about 90 instructions on powerpc (32-bit). This was measured with a statically-linked program written in assembler which only does the 3 instructions needed to call _exit(0). Before: $ perf stat -e 0:1:u ./three Performance counter stats for './three': 4858 instructions 0.001274523 seconds time elapsed After: $ perf stat -e 0:1:u ./three Performance counter stats for './three': 92 instructions 0.000468153 seconds time elapsed Reported-by: Vince Weaver Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <19016.41425.814043.870352@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 64 ++++++++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 14 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c5a290727a92..201ef2367dcb 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -99,7 +99,7 @@ static u64 runtime_cycles_noise; #define ERR_PERF_OPEN \ "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" -static void create_perf_stat_counter(int counter) +static void create_perf_stat_counter(int counter, int pid) { struct perf_counter_attr *attr = attrs + counter; @@ -119,7 +119,7 @@ static void create_perf_stat_counter(int counter) attr->inherit = inherit; attr->disabled = 1; - fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); + fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0); if (fd[0][counter] < 0 && verbose) fprintf(stderr, ERR_PERF_OPEN, counter, fd[0][counter], strerror(errno)); @@ -205,12 +205,58 @@ static int run_perf_stat(int argc, const char **argv) int status = 0; int counter; int pid; + int child_ready_pipe[2], go_pipe[2]; + char buf; if (!system_wide) nr_cpus = 1; + if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) { + perror("failed to create pipes"); + exit(1); + } + + if ((pid = fork()) < 0) + perror("failed to 
fork"); + + if (!pid) { + close(child_ready_pipe[0]); + close(go_pipe[1]); + fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); + + /* + * Do a dummy execvp to get the PLT entry resolved, + * so we avoid the resolver overhead on the real + * execvp call. + */ + execvp("", (char **)argv); + + /* + * Tell the parent we're ready to go + */ + close(child_ready_pipe[1]); + + /* + * Wait until the parent tells us to go. + */ + read(go_pipe[0], &buf, 1); + + execvp(argv[0], (char **)argv); + + perror(argv[0]); + exit(-1); + } + + /* + * Wait for the child to be ready to exec. + */ + close(child_ready_pipe[1]); + close(go_pipe[0]); + read(child_ready_pipe[0], &buf, 1); + close(child_ready_pipe[0]); + for (counter = 0; counter < nr_counters; counter++) - create_perf_stat_counter(counter); + create_perf_stat_counter(counter, pid); /* * Enable counters and exec the command: @@ -218,19 +264,9 @@ static int run_perf_stat(int argc, const char **argv) t0 = rdclock(); prctl(PR_TASK_PERF_COUNTERS_ENABLE); - if ((pid = fork()) < 0) - perror("failed to fork"); - - if (!pid) { - if (execvp(argv[0], (char **)argv)) { - perror(argv[0]); - exit(-1); - } - } - + close(go_pipe[1]); wait(&status); - prctl(PR_TASK_PERF_COUNTERS_DISABLE); t1 = rdclock(); walltime_nsecs[run_idx] = t1 - t0; From 2bf427b25b79eb7cea27963a66c3d4684cae0e0c Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 29 Jun 2009 19:20:42 -0700 Subject: [PATCH 180/741] ide: fix resume for CONFIG_BLK_DEV_IDEACPI=y commit 2f0d0fd2a605666d38e290c5c0d2907484352dc4 ("ide-acpi: cleanup do_drive_get_GTF()") didn't account for the lack of hwif->acpidata check in generic_ide_suspend() [ indirect user of do_drive_get_GTF() through ide_acpi_exec_tfs() ] resulting in broken resume when ACPI support is enabled but ACPI data is unavailable. 
Fix it by adding ide_port_acpi() helper for checking if port needs ACPI handling and cleaning generic_ide_{suspend,resume}() to use it instead of hiding hwif->acpidata and ide_noacpi checks in IDE ACPI helpers (this should help in preventing similar bugs in the future). While at it: - kill superfluous debugging printks in ide_acpi_{get,push}_timing() Reported-and-tested-by: Etienne Basset Also-reported-and-tested-by: Jeff Chua Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. Miller --- drivers/ide/ide-acpi.c | 37 +++++++------------------------------ drivers/ide/ide-pm.c | 30 ++++++++++++++++++------------ include/linux/ide.h | 2 ++ 3 files changed, 27 insertions(+), 42 deletions(-) diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c index 77f79d26b264..c509c9916464 100644 --- a/drivers/ide/ide-acpi.c +++ b/drivers/ide/ide-acpi.c @@ -92,6 +92,11 @@ int ide_acpi_init(void) return 0; } +bool ide_port_acpi(ide_hwif_t *hwif) +{ + return ide_noacpi == 0 && hwif->acpidata; +} + /** * ide_get_dev_handle - finds acpi_handle and PCI device.function * @dev: device to locate @@ -352,9 +357,6 @@ int ide_acpi_exec_tfs(ide_drive_t *drive) unsigned long gtf_address; unsigned long obj_loc; - if (ide_noacpi) - return 0; - DEBPRINT("call get_GTF, drive=%s port=%d\n", drive->name, drive->dn); ret = do_drive_get_GTF(drive, >f_length, >f_address, &obj_loc); @@ -389,16 +391,6 @@ void ide_acpi_get_timing(ide_hwif_t *hwif) struct acpi_buffer output; union acpi_object *out_obj; - if (ide_noacpi) - return; - - DEBPRINT("ENTER:\n"); - - if (!hwif->acpidata) { - DEBPRINT("no ACPI data for %s\n", hwif->name); - return; - } - /* Setting up output buffer for _GTM */ output.length = ACPI_ALLOCATE_BUFFER; output.pointer = NULL; /* ACPI-CA sets this; save/free it later */ @@ -479,16 +471,6 @@ void ide_acpi_push_timing(ide_hwif_t *hwif) struct ide_acpi_drive_link *master = &hwif->acpidata->master; struct ide_acpi_drive_link *slave = &hwif->acpidata->slave; - if (ide_noacpi) 
- return; - - DEBPRINT("ENTER:\n"); - - if (!hwif->acpidata) { - DEBPRINT("no ACPI data for %s\n", hwif->name); - return; - } - /* Give the GTM buffer + drive Identify data to the channel via the * _STM method: */ /* setup input parameters buffer for _STM */ @@ -527,16 +509,11 @@ void ide_acpi_set_state(ide_hwif_t *hwif, int on) ide_drive_t *drive; int i; - if (ide_noacpi || ide_noacpi_psx) + if (ide_noacpi_psx) return; DEBPRINT("ENTER:\n"); - if (!hwif->acpidata) { - DEBPRINT("no ACPI data for %s\n", hwif->name); - return; - } - /* channel first and then drives for power on and verse versa for power off */ if (on) acpi_bus_set_power(hwif->acpidata->obj_handle, ACPI_STATE_D0); @@ -616,7 +593,7 @@ void ide_acpi_port_init_devices(ide_hwif_t *hwif) drive->name, err); } - if (!ide_acpionboot) { + if (ide_noacpi || ide_acpionboot == 0) { DEBPRINT("ACPI methods disabled on boot\n"); return; } diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index c14ca144cffe..ad7be2669dcb 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -10,9 +10,11 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) struct request_pm_state rqpm; int ret; - /* call ACPI _GTM only once */ - if ((drive->dn & 1) == 0 || pair == NULL) - ide_acpi_get_timing(hwif); + if (ide_port_acpi(hwif)) { + /* call ACPI _GTM only once */ + if ((drive->dn & 1) == 0 || pair == NULL) + ide_acpi_get_timing(hwif); + } memset(&rqpm, 0, sizeof(rqpm)); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); @@ -26,9 +28,11 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) ret = blk_execute_rq(drive->queue, NULL, rq, 0); blk_put_request(rq); - /* call ACPI _PS3 only after both devices are suspended */ - if (ret == 0 && ((drive->dn & 1) || pair == NULL)) - ide_acpi_set_state(hwif, 0); + if (ret == 0 && ide_port_acpi(hwif)) { + /* call ACPI _PS3 only after both devices are suspended */ + if ((drive->dn & 1) || pair == NULL) + ide_acpi_set_state(hwif, 0); + } return ret; } @@ -42,13 
+46,15 @@ int generic_ide_resume(struct device *dev) struct request_pm_state rqpm; int err; - /* call ACPI _PS0 / _STM only once */ - if ((drive->dn & 1) == 0 || pair == NULL) { - ide_acpi_set_state(hwif, 1); - ide_acpi_push_timing(hwif); - } + if (ide_port_acpi(hwif)) { + /* call ACPI _PS0 / _STM only once */ + if ((drive->dn & 1) == 0 || pair == NULL) { + ide_acpi_set_state(hwif, 1); + ide_acpi_push_timing(hwif); + } - ide_acpi_exec_tfs(drive); + ide_acpi_exec_tfs(drive); + } memset(&rqpm, 0, sizeof(rqpm)); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); diff --git a/include/linux/ide.h b/include/linux/ide.h index c6af7c44d46c..edc93a6d931d 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1419,6 +1419,7 @@ static inline void ide_dma_unmap_sg(ide_drive_t *drive, #ifdef CONFIG_BLK_DEV_IDEACPI int ide_acpi_init(void); +bool ide_port_acpi(ide_hwif_t *hwif); extern int ide_acpi_exec_tfs(ide_drive_t *drive); extern void ide_acpi_get_timing(ide_hwif_t *hwif); extern void ide_acpi_push_timing(ide_hwif_t *hwif); @@ -1427,6 +1428,7 @@ void ide_acpi_port_init_devices(ide_hwif_t *); extern void ide_acpi_set_state(ide_hwif_t *hwif, int on); #else static inline int ide_acpi_init(void) { return 0; } +static inline bool ide_port_acpi(ide_hwif_t *hwif) { return 0; } static inline int ide_acpi_exec_tfs(ide_drive_t *drive) { return 0; } static inline void ide_acpi_get_timing(ide_hwif_t *hwif) { ; } static inline void ide_acpi_push_timing(ide_hwif_t *hwif) { ; } From e18ed145c7f556f1de8350c32739bf35b26df705 Mon Sep 17 00:00:00 2001 From: Christian Engelmayer Date: Mon, 29 Jun 2009 19:31:41 -0700 Subject: [PATCH 181/741] ide: memory overrun in ide_get_identity_ioctl() on big endian machines using ioctl HDIO_OBSOLETE_IDENTITY This patch fixes a memory overrun in function ide_get_identity_ioctl() which chooses the size of a memory buffer depending on the ioctl command that led to the function call, however, passes that buffer to a function which needs the buffer 
size to be always chosen unconditionally. Due to conditional compilation the memory overrun can only happen on big endian machines. The error can be triggered using ioctl HDIO_OBSOLETE_IDENTITY. Usage of ioctl HDIO_GET_IDENTITY is safe. Signed-off-by: Christian Engelmayer Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. Miller --- drivers/ide/ide-ioctls.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index 82f252c3ee6e..e246d3d3fbcc 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -64,7 +64,8 @@ static int ide_get_identity_ioctl(ide_drive_t *drive, unsigned int cmd, goto out; } - id = kmalloc(size, GFP_KERNEL); + /* ata_id_to_hd_driveid() relies on 'id' to be fully allocated. */ + id = kmalloc(ATA_ID_WORDS * 2, GFP_KERNEL); if (id == NULL) { rc = -ENOMEM; goto out; From 8e5b9dda99cc86bdbd822935fcc37c5808e271b3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 28 Jun 2009 18:03:30 +0000 Subject: [PATCH 182/741] tcp: Stop non-TSO packets morphing into TSO If a socket starts out on a non-TSO route, and then switches to a TSO route, then the tail on the tx queue can morph into a TSO packet, causing mischief because the rest of the stack does not expect a partially linear TSO packet. This patch fixes this by ensuring that skb->ip_summed is set to CHECKSUM_PARTIAL before declaring a packet as TSO. Reported-by: Johannes Berg Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv4/tcp_output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 416fc4c2e7eb..5bdf08d312d9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -725,7 +725,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || !sk_can_gso(sk)) { + if (skb->len <= mss_now || !sk_can_gso(sk) || + skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal * non-TSO case. */ From 6828b92bd21acd65113dfe0541f19f5df0d9668f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 28 Jun 2009 18:06:41 +0000 Subject: [PATCH 183/741] tcp: Do not tack on TSO data to non-TSO packet If a socket starts out on a non-TSO route, and then switches to a TSO route, then we will tack on data to the tail of the tx queue even if it started out life as non-TSO. This is suboptimal because all of it will then be copied and checksummed unnecessarily. This patch fixes this by ensuring that skb->ip_summed is set to CHECKSUM_PARTIAL before appending extra data beyond the MSS. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 17b89c523f9d..7870a535dac6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -903,13 +903,17 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, iov++; while (seglen > 0) { - int copy; + int copy = 0; + int max = size_goal; skb = tcp_write_queue_tail(sk); + if (tcp_send_head(sk)) { + if (skb->ip_summed == CHECKSUM_NONE) + max = mss_now; + copy = max - skb->len; + } - if (!tcp_send_head(sk) || - (copy = size_goal - skb->len) <= 0) { - + if (copy <= 0) { new_segment: /* Allocate new segment. If the interface is SG, * allocate skb fitting to single page. 
@@ -930,6 +934,7 @@ new_segment: skb_entail(sk, skb); copy = size_goal; + max = size_goal; } /* Try to append data to the end of skb. */ @@ -1028,7 +1033,7 @@ new_segment: if ((seglen -= copy) == 0 && iovlen == 0) goto out; - if (skb->len < size_goal || (flags & MSG_OOB)) + if (skb->len < max || (flags & MSG_OOB)) continue; if (forced_push(tp)) { From 1802571b9865c0fc1d8d0fa39cf73275f3a75af3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 28 Jun 2009 18:42:53 +0000 Subject: [PATCH 184/741] xfrm: use xfrm_addr_cmp() instead of compare addresses directly Clean up to use xfrm_addr_cmp() instead of compare addresses directly. Signed-off-by: Wei Yongjun Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 57 ++++++------------------------------------- 1 file changed, 8 insertions(+), 49 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5f1f86565f16..f2f7c638083e 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -668,22 +668,10 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, xfrm_address_t *d hlist_for_each_entry(x, entry, net->xfrm.state_byspi+h, byspi) { if (x->props.family != family || x->id.spi != spi || - x->id.proto != proto) + x->id.proto != proto || + xfrm_addr_cmp(&x->id.daddr, daddr, family)) continue; - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)daddr, - (struct in6_addr *) - x->id.daddr.a6)) - continue; - break; - } - xfrm_state_hold(x); return x; } @@ -699,26 +687,11 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, xfrm_addre hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) { if (x->props.family != family || - x->id.proto != proto) + x->id.proto != proto || + xfrm_addr_cmp(&x->id.daddr, daddr, family) || + xfrm_addr_cmp(&x->props.saddr, saddr, family)) continue; - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != 
daddr->a4 || - x->props.saddr.a4 != saddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)daddr, - (struct in6_addr *) - x->id.daddr.a6) || - !ipv6_addr_equal((struct in6_addr *)saddr, - (struct in6_addr *) - x->props.saddr.a6)) - continue; - break; - } - xfrm_state_hold(x); return x; } @@ -1001,25 +974,11 @@ static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family x->props.family != family || x->km.state != XFRM_STATE_ACQ || x->id.spi != 0 || - x->id.proto != proto) + x->id.proto != proto || + xfrm_addr_cmp(&x->id.daddr, daddr, family) || + xfrm_addr_cmp(&x->props.saddr, saddr, family)) continue; - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4 || - x->props.saddr.a4 != saddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6, - (struct in6_addr *)daddr) || - !ipv6_addr_equal((struct in6_addr *) - x->props.saddr.a6, - (struct in6_addr *)saddr)) - continue; - break; - } - xfrm_state_hold(x); return x; } From d51e9b0d94336db56a13fdc65bb30751e3ea33b7 Mon Sep 17 00:00:00 2001 From: Graf Yang Date: Mon, 29 Jun 2009 09:34:20 +0000 Subject: [PATCH 185/741] net/irda: convert bfin_sir to net_device_ops Signed-off-by: Graf Yang Signed-off-by: Mike Frysinger Signed-off-by: David S. 
Miller --- drivers/net/irda/bfin_sir.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/irda/bfin_sir.c b/drivers/net/irda/bfin_sir.c index f3eed6a8fba5..911c082cee5a 100644 --- a/drivers/net/irda/bfin_sir.c +++ b/drivers/net/irda/bfin_sir.c @@ -677,6 +677,14 @@ static int bfin_sir_init_iobuf(iobuff_t *io, int size) return 0; } +static const struct net_device_ops bfin_sir_ndo = { + .ndo_open = bfin_sir_open, + .ndo_stop = bfin_sir_stop, + .ndo_start_xmit = bfin_sir_hard_xmit, + .ndo_do_ioctl = bfin_sir_ioctl, + .ndo_get_stats = bfin_sir_stats, +}; + static int __devinit bfin_sir_probe(struct platform_device *pdev) { struct net_device *dev; @@ -718,12 +726,8 @@ static int __devinit bfin_sir_probe(struct platform_device *pdev) if (err) goto err_mem_3; - dev->hard_start_xmit = bfin_sir_hard_xmit; - dev->open = bfin_sir_open; - dev->stop = bfin_sir_stop; - dev->do_ioctl = bfin_sir_ioctl; - dev->get_stats = bfin_sir_stats; - dev->irq = sir_port->irq; + dev->netdev_ops = &bfin_sir_ndo; + dev->irq = sir_port->irq; irda_init_max_qos_capabilies(&self->qos); From ff0ac74afb5b9916641723a78796d4ee7937c2ea Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 28 Jun 2009 22:49:37 +0000 Subject: [PATCH 186/741] sctp: xmit sctp packet always return no route error Commit 'net: skb->dst accessors' (adf30907d63893e4208dfe3f5c88ae12bc2f25d5) broke the sctp protocol stack: the sctp packet can never be sent out after Eric Dumazet's patch, which has a typo in the sctp code. Signed-off-by: Wei Yongjun Acked-by: Eric Dumazet Acked-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index b76411444515..b94c21190566 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -407,7 +407,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) } dst = dst_clone(tp->dst); skb_dst_set(nskb, dst); - if (dst) + if (!dst) goto no_route; /* Build the SCTP header. */ From e1605495c716ef4eebdb7606bcd1b593f28e2837 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 29 Jun 2009 11:17:38 +0100 Subject: [PATCH 187/741] intel-iommu: Introduce domain_sg_mapping() to speed up intel_map_sg() Instead of calling domain_pfn_mapping() repeatedly with single or small numbers of pages, just pass the sglist in. It can optimise the number of cache flushes like domain_pfn_mapping() does, and gives a huge speedup for large scatterlists. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 83 +++++++++++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 21 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 11a23201445a..28bd5f2d78fc 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1635,6 +1635,56 @@ static int domain_context_mapped(struct pci_dev *pdev) tmp->devfn); } +static int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn, + struct scatterlist *sg, unsigned long nr_pages, + int prot) +{ + struct dma_pte *first_pte = NULL, *pte = NULL; + uint64_t pteval; + int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + unsigned long sg_res = 0; + + BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); + + if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) + return -EINVAL; + + prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP; + + while (nr_pages--) { + if (!sg_res) { + sg_res = (sg->offset + sg->length + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT; + sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + 
sg->offset; + sg->dma_length = sg->length; + pteval = page_to_phys(sg_page(sg)) | prot; + } + if (!pte) { + first_pte = pte = pfn_to_dma_pte(domain, iov_pfn); + if (!pte) + return -ENOMEM; + } + /* We don't need lock here, nobody else + * touches the iova range + */ + BUG_ON(dma_pte_addr(pte)); + pte->val = pteval; + pte++; + if (!nr_pages || + (unsigned long)pte >> VTD_PAGE_SHIFT != + (unsigned long)first_pte >> VTD_PAGE_SHIFT) { + domain_flush_cache(domain, first_pte, + (void *)pte - (void *)first_pte); + pte = NULL; + } + iov_pfn++; + pteval += VTD_PAGE_SIZE; + sg_res--; + if (!sg_res) + sg = sg_next(sg); + } + return 0; +} + static int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, unsigned long phys_pfn, unsigned long nr_pages, int prot) @@ -2758,27 +2808,18 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne prot |= DMA_PTE_WRITE; start_vpfn = mm_to_dma_pfn(iova->pfn_lo); - offset_pfn = 0; - for_each_sg(sglist, sg, nelems, i) { - int nr_pages = aligned_nrpages(sg->offset, sg->length); - ret = domain_pfn_mapping(domain, start_vpfn + offset_pfn, - page_to_dma_pfn(sg_page(sg)), - nr_pages, prot); - if (ret) { - /* clear the page */ - dma_pte_clear_range(domain, start_vpfn, - start_vpfn + offset_pfn); - /* free page tables */ - dma_pte_free_pagetable(domain, start_vpfn, - start_vpfn + offset_pfn); - /* free iova */ - __free_iova(&domain->iovad, iova); - return 0; - } - sg->dma_address = ((dma_addr_t)(start_vpfn + offset_pfn) - << VTD_PAGE_SHIFT) + sg->offset; - sg->dma_length = sg->length; - offset_pfn += nr_pages; + + ret = domain_sg_mapping(domain, start_vpfn, sglist, mm_to_dma_pfn(size), prot); + if (unlikely(ret)) { + /* clear the page */ + dma_pte_clear_range(domain, start_vpfn, + start_vpfn + size - 1); + /* free page tables */ + dma_pte_free_pagetable(domain, start_vpfn, + start_vpfn + size - 1); + /* free iova */ + __free_iova(&domain->iovad, iova); + return 0; } /* it's a non-present to present 
mapping. Only flush if caching mode */ From 9051aa0268dc1c3e42cd79a802b0af1f2bfcadae Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 29 Jun 2009 12:30:54 +0100 Subject: [PATCH 188/741] intel-iommu: Combine domain_pfn_mapping() and domain_sg_mapping() Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 62 ++++++++++++++------------------------- 1 file changed, 22 insertions(+), 40 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 28bd5f2d78fc..14308533b1cb 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1635,14 +1635,14 @@ static int domain_context_mapped(struct pci_dev *pdev) tmp->devfn); } -static int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn, - struct scatterlist *sg, unsigned long nr_pages, - int prot) +static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, + struct scatterlist *sg, unsigned long phys_pfn, + unsigned long nr_pages, int prot) { struct dma_pte *first_pte = NULL, *pte = NULL; - uint64_t pteval; + phys_addr_t uninitialized_var(pteval); int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - unsigned long sg_res = 0; + unsigned long sg_res; BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); @@ -1651,6 +1651,13 @@ static int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn, prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP; + if (sg) + sg_res = 0; + else { + sg_res = nr_pages + 1; + pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; + } + while (nr_pages--) { if (!sg_res) { sg_res = (sg->offset + sg->length + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT; @@ -1685,43 +1692,18 @@ static int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn, return 0; } -static int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, - unsigned long phys_pfn, unsigned long nr_pages, - int prot) +static inline int domain_sg_mapping(struct dmar_domain 
*domain, unsigned long iov_pfn, + struct scatterlist *sg, unsigned long nr_pages, + int prot) { - struct dma_pte *first_pte = NULL, *pte = NULL; - int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot); +} - BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); - - if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) - return -EINVAL; - - prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP; - - while (nr_pages--) { - if (!pte) { - first_pte = pte = pfn_to_dma_pte(domain, iov_pfn); - if (!pte) - return -ENOMEM; - } - /* We don't need lock here, nobody else - * touches the iova range - */ - BUG_ON(dma_pte_addr(pte)); - pte->val = (phys_pfn << VTD_PAGE_SHIFT) | prot; - pte++; - if (!nr_pages || - (unsigned long)pte >> VTD_PAGE_SHIFT != - (unsigned long)first_pte >> VTD_PAGE_SHIFT) { - domain_flush_cache(domain, first_pte, - (void *)pte - (void *)first_pte); - pte = NULL; - } - iov_pfn++; - phys_pfn++; - } - return 0; +static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, + unsigned long phys_pfn, unsigned long nr_pages, + int prot) +{ + return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot); } static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) From 1bf20f0dc5629032ddd07617139d9fbca66c1642 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 29 Jun 2009 22:06:43 +0100 Subject: [PATCH 189/741] intel-iommu: dump mappings but don't die on pte already set Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 14308533b1cb..40ce5a03f18f 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1673,7 +1673,16 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, /* We don't need lock here, nobody else * touches the iova 
range */ - BUG_ON(dma_pte_addr(pte)); + if (unlikely(dma_pte_addr(pte))) { + static int dumps = 5; + printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx)\n", + iov_pfn, pte->val); + if (dumps) { + dumps--; + debug_dma_dump_mappings(NULL); + } + WARN_ON(1); + } pte->val = pteval; pte++; if (!nr_pages || From 3d7b0e4154b4963d6bd39991ec8eaa09caeb3994 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 30 Jun 2009 03:38:09 +0100 Subject: [PATCH 190/741] intel-iommu: Don't free too much in dma_pte_free_pagetable() The loop condition was wrong -- we should free a PMD only if its _entire_ range is within the range we're intending to clear. The early-termination condition was right, but not the loop. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 40ce5a03f18f..35bdd2a06caa 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -815,7 +815,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, if (tmp + level_size(level) - 1 > last_pfn) return; - while (tmp <= last_pfn) { + while (tmp + level_size(level) - 1 <= last_pfn) { pte = dma_pfn_level_pte(domain, tmp, level); if (pte) { free_pgtable_page( From f3a0a52fff4dbfdea2dccc908d00c038481d888e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 30 Jun 2009 03:40:07 +0100 Subject: [PATCH 191/741] intel-iommu: Performance improvement for dma_pte_free_pagetable() As with other functions, batch the CPU data cache flushes and don't keep recalculating PTE addresses. 
Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 35bdd2a06caa..ec7e032d5ab5 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -797,7 +797,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, unsigned long last_pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - struct dma_pte *pte; + struct dma_pte *first_pte, *pte; int total = agaw_to_level(domain->agaw); int level; unsigned long tmp; @@ -805,25 +805,32 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); - /* we don't need lock here, nobody else touches the iova range */ + /* We don't need lock here; nobody else touches the iova range */ level = 2; while (level <= total) { tmp = align_to_level(start_pfn, level); - /* Only clear this pte/pmd if we're asked to clear its - _whole_ range */ + /* If we can't even clear one PTE at this level, we're done */ if (tmp + level_size(level) - 1 > last_pfn) return; while (tmp + level_size(level) - 1 <= last_pfn) { - pte = dma_pfn_level_pte(domain, tmp, level); - if (pte) { - free_pgtable_page( - phys_to_virt(dma_pte_addr(pte))); - dma_clear_pte(pte); - domain_flush_cache(domain, pte, sizeof(*pte)); + first_pte = pte = dma_pfn_level_pte(domain, tmp, level); + if (!pte) { + tmp = align_to_level(tmp + 1, level + 1); + continue; } - tmp += level_size(level); + while (tmp + level_size(level) - 1 <= last_pfn && + (unsigned long)pte >> VTD_PAGE_SHIFT == + (unsigned long)first_pte >> VTD_PAGE_SHIFT) { + free_pgtable_page(phys_to_virt(dma_pte_addr(pte))); + dma_clear_pte(pte); + pte++; + tmp += level_size(level); + } + domain_flush_cache(domain, first_pte, + (void *)pte - (void *)first_pte); + } level++; } From 
6a84c234da06a4ac0c1b4c819b83cf264674c2d8 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Sun, 28 Jun 2009 01:41:52 -0600 Subject: [PATCH 192/741] ASoC: Fix typo in MPC5200 PSC AC97 driver Kconfig ALSA SoC drivers should specify SND_SOC_AC97_BUS instead of AC97_BUS. Without SND_SOC_AC97_BUS defined, an AC97 device will not get correctly registered on the AC97 bus, which prevents things like the WM9712 touchscreen driver from getting probed. Tested against 2.6.31-rc1. Signed-off-by: Grant Likely Acked-by: Jon Smirl Signed-off-by: Mark Brown --- sound/soc/fsl/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig index 5dbebf82249c..5661876ee837 100644 --- a/sound/soc/fsl/Kconfig +++ b/sound/soc/fsl/Kconfig @@ -33,7 +33,7 @@ config SND_SOC_MPC5200_I2S config SND_SOC_MPC5200_AC97 tristate "Freescale MPC5200 PSC in AC97 mode driver" depends on PPC_MPC52xx && PPC_BESTCOMM - select AC97_BUS + select SND_SOC_AC97_BUS select SND_MPC52xx_DMA select PPC_BESTCOMM_GEN_BD help From 40d9ec14e7e1f62d2379ecc1b5ee00ddfc2a5d0c Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Sun, 28 Jun 2009 01:42:06 -0600 Subject: [PATCH 193/741] ASoC: remove BROKEN from Efika and pcm030 fabric drivers The needed spin_event_timeout() macro is now merged in from the powerpc tree, so these drivers are no longer broken. This reverts commit 0c0e09e21a9e7bc6ca54e06ef3d497255ca26383 (ASoC: Mark MPC5200 AC97 as BROKEN until PowerPC merge issues are resolved) Tested against 2.6.31-rc1.
Signed-off-by: Grant Likely Acked-by: Jon Smirl Signed-off-by: Mark Brown --- sound/soc/fsl/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig index 5661876ee837..8cb65ccad35f 100644 --- a/sound/soc/fsl/Kconfig +++ b/sound/soc/fsl/Kconfig @@ -41,7 +41,7 @@ config SND_SOC_MPC5200_AC97 config SND_MPC52xx_SOC_PCM030 tristate "SoC AC97 Audio support for Phytec pcm030 and WM9712" - depends on PPC_MPC5200_SIMPLE && BROKEN + depends on PPC_MPC5200_SIMPLE select SND_SOC_MPC5200_AC97 select SND_SOC_WM9712 help @@ -50,7 +50,7 @@ config SND_MPC52xx_SOC_PCM030 config SND_MPC52xx_SOC_EFIKA tristate "SoC AC97 Audio support for bbplan Efika and STAC9766" - depends on PPC_EFIKA && BROKEN + depends on PPC_EFIKA select SND_SOC_MPC5200_AC97 select SND_SOC_STAC9766 help From 1bdd7419910c1506151e7b9e2d60c6980e015f76 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Sun, 28 Jun 2009 00:21:05 +0200 Subject: [PATCH 194/741] ASoC: OMAP: fix OMAP1510 broken PCM pointer callback This patch tries to work around the problem of broken OMAP1510 PCM playback pointer calculation by replacing DMA function call that incorrectly tries to read the value from DMA hardware with a value computed locally from an already maintained variable omap_runtime_data.period_index. Tested on OMAP5910 based Amstrad Delta (E3) using work in progress ASoC driver. Based on linux-2.6-asoc.git v2.6.31-rc1.
Signed-off-by: Janusz Krzysztofik Acked-by: Jarkko Nikula Acked-by: Peter Ujfalusi Signed-off-by: Mark Brown --- sound/soc/omap/omap-pcm.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sound/soc/omap/omap-pcm.c b/sound/soc/omap/omap-pcm.c index 6454e15f7d28..84a1950880eb 100644 --- a/sound/soc/omap/omap-pcm.c +++ b/sound/soc/omap/omap-pcm.c @@ -216,12 +216,15 @@ static snd_pcm_uframes_t omap_pcm_pointer(struct snd_pcm_substream *substream) dma_addr_t ptr; snd_pcm_uframes_t offset; - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - ptr = omap_get_dma_src_pos(prtd->dma_ch); - else + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) { ptr = omap_get_dma_dst_pos(prtd->dma_ch); + offset = bytes_to_frames(runtime, ptr - runtime->dma_addr); + } else if (!(cpu_is_omap1510())) { + ptr = omap_get_dma_src_pos(prtd->dma_ch); + offset = bytes_to_frames(runtime, ptr - runtime->dma_addr); + } else + offset = prtd->period_index * runtime->period_size; - offset = bytes_to_frames(runtime, ptr - runtime->dma_addr); if (offset >= runtime->buffer_size) offset = 0; From 57e7986ed142417498155ebcd5eaf617ac37136d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 30 Jun 2009 16:07:19 +1000 Subject: [PATCH 195/741] perf_counter: Provide a way to enable counters on exec This provides a way to mark a counter to be enabled on the next exec. This is useful for measuring the total activity of a program without including overhead from the process that launches it. This also changes the perf stat command to use this new facility. 
Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <19017.43927.838745.689203@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 3 ++- kernel/perf_counter.c | 50 ++++++++++++++++++++++++++++++++++++ tools/perf/builtin-stat.c | 6 ++--- 3 files changed, 55 insertions(+), 4 deletions(-) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 3078e23c91eb..5e970c7d3fd5 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -179,8 +179,9 @@ struct perf_counter_attr { comm : 1, /* include comm data */ freq : 1, /* use freq, not period */ inherit_stat : 1, /* per task counts */ + enable_on_exec : 1, /* next exec enables */ - __reserved_1 : 52; + __reserved_1 : 51; __u32 wakeup_events; /* wakeup every n events */ __u32 __reserved_2; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 66ab1e9d1294..d55a50da2347 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1428,6 +1428,53 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) perf_counter_task_sched_in(curr, cpu); } +/* + * Enable all of a task's counters that have been marked enable-on-exec. + * This expects task == current. + */ +static void perf_counter_enable_on_exec(struct task_struct *task) +{ + struct perf_counter_context *ctx; + struct perf_counter *counter; + unsigned long flags; + int enabled = 0; + + local_irq_save(flags); + ctx = task->perf_counter_ctxp; + if (!ctx || !ctx->nr_counters) + goto out; + + __perf_counter_task_sched_out(ctx); + + spin_lock(&ctx->lock); + + list_for_each_entry(counter, &ctx->counter_list, list_entry) { + if (!counter->attr.enable_on_exec) + continue; + counter->attr.enable_on_exec = 0; + if (counter->state >= PERF_COUNTER_STATE_INACTIVE) + continue; + counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->tstamp_enabled = + ctx->time - counter->total_time_enabled; + enabled = 1; + } + + /* + * Unclone this context if we enabled any counter. 
+ */ + if (enabled && ctx->parent_ctx) { + put_ctx(ctx->parent_ctx); + ctx->parent_ctx = NULL; + } + + spin_unlock(&ctx->lock); + + perf_counter_task_sched_in(task, smp_processor_id()); + out: + local_irq_restore(flags); +} + /* * Cross CPU call to read the hardware counter */ @@ -2949,6 +2996,9 @@ void perf_counter_comm(struct task_struct *task) { struct perf_comm_event comm_event; + if (task->perf_counter_ctxp) + perf_counter_enable_on_exec(task); + if (!atomic_read(&nr_comm_counters)) return; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 201ef2367dcb..2e03524a1de0 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -116,8 +116,9 @@ static void create_perf_stat_counter(int counter, int pid) fd[cpu][counter], strerror(errno)); } } else { - attr->inherit = inherit; - attr->disabled = 1; + attr->inherit = inherit; + attr->disabled = 1; + attr->enable_on_exec = 1; fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0); if (fd[0][counter] < 0 && verbose) @@ -262,7 +263,6 @@ static int run_perf_stat(int argc, const char **argv) * Enable counters and exec the command: */ t0 = rdclock(); - prctl(PR_TASK_PERF_COUNTERS_ENABLE); close(go_pipe[1]); wait(&status); From 874d2f61d31e596c36af7732dc1b3aa2dc233824 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Tue, 30 Jun 2009 15:18:14 +0100 Subject: [PATCH 196/741] dm exception store: really fix type lookup Fix exception store name handling. We need to reference exception store by zero terminated string. 
Fixes regression introduced in commit f6bd4eb73cdf2a5bf954e497972842f39cabb7e3 Cc: Yi Yang Cc: Jonathan Brassow Cc: stable@kernel.org Cc: Andrew Morton Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-exception-store.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index c3ae51584b12..3710ff88fc10 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -195,7 +195,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, struct dm_exception_store **store) { int r = 0; - struct dm_exception_store_type *type; + struct dm_exception_store_type *type = NULL; struct dm_exception_store *tmp_store; char persistent; @@ -211,12 +211,15 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, } persistent = toupper(*argv[1]); - if (persistent != 'P' && persistent != 'N') { + if (persistent == 'P') + type = get_type("P"); + else if (persistent == 'N') + type = get_type("N"); + else { ti->error = "Persistent flag is not P or N"; return -EINVAL; } - type = get_type(&persistent); if (!type) { ti->error = "Exception store type not recognised"; r = -EINVAL; From ea9df47cc92573b159ef3b4fda516c32cba9c4fd Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 30 Jun 2009 15:18:17 +0100 Subject: [PATCH 197/741] dm table: fix blk_stack_limits arg to use bytes not sectors The offset passed to blk_stack_limits() must be in bytes not sectors. 
Fixes false warnings like the following: device-mapper: table: 254:1: target device sda6 is misaligned Signed-off-by: Mike Snitzer Reported-by: Frans Pop Tested-by: Frans Pop Signed-off-by: Alasdair G Kergon --- drivers/md/dm-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 4899ebe767c8..2cba557d9e61 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -495,7 +495,7 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, return 0; } - if (blk_stack_limits(limits, &q->limits, start) < 0) + if (blk_stack_limits(limits, &q->limits, start << 9) < 0) DMWARN("%s: target device %s is misaligned", dm_device_name(ti->table->md), bdevname(bdev, b)); From f5812a7a336fb952d819e4427b9a2dce02368e82 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Jun 2009 11:43:17 -0300 Subject: [PATCH 198/741] perf_counter tools: Adjust only prelinked symbol's addresses I.e. we can't handle these two kinds of files in the same way: 1) prelinked system library: [acme@doppio pahole]$ readelf -s /usr/lib64/libdw-0.141.so | egrep 'FUNC.+GLOBAL.+dwfl_report_elf' 278: 00000030450105a0 261 FUNC GLOBAL DEFAULT 12 dwfl_report_elf@@ELFUTILS_0.122 2) not prelinked library with debug information from a -debuginfo package: [acme@doppio pahole]$ readelf -s /usr/lib/debug/usr/lib64/libdw-0.141.so.debug | egrep 'FUNC.+GLOBAL.+dwfl_report_elf' 629: 00000000000105a0 261 FUNC GLOBAL DEFAULT 12 dwfl_report_elf [acme@doppio pahole]$ Now the numbers I got for a pahole perf run are in line with the numbers I get from oprofile. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <20090630144317.GB12663@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 16 +++++++++++----- tools/perf/util/symbol.h | 3 ++- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 9c659ef6aec2..78c2efde01b7 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -520,7 +520,9 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, nr_syms = shdr.sh_size / shdr.sh_entsize; memset(&sym, 0, sizeof(sym)); - + self->prelinked = elf_section_by_name(elf, &ehdr, &shdr, + ".gnu.prelink_undo", + NULL) != NULL; elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { struct symbol *f; u64 obj_start; @@ -535,11 +537,13 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, gelf_getshdr(sec, &shdr); obj_start = sym.st_value; - if (verbose >= 2) - printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n", - (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); + if (self->prelinked) { + if (verbose >= 2) + printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n", + (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); - sym.st_value -= shdr.sh_addr - shdr.sh_offset; + sym.st_value -= shdr.sh_addr - shdr.sh_offset; + } f = symbol__new(sym.st_value, sym.st_size, elf_sym__name(&sym, symstrs), @@ -573,6 +577,8 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose) if (!name) return -1; + self->prelinked = 0; + if (strncmp(self->name, "/tmp/perf-", 10) == 0) return dso__load_perf_map(self, filter, verbose); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 940b432db16e..2c48ace8203b 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -20,8 +20,9 @@ struct symbol { struct dso { struct list_head node; struct 
rb_root syms; - unsigned int sym_priv_size; struct symbol *(*find_symbol)(struct dso *, u64 ip); + unsigned int sym_priv_size; + unsigned char prelinked; char name[0]; }; From b4c458b3a23d76936e76678f2074b1528f129f7a Mon Sep 17 00:00:00 2001 From: Csaba Henk Date: Mon, 29 Jun 2009 03:26:53 +0200 Subject: [PATCH 199/741] fuse: fix return value of fuse_dev_write() On 64 bit systems -- where sizeof(ssize_t) > sizeof(int) -- the following test exposes a bug due to a non-careful return of an int or unsigned value: implement a FUSE filesystem which sends an unsolicited notification to the kernel with invalid opcode. The respective write to /dev/fuse will return (1 << 32) - EINVAL with errno == 0 instead of -1 with errno == EINVAL. Signed-off-by: Miklos Szeredi CC: stable@kernel.org --- fs/fuse/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8fed2ed12f38..8a11a8c67c42 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -910,7 +910,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { int err; - unsigned nbytes = iov_length(iov, nr_segs); + size_t nbytes = iov_length(iov, nr_segs); struct fuse_req *req; struct fuse_out_header oh; struct fuse_copy_state cs; From 201fa69a2849536ef2912e8e971ec0b01c04eff4 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 30 Jun 2009 20:06:24 +0200 Subject: [PATCH 200/741] fuse: fix bad return value in fuse_file_poll() Fix fuse_file_poll() which returned a -errno value instead of a poll mask. 
Signed-off-by: Miklos Szeredi CC: stable@kernel.org --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index fce6ce694fde..cbc464043b6f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1922,7 +1922,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait) req = fuse_get_req(fc); if (IS_ERR(req)) - return PTR_ERR(req); + return POLLERR; req->in.h.opcode = FUSE_POLL; req->in.h.nodeid = ff->nodeid; From e0a43ddcc08c34dbd666d93600fd23914505f4aa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 30 Jun 2009 20:12:23 +0200 Subject: [PATCH 201/741] fuse: allow umask processing in userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch lets filesystems handle masking the file mode on creation. This is needed if filesystem is using ACLs. - The CREATE, MKDIR and MKNOD requests are extended with a "umask" parameter. - A new FUSE_DONT_MASK flag is added to the INIT request/reply. With this the filesystem may request that the create mode is not masked. 
CC: Jean-Pierre André Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 20 +++++++++++++++++--- fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 9 ++++++++- include/linux/fuse.h | 20 ++++++++++++++++++-- 4 files changed, 46 insertions(+), 6 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b3089a083d30..6b700734e519 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -375,7 +375,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_req *req; struct fuse_req *forget_req; - struct fuse_open_in inarg; + struct fuse_create_in inarg; struct fuse_open_out outopen; struct fuse_entry_out outentry; struct fuse_file *ff; @@ -399,15 +399,20 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (!ff) goto out_put_request; + if (!fc->dont_mask) + mode &= ~current_umask(); + flags &= ~O_NOCTTY; memset(&inarg, 0, sizeof(inarg)); memset(&outentry, 0, sizeof(outentry)); inarg.flags = flags; inarg.mode = mode; + inarg.umask = current_umask(); req->in.h.opcode = FUSE_CREATE; req->in.h.nodeid = get_node_id(dir); req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); + req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) : + sizeof(inarg); req->in.args[0].value = &inarg; req->in.args[1].size = entry->d_name.len + 1; req->in.args[1].value = entry->d_name.name; @@ -546,12 +551,17 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode, if (IS_ERR(req)) return PTR_ERR(req); + if (!fc->dont_mask) + mode &= ~current_umask(); + memset(&inarg, 0, sizeof(inarg)); inarg.mode = mode; inarg.rdev = new_encode_dev(rdev); + inarg.umask = current_umask(); req->in.h.opcode = FUSE_MKNOD; req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); + req->in.args[0].size = fc->minor < 12 ? 
FUSE_COMPAT_MKNOD_IN_SIZE : + sizeof(inarg); req->in.args[0].value = &inarg; req->in.args[1].size = entry->d_name.len + 1; req->in.args[1].value = entry->d_name.name; @@ -578,8 +588,12 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode) if (IS_ERR(req)) return PTR_ERR(req); + if (!fc->dont_mask) + mode &= ~current_umask(); + memset(&inarg, 0, sizeof(inarg)); inarg.mode = mode; + inarg.umask = current_umask(); req->in.h.opcode = FUSE_MKDIR; req->in.numargs = 2; req->in.args[0].size = sizeof(inarg); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index aaf2f9ff970e..ede4f77b2d6c 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -446,6 +446,9 @@ struct fuse_conn { /** Do multi-page cached writes */ unsigned big_writes:1; + /** Don't apply umask to creation modes */ + unsigned dont_mask:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index d8673ccf90b7..6cc501bd0187 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -725,6 +725,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) } if (arg->flags & FUSE_BIG_WRITES) fc->big_writes = 1; + if (arg->flags & FUSE_DONT_MASK) + fc->dont_mask = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -748,7 +750,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->minor = FUSE_KERNEL_MINOR_VERSION; arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | - FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES; + FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); @@ -864,6 +866,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (err) goto err_put_conn; + /* Handle umasking inside the fuse code */ + if (sb->s_flags & MS_POSIXACL) + fc->dont_mask = 1; + sb->s_flags |= 
MS_POSIXACL; + fc->release = fuse_free_conn; fc->flags = d.flags; fc->user_id = d.user_id; diff --git a/include/linux/fuse.h b/include/linux/fuse.h index d41ed593f79f..e2b816a62488 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -25,6 +25,9 @@ * - add IOCTL message * - add unsolicited notification support * - add POLL message and NOTIFY_POLL notification + * + * 7.12 + * - add umask flag to input argument of open, mknod and mkdir */ #ifndef _LINUX_FUSE_H @@ -36,7 +39,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 11 +#define FUSE_KERNEL_MINOR_VERSION 12 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -112,6 +115,7 @@ struct fuse_file_lock { * INIT request/reply flags * * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." + * FUSE_DONT_MASK: don't apply umask to file mode on create operations */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -119,6 +123,7 @@ struct fuse_file_lock { #define FUSE_ATOMIC_O_TRUNC (1 << 3) #define FUSE_EXPORT_SUPPORT (1 << 4) #define FUSE_BIG_WRITES (1 << 5) +#define FUSE_DONT_MASK (1 << 6) /** * CUSE INIT request/reply flags @@ -262,14 +267,18 @@ struct fuse_attr_out { struct fuse_attr attr; }; +#define FUSE_COMPAT_MKNOD_IN_SIZE 8 + struct fuse_mknod_in { __u32 mode; __u32 rdev; + __u32 umask; + __u32 padding; }; struct fuse_mkdir_in { __u32 mode; - __u32 padding; + __u32 umask; }; struct fuse_rename_in { @@ -300,8 +309,15 @@ struct fuse_setattr_in { }; struct fuse_open_in { + __u32 flags; + __u32 unused; +}; + +struct fuse_create_in { __u32 flags; __u32 mode; + __u32 umask; + __u32 padding; }; struct fuse_open_out { From 3b463ae0c6264f70e5d4c0a9c46af20fed43c96e Mon Sep 17 00:00:00 2001 From: John Muir Date: Sun, 31 May 2009 11:13:57 -0400 Subject: [PATCH 202/741] fuse: invalidation reverse calls Add notification messages that allow the filesystem to invalidate VFS caches. 
Two notifications are added: 1) inode invalidation - invalidate cached attributes - invalidate a range of pages in the page cache (this is optional) 2) dentry invalidation - try to invalidate a subtree in the dentry cache Care must be taken while accessing the 'struct super_block' for the mount, as it can go away while an invalidation is in progress. To prevent this, introduce a rw-semaphore, that is taken for read during the invalidation and taken for write in the ->kill_sb callback. Cc: Csaba Henk Cc: Anand Avati Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 81 ++++++++++++++++++++++++++++++++++++++++++++ fs/fuse/dir.c | 37 ++++++++++++++++++++ fs/fuse/fuse_i.h | 24 +++++++++++++ fs/fuse/inode.c | 59 ++++++++++++++++++++++++++++++-- include/linux/fuse.h | 16 +++++++++ 5 files changed, 214 insertions(+), 3 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8a11a8c67c42..f58ecbc416c8 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -849,6 +849,81 @@ err: return err; } +static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_inval_inode_out outarg; + int err = -EINVAL; + + if (size != sizeof(outarg)) + goto err; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + goto err; + fuse_copy_finish(cs); + + down_read(&fc->killsb); + err = -ENOENT; + if (!fc->sb) + goto err_unlock; + + err = fuse_reverse_inval_inode(fc->sb, outarg.ino, + outarg.off, outarg.len); + +err_unlock: + up_read(&fc->killsb); + return err; + +err: + fuse_copy_finish(cs); + return err; +} + +static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_inval_entry_out outarg; + int err = -EINVAL; + char buf[FUSE_NAME_MAX+1]; + struct qstr name; + + if (size < sizeof(outarg)) + goto err; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + goto err; + + err = -ENAMETOOLONG; + if (outarg.namelen > FUSE_NAME_MAX) + goto 
err; + + name.name = buf; + name.len = outarg.namelen; + err = fuse_copy_one(cs, buf, outarg.namelen + 1); + if (err) + goto err; + fuse_copy_finish(cs); + buf[outarg.namelen] = 0; + name.hash = full_name_hash(name.name, name.len); + + down_read(&fc->killsb); + err = -ENOENT; + if (!fc->sb) + goto err_unlock; + + err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name); + +err_unlock: + up_read(&fc->killsb); + return err; + +err: + fuse_copy_finish(cs); + return err; +} + static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, unsigned int size, struct fuse_copy_state *cs) { @@ -856,6 +931,12 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, case FUSE_NOTIFY_POLL: return fuse_notify_poll(fc, size, cs); + case FUSE_NOTIFY_INVAL_INODE: + return fuse_notify_inval_inode(fc, size, cs); + + case FUSE_NOTIFY_INVAL_ENTRY: + return fuse_notify_inval_entry(fc, size, cs); + default: fuse_copy_finish(cs); return -EINVAL; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 6b700734e519..e703654e7f40 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -859,6 +859,43 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat, return err; } +int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, + struct qstr *name) +{ + int err = -ENOTDIR; + struct inode *parent; + struct dentry *dir; + struct dentry *entry; + + parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid); + if (!parent) + return -ENOENT; + + mutex_lock(&parent->i_mutex); + if (!S_ISDIR(parent->i_mode)) + goto unlock; + + err = -ENOENT; + dir = d_find_alias(parent); + if (!dir) + goto unlock; + + entry = d_lookup(dir, name); + dput(dir); + if (!entry) + goto unlock; + + fuse_invalidate_attr(parent); + fuse_invalidate_entry(entry); + dput(entry); + err = 0; + + unlock: + mutex_unlock(&parent->i_mutex); + iput(parent); + return err; +} + /* * Calling into a user-controlled filesystem gives the filesystem * daemon ptrace-like capabilities 
over the requester process. This diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index ede4f77b2d6c..52b641fc0faf 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -484,6 +484,12 @@ struct fuse_conn { /** Called on final put */ void (*release)(struct fuse_conn *); + + /** Super block for this connection. */ + struct super_block *sb; + + /** Read/write semaphore to hold when accessing sb. */ + struct rw_semaphore killsb; }; static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) @@ -511,6 +517,11 @@ extern const struct file_operations fuse_dev_operations; extern const struct dentry_operations fuse_dentry_operations; +/** + * Inode to nodeid comparison. + */ +int fuse_inode_eq(struct inode *inode, void *_nodeidp); + /** * Get a filled in inode */ @@ -711,6 +722,19 @@ void fuse_release_nowrite(struct inode *inode); u64 fuse_get_attr_version(struct fuse_conn *fc); +/** + * File-system tells the kernel to invalidate cache for the given node id. + */ +int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, + loff_t offset, loff_t len); + +/** + * File-system tells the kernel to invalidate parent attributes and + * the dentry matching parent/name. 
+ */ +int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, + struct qstr *name); + int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, bool isdir); ssize_t fuse_direct_io(struct file *file, const char __user *buf, diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6cc501bd0187..f91ccc4a189d 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -206,7 +206,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) BUG(); } -static int fuse_inode_eq(struct inode *inode, void *_nodeidp) +int fuse_inode_eq(struct inode *inode, void *_nodeidp) { u64 nodeid = *(u64 *) _nodeidp; if (get_node_id(inode) == nodeid) @@ -257,6 +257,31 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, return inode; } +int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, + loff_t offset, loff_t len) +{ + struct inode *inode; + pgoff_t pg_start; + pgoff_t pg_end; + + inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid); + if (!inode) + return -ENOENT; + + fuse_invalidate_attr(inode); + if (offset >= 0) { + pg_start = offset >> PAGE_CACHE_SHIFT; + if (len <= 0) + pg_end = -1; + else + pg_end = (offset + len - 1) >> PAGE_CACHE_SHIFT; + invalidate_inode_pages2_range(inode->i_mapping, + pg_start, pg_end); + } + iput(inode); + return 0; +} + static void fuse_umount_begin(struct super_block *sb) { fuse_abort_conn(get_fuse_conn_super(sb)); @@ -480,6 +505,7 @@ void fuse_conn_init(struct fuse_conn *fc) memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); mutex_init(&fc->inst_mutex); + init_rwsem(&fc->killsb); atomic_set(&fc->count, 1); init_waitqueue_head(&fc->waitq); init_waitqueue_head(&fc->blocked_waitq); @@ -862,6 +888,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fuse_conn_init(fc); fc->dev = sb->s_dev; + fc->sb = sb; err = fuse_bdi_init(fc, sb); if (err) goto err_put_conn; @@ -948,12 +975,25 @@ static int fuse_get_sb(struct file_system_type *fs_type, return get_sb_nodev(fs_type, 
flags, raw_data, fuse_fill_super, mnt); } +static void fuse_kill_sb_anon(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + + if (fc) { + down_write(&fc->killsb); + fc->sb = NULL; + up_write(&fc->killsb); + } + + kill_anon_super(sb); +} + static struct file_system_type fuse_fs_type = { .owner = THIS_MODULE, .name = "fuse", .fs_flags = FS_HAS_SUBTYPE, .get_sb = fuse_get_sb, - .kill_sb = kill_anon_super, + .kill_sb = fuse_kill_sb_anon, }; #ifdef CONFIG_BLOCK @@ -965,11 +1005,24 @@ static int fuse_get_sb_blk(struct file_system_type *fs_type, mnt); } +static void fuse_kill_sb_blk(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + + if (fc) { + down_write(&fc->killsb); + fc->sb = NULL; + up_write(&fc->killsb); + } + + kill_block_super(sb); +} + static struct file_system_type fuseblk_fs_type = { .owner = THIS_MODULE, .name = "fuseblk", .get_sb = fuse_get_sb_blk, - .kill_sb = kill_block_super, + .kill_sb = fuse_kill_sb_blk, .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, }; diff --git a/include/linux/fuse.h b/include/linux/fuse.h index e2b816a62488..cf593bf9fd32 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -28,6 +28,8 @@ * * 7.12 * - add umask flag to input argument of open, mknod and mkdir + * - add notification messages for invalidation of inodes and + * directory entries */ #ifndef _LINUX_FUSE_H @@ -229,6 +231,8 @@ enum fuse_opcode { enum fuse_notify_code { FUSE_NOTIFY_POLL = 1, + FUSE_NOTIFY_INVAL_INODE = 2, + FUSE_NOTIFY_INVAL_ENTRY = 3, FUSE_NOTIFY_CODE_MAX, }; @@ -524,4 +528,16 @@ struct fuse_dirent { #define FUSE_DIRENT_SIZE(d) \ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) +struct fuse_notify_inval_inode_out { + __u64 ino; + __s64 off; + __s64 len; +}; + +struct fuse_notify_inval_entry_out { + __u64 parent; + __u32 namelen; + __u32 padding; +}; + #endif /* _LINUX_FUSE_H */ From 01e532981460594fffbf9b992ecfc96a78369924 Mon Sep 17 00:00:00 2001 From: Naohiro Ooiwa Date: Tue, 30 Jun 2009 
12:44:19 -0700 Subject: [PATCH 203/741] bnx2x: Fix the behavior of ethtool when ONBOOT=no This is the same fix as commit 7959ea254ed18faee41160b1c50b3c9664735967 ("bnx2: Fix the behavior of ethtool when ONBOOT=no"), but for bnx2x: -------------------- When configure in ifcfg-eth* is ONBOOT=no, the behavior of ethtool command is wrong. # grep ONBOOT /etc/sysconfig/network-scripts/ifcfg-eth2 ONBOOT=no # ethtool eth2 | tail -n1 Link detected: yes I think "Link detected" should be "no". -------------------- Signed-off-by: Naohiro Ooiwa Acked-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x_main.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index fbf1352e9c1c..951714a7f90a 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -8637,6 +8637,14 @@ static int bnx2x_nway_reset(struct net_device *dev) return 0; } +static u32 +bnx2x_get_link(struct net_device *dev) +{ + struct bnx2x *bp = netdev_priv(dev); + + return bp->link_vars.link_up; +} + static int bnx2x_get_eeprom_len(struct net_device *dev) { struct bnx2x *bp = netdev_priv(dev); @@ -10034,7 +10042,7 @@ static struct ethtool_ops bnx2x_ethtool_ops = { .get_msglevel = bnx2x_get_msglevel, .set_msglevel = bnx2x_set_msglevel, .nway_reset = bnx2x_nway_reset, - .get_link = ethtool_op_get_link, + .get_link = bnx2x_get_link, .get_eeprom_len = bnx2x_get_eeprom_len, .get_eeprom = bnx2x_get_eeprom, .set_eeprom = bnx2x_set_eeprom, From 008440e3ad4b72f5048d1b1f6f5ed894fdc5ad08 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Tue, 30 Jun 2009 12:47:19 -0700 Subject: [PATCH 204/741] ipv4: Fix fib_trie rebalancing, part 3 Alas current delaying of freeing old tnodes by RCU in trie_rebalance is still not enough because we can free a top tnode before updating a t->trie pointer. Reported-by: Pawel Staszewski Tested-by: Pawel Staszewski Signed-off-by: Jarek Poplawski Signed-off-by: David S. 
Miller --- net/ipv4/fib_trie.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 012cf5a68581..00a54b246dfe 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1021,6 +1021,9 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) (struct node *)tn, wasfull); tp = node_parent((struct node *) tn); + if (!tp) + rcu_assign_pointer(t->trie, (struct node *)tn); + tnode_free_flush(); if (!tp) break; From f7c2df9b55212d5ec94169a4de11e44c683e0af4 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 30 Jun 2009 21:10:13 +0100 Subject: [PATCH 205/741] AFS: Fix lock imbalance Don't unlock on vfs_rejected_lock path in afs_do_setlk, since the lock is unlocked after abort_attempt label. Signed-off-by: Jiri Slaby Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/afs/flock.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 210acafe4a9b..3ff8bdd18fb3 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -432,7 +432,6 @@ vfs_rejected_lock: list_del_init(&fl->fl_u.afs.link); if (list_empty(&vnode->granted_locks)) afs_defer_unlock(vnode, key); - spin_unlock(&vnode->lock); goto abort_attempt; } From 2cdb3f1d834aab27a927be7555fbf4f9e43e9261 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 24 Jun 2009 19:01:19 -0700 Subject: [PATCH 206/741] x86/PCI: fix boundary checking when using root CRS Don't touch info->res_num if we are out of space. 
Acked-by: Gary Hade Tested-by: Gary Hade Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index b26626dc517c..8bf152910eb0 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -68,6 +68,10 @@ setup_resource(struct acpi_resource *acpi_res, void *data) unsigned long flags; struct resource *root; int max_root_bus_resources = PCI_BUS_NUM_RESOURCES; + u64 start, end; + + if (bus_has_transparent_bridge(info->bus)) + max_root_bus_resources -= 3; status = resource_to_addr(acpi_res, &addr); if (!ACPI_SUCCESS(status)) @@ -84,25 +88,24 @@ setup_resource(struct acpi_resource *acpi_res, void *data) } else return AE_OK; - res = &info->res[info->res_num]; - res->name = info->name; - res->flags = flags; - res->start = addr.minimum + addr.translation_offset; - res->end = res->start + addr.address_length - 1; - res->child = NULL; - - if (bus_has_transparent_bridge(info->bus)) - max_root_bus_resources -= 3; + start = addr.minimum + addr.translation_offset; + end = start + addr.address_length - 1; if (info->res_num >= max_root_bus_resources) { printk(KERN_WARNING "PCI: Failed to allocate 0x%lx-0x%lx " "from %s for %s due to _CRS returning more than " - "%d resource descriptors\n", (unsigned long) res->start, - (unsigned long) res->end, root->name, info->name, + "%d resource descriptors\n", (unsigned long) start, + (unsigned long) end, root->name, info->name, max_root_bus_resources); - info->res_num++; return AE_OK; } + res = &info->res[info->res_num]; + res->name = info->name; + res->flags = flags; + res->start = start; + res->end = end; + res->child = NULL; + if (insert_resource(root, res)) { printk(KERN_ERR "PCI: Failed to allocate 0x%lx-0x%lx " "from %s for %s\n", (unsigned long) res->start, From 626fdfec1588ac1341a37805809d03a719d977e0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 24 Jun 2009 20:00:12 
-0700 Subject: [PATCH 207/741] x86/PCI: get root CRS before scanning children This allows us to remove adjust_transparent_bridge_resources and give x86_pci_root_bus_res_quirks a chance when _CRS is not used or not there. Acked-by: Gary Hade Tested-by: Gary Hade Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 32 +++++++++----------------------- arch/x86/pci/amd_bus.c | 8 ++++++-- 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 8bf152910eb0..1014eb4bfc37 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -117,23 +117,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data) return AE_OK; } -static void -adjust_transparent_bridge_resources(struct pci_bus *bus) -{ - struct pci_dev *dev; - - list_for_each_entry(dev, &bus->devices, bus_list) { - int i; - u16 class = dev->class >> 8; - - if (class == PCI_CLASS_BRIDGE_PCI && dev->transparent) { - for(i = 3; i < PCI_BUS_NUM_RESOURCES; i++) - dev->subordinate->resource[i] = - dev->bus->resource[i - 3]; - } - } -} - static void get_current_resources(struct acpi_device *device, int busnum, int domain, struct pci_bus *bus) @@ -161,8 +144,6 @@ get_current_resources(struct acpi_device *device, int busnum, info.res_num = 0; acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, &info); - if (info.res_num) - adjust_transparent_bridge_resources(bus); return; @@ -225,8 +206,15 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do */ memcpy(bus->sysdata, sd, sizeof(*sd)); kfree(sd); - } else - bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); + } else { + bus = pci_create_bus(NULL, busnum, &pci_root_ops, sd); + if (bus) { + if (pci_probe & PCI_USE__CRS) + get_current_resources(device, busnum, domain, + bus); + bus->subordinate = pci_scan_child_bus(bus); + } + } if (!bus) kfree(sd); @@ -241,8 +229,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device 
*device, int do #endif } - if (bus && (pci_probe & PCI_USE__CRS)) - get_current_resources(device, busnum, domain, bus); return bus; } diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index f893d6a6e803..3ffa10df20b9 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -100,8 +100,9 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) int j; struct pci_root_info *info; - /* don't go for it if _CRS is used */ - if (pci_probe & PCI_USE__CRS) + /* don't go for it if _CRS is used already */ + if (b->resource[0] != &ioport_resource || + b->resource[1] != &iomem_resource) return; /* if only one root bus, don't need to anything */ @@ -116,6 +117,9 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) if (i == pci_root_num) return; + printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n", + b->number); + info = &pci_root_info[i]; for (j = 0; j < info->res_num; j++) { struct resource *res; From 2be8412c6cef97b01dfaae71c04bf585d3d93a3b Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 30 Jun 2009 14:02:00 -0700 Subject: [PATCH 208/741] [IA64] sprintf should not be used with same source & destination address This happens to work at the moment but isn't a good idea so fix it the simple way. 
Resolves-bug: http://bugzilla.kernel.org/show_bug.cgi?id=13576 Signed-off-by: Alan Cox Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/io_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c index 76645cf6ac5d..25831c47c579 100644 --- a/arch/ia64/sn/kernel/io_common.c +++ b/arch/ia64/sn/kernel/io_common.c @@ -435,7 +435,8 @@ void sn_generate_path(struct pci_bus *pci_bus, char *address) bricktype = MODULE_GET_BTYPE(moduleid); if ((bricktype == L1_BRICKTYPE_191010) || (bricktype == L1_BRICKTYPE_1932)) - sprintf(address, "%s^%d", address, geo_slot(geoid)); + sprintf(address + strlen(address), "^%d", + geo_slot(geoid)); } void __devinit From 58782b34e9ffcc04619634efe9658263344ed188 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 28 Jun 2009 09:26:07 -0700 Subject: [PATCH 209/741] [IA64] Remove unnecessary semicolons Signed-off-by: Joe Perches Signed-off-by: Tony Luck --- arch/ia64/kernel/esi.c | 2 +- arch/ia64/kvm/process.c | 6 +++--- arch/ia64/kvm/vcpu.c | 2 +- arch/ia64/kvm/vtlb.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/ia64/kernel/esi.c b/arch/ia64/kernel/esi.c index ebf4e988e78c..d5764a3d74af 100644 --- a/arch/ia64/kernel/esi.c +++ b/arch/ia64/kernel/esi.c @@ -65,7 +65,7 @@ static int __init esi_init (void) } if (!esi) - return -ENODEV;; + return -ENODEV; systab = __va(esi); diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c index a8f84da04b49..bb862fb224f2 100644 --- a/arch/ia64/kvm/process.c +++ b/arch/ia64/kvm/process.c @@ -130,7 +130,7 @@ static void collect_interruption(struct kvm_vcpu *vcpu) if (vdcr & IA64_DCR_PP) { vpsr |= IA64_PSR_PP; } else { - vpsr &= ~IA64_PSR_PP;; + vpsr &= ~IA64_PSR_PP; } vcpu_set_psr(vcpu, vpsr); @@ -594,11 +594,11 @@ static void set_pal_call_data(struct kvm_vcpu *vcpu) p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30); break; case PAL_BRAND_INFO: - p->u.pal_data.gr29 = gr29;; + p->u.pal_data.gr29 
= gr29; p->u.pal_data.gr30 = kvm_trans_pal_call_args(vcpu, gr30); break; default: - p->u.pal_data.gr29 = gr29;; + p->u.pal_data.gr29 = gr29; p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30); } p->u.pal_data.gr28 = gr28; diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c index a2c6c15e4761..46b02cbcc874 100644 --- a/arch/ia64/kvm/vcpu.c +++ b/arch/ia64/kvm/vcpu.c @@ -406,7 +406,7 @@ void getreg(unsigned long regnum, unsigned long *val, * Now look at registers in [0-31] range and init correct UNAT */ addr = (unsigned long)regs; - unat = &regs->eml_unat;; + unat = &regs->eml_unat; addr += gr_info[regnum]; diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c index 4290a429bf7c..20b3852f7a6e 100644 --- a/arch/ia64/kvm/vtlb.c +++ b/arch/ia64/kvm/vtlb.c @@ -135,7 +135,7 @@ struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type) u64 rid; rid = vcpu_get_rr(vcpu, va); - rid = rid & RR_RID_MASK;; + rid = rid & RR_RID_MASK; if (type == D_TLB) { if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) { for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0; @@ -518,7 +518,7 @@ struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data) struct thash_cb *hcb = &v->arch.vtlb; - cch = __vtr_lookup(v, va, is_data);; + cch = __vtr_lookup(v, va, is_data); if (cch) return cch; From fa276f36f3d8743295e067fb483b42dca8bd1ece Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 30 Jun 2009 12:01:57 +0100 Subject: [PATCH 210/741] [IA64] address compiler warnings perfmon.c/salinfo.c perfmon.c has a dubious cast directly from "int" to "void *". Add an intermediate cast to "long" to keep gcc happy. salinfo.c uses "down_trylock()" in a highly creative way (explained in the comments in the file) ... but it does kick out this warning: arch/ia64/kernel/salinfo.c:195: warning: ignoring return value of 'down_trylock' which people occasionally try to "fix" in ways that do not work. Use some casts to keep gcc quiet.
Signed-off-by: Jan Beulich Signed-off-by: Tony Luck --- arch/ia64/kernel/perfmon.c | 2 +- arch/ia64/kernel/salinfo.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index abce2468a40b..f1782705b1f7 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -5603,7 +5603,7 @@ pfm_interrupt_handler(int irq, void *arg) * /proc/perfmon interface, for debug only */ -#define PFM_PROC_SHOW_HEADER ((void *)nr_cpu_ids+1) +#define PFM_PROC_SHOW_HEADER ((void *)(long)nr_cpu_ids+1) static void * pfm_proc_start(struct seq_file *m, loff_t *pos) diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index 7053c55b7649..e6676fca4828 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -192,7 +192,7 @@ struct salinfo_platform_oemdata_parms { static void salinfo_work_to_do(struct salinfo_data *data) { - down_trylock(&data->mutex); + (void)(down_trylock(&data->mutex) ?: 0); up(&data->mutex); } From fadfd2b6ba8838a6cc458dbae214993a177a3ee9 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 30 Jun 2009 14:28:54 -0700 Subject: [PATCH 211/741] Add Fenghua Yu as temporary co-maintainer for ia64 I'm taking my sabbatical from Intel for July/August 2009. Fenghua Yu will handle ia64 architecture while I'm gone. 
Signed-off-by: Tony Luck --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 05a9a563042f..f3bbc759360c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2851,7 +2851,9 @@ S: Maintained IA64 (Itanium) PLATFORM P: Tony Luck +P: Fenghua Yu M: tony.luck@intel.com +M: fenghua.yu@intel.com L: linux-ia64@vger.kernel.org W: http://www.ia64-linux.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6.git From 9e314996e3dc5189b9b36dce67088e882e989897 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 30 Jun 2009 15:00:20 +0200 Subject: [PATCH 212/741] x86: Fix symbol annotation for arch/x86/lib/clear_page_64.S::clear_page_c Noticed the zero-sized function symbol while looking at 'perf' profiles, it causes the profiler to display those addresses in hexa. Turns out that this was wrong/bogus for an eternity. Signed-off-by: Mike Galbraith Acked-by: Alexander van Heukelum Acked-by: Cyrill Gorcunov LKML-Reference: <1246366820.6538.1.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- arch/x86/lib/clear_page_64.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index 9a10a78bb4a4..ebeafcce04a9 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -5,15 +5,14 @@ * Zero a page. * rdi page */ - ALIGN -clear_page_c: +ENTRY(clear_page_c) CFI_STARTPROC movl $4096/8,%ecx xorl %eax,%eax rep stosq ret CFI_ENDPROC -ENDPROC(clear_page) +ENDPROC(clear_page_c) ENTRY(clear_page) CFI_STARTPROC From 25903407da21552419e0955705d6d8c8e601cb2e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Jun 2009 19:01:20 -0300 Subject: [PATCH 213/741] perf report: Add --dsos parameter So that we can filter by dso. Symbols in other dsos won't be accounted for. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1246399282-20934-2-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-report.txt | 4 + tools/perf/Makefile | 2 + tools/perf/builtin-report.c | 16 ++ tools/perf/util/strlist.c | 184 +++++++++++++++++++++++ tools/perf/util/strlist.h | 32 ++++ 5 files changed, 238 insertions(+) create mode 100644 tools/perf/util/strlist.c create mode 100644 tools/perf/util/strlist.h diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 40c1db83a16d..13d85ca8c914 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -20,6 +20,10 @@ OPTIONS -i:: --input=:: Input file name. (default: perf.data) +-d:: +--dsos=:: + Only consider symbols in these dsos. CSV that understands + file://filename entries. SEE ALSO -------- diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 1c1296d8a64b..9c6d0ae3708e 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -301,6 +301,7 @@ LIB_H += util/util.h LIB_H += util/help.h LIB_H += util/strbuf.h LIB_H += util/string.h +LIB_H += util/strlist.h LIB_H += util/run-command.h LIB_H += util/sigchain.h LIB_H += util/symbol.h @@ -322,6 +323,7 @@ LIB_OBJS += util/run-command.o LIB_OBJS += util/quote.o LIB_OBJS += util/strbuf.o LIB_OBJS += util/string.o +LIB_OBJS += util/strlist.o LIB_OBJS += util/usage.o LIB_OBJS += util/wrapper.o LIB_OBJS += util/sigchain.o diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ed391db9e0f8..7c6b6e776718 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -16,6 +16,7 @@ #include "util/symbol.h" #include "util/string.h" #include "util/callchain.h" +#include "util/strlist.h" #include "perf.h" #include "util/header.h" @@ -32,6 +33,8 @@ static char *vmlinux = NULL; static char default_sort_order[] = "comm,dso"; static char 
*sort_order = default_sort_order; +static char *dso_list_str; +static struct strlist *dso_list; static int input; static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; @@ -1272,6 +1275,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (show & show_mask) { struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); + if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name)) + return 0; + if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { eprintf("problem incrementing symbol count, skipping event\n"); return -1; @@ -1659,6 +1665,8 @@ static const struct option options[] = { OPT_BOOLEAN('x', "exclude-other", &exclude_other, "Only display entries with parent-match"), OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"), + OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]", + "only consider symbols in these dsos"), OPT_END() }; @@ -1698,6 +1706,14 @@ int cmd_report(int argc, const char **argv, const char *prefix) if (argc) usage_with_options(report_usage, options); + if (dso_list_str) { + dso_list = strlist__new(true, dso_list_str); + if (!dso_list) { + fprintf(stderr, "problems parsing dso list\n"); + exit(129); + } + } + setup_pager(); return __cmd_report(); diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c new file mode 100644 index 000000000000..025a78edfffe --- /dev/null +++ b/tools/perf/util/strlist.c @@ -0,0 +1,184 @@ +/* + * (c) 2009 Arnaldo Carvalho de Melo + * + * Licensed under the GPLv2. 
+ */ + +#include "strlist.h" +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +static struct str_node *str_node__new(const char *s, bool dupstr) +{ + struct str_node *self = malloc(sizeof(*self)); + + if (self != NULL) { + if (dupstr) { + s = strdup(s); + if (s == NULL) + goto out_delete; + } + self->s = s; + } + + return self; + +out_delete: + free(self); + return NULL; +} + +static void str_node__delete(struct str_node *self, bool dupstr) +{ + if (dupstr) + free((void *)self->s); + free(self); +} + +int strlist__add(struct strlist *self, const char *new_entry) +{ + struct rb_node **p = &self->entries.rb_node; + struct rb_node *parent = NULL; + struct str_node *sn; + + while (*p != NULL) { + int rc; + + parent = *p; + sn = rb_entry(parent, struct str_node, rb_node); + rc = strcmp(sn->s, new_entry); + + if (rc > 0) + p = &(*p)->rb_left; + else if (rc < 0) + p = &(*p)->rb_right; + else + return -EEXIST; + } + + sn = str_node__new(new_entry, self->dupstr); + if (sn == NULL) + return -ENOMEM; + + rb_link_node(&sn->rb_node, parent, p); + rb_insert_color(&sn->rb_node, &self->entries); + + return 0; +} + +int strlist__load(struct strlist *self, const char *filename) +{ + char entry[1024]; + int err; + FILE *fp = fopen(filename, "r"); + + if (fp == NULL) + return errno; + + while (fgets(entry, sizeof(entry), fp) != NULL) { + const size_t len = strlen(entry); + + if (len == 0) + continue; + entry[len - 1] = '\0'; + + err = strlist__add(self, entry); + if (err != 0) + goto out; + } + + err = 0; +out: + fclose(fp); + return err; +} + +void strlist__remove(struct strlist *self, struct str_node *sn) +{ + rb_erase(&sn->rb_node, &self->entries); + str_node__delete(sn, self->dupstr); +} + +bool strlist__has_entry(struct strlist *self, const char *entry) +{ + struct rb_node **p = &self->entries.rb_node; + struct rb_node *parent = NULL; + + while (*p != NULL) { + struct str_node *sn; + int rc; + + parent = *p; + sn = rb_entry(parent, struct str_node, rb_node); + rc = strcmp(sn->s, entry);
+ + if (rc > 0) + p = &(*p)->rb_left; + else if (rc < 0) + p = &(*p)->rb_right; + else + return true; + } + + return false; +} + +static int strlist__parse_list_entry(struct strlist *self, const char *s) +{ + if (strncmp(s, "file://", 7) == 0) + return strlist__load(self, s + 7); + + return strlist__add(self, s); +} + +int strlist__parse_list(struct strlist *self, const char *s) +{ + char *sep; + int err; + + while ((sep = strchr(s, ',')) != NULL) { + *sep = '\0'; + err = strlist__parse_list_entry(self, s); + *sep = ','; + if (err != 0) + return err; + s = sep + 1; + } + + return *s ? strlist__parse_list_entry(self, s) : 0; +} + +struct strlist *strlist__new(bool dupstr, const char *slist) +{ + struct strlist *self = malloc(sizeof(*self)); + + if (self != NULL) { + self->entries = RB_ROOT; + self->dupstr = dupstr; + if (slist && strlist__parse_list(self, slist) != 0) + goto out_error; + } + + return self; +out_error: + free(self); + return NULL; +} + +void strlist__delete(struct strlist *self) +{ + if (self != NULL) { + struct str_node *pos; + struct rb_node *next = rb_first(&self->entries); + + while (next) { + pos = rb_entry(next, struct str_node, rb_node); + next = rb_next(&pos->rb_node); + strlist__remove(self, pos); + } + self->entries = RB_ROOT; + free(self); + } +} diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h new file mode 100644 index 000000000000..2fb117fb4b67 --- /dev/null +++ b/tools/perf/util/strlist.h @@ -0,0 +1,32 @@ +#ifndef STRLIST_H_ +#define STRLIST_H_ + +#include "rbtree.h" +#include <stdbool.h> + +struct str_node { + struct rb_node rb_node; + const char *s; +}; + +struct strlist { + struct rb_root entries; + bool dupstr; +}; + +struct strlist *strlist__new(bool dupstr, const char *slist); +void strlist__delete(struct strlist *self); + +void strlist__remove(struct strlist *self, struct str_node *sn); +int strlist__load(struct strlist *self, const char *filename); +int strlist__add(struct strlist *self, const char *str); + +bool
strlist__has_entry(struct strlist *self, const char *entry); + +static inline bool strlist__empty(const struct strlist *self) +{ + return rb_first(&self->entries) == NULL; +} + +int strlist__parse_list(struct strlist *self, const char *s); +#endif /* STRLIST_H_ */ From cc8b88b15ab8e5ae162a46c4b6b286b555190dd1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Jun 2009 19:01:21 -0300 Subject: [PATCH 214/741] perf report: Add --comms parameter So that we can filter by comm. Symbols in other comms won't be accounted for. Signed-off-by: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1246399282-20934-3-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-report.txt | 4 +++ tools/perf/builtin-report.c | 33 +++++++++++++++++------- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 13d85ca8c914..4c44ef1747b9 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -24,6 +24,10 @@ OPTIONS --dsos=:: Only consider symbols in these dsos. CSV that understands file://filename entries. +-C:: +--comms=:: + Only consider symbols in these comms. CSV that understands + file://filename entries. 
SEE ALSO -------- diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 7c6b6e776718..8143477b7ef7 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -33,8 +33,8 @@ static char *vmlinux = NULL; static char default_sort_order[] = "comm,dso"; static char *sort_order = default_sort_order; -static char *dso_list_str; -static struct strlist *dso_list; +static char *dso_list_str, *comm_list_str; +static struct strlist *dso_list, *comm_list; static int input; static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; @@ -240,7 +240,7 @@ static u64 vdso__map_ip(struct map *map, u64 ip) static inline int is_anon_memory(const char *filename) { - return strcmp(filename, "//anon") == 0; + return strcmp(filename, "//anon") == 0; } static struct map *map__new(struct mmap_event *event) @@ -1253,6 +1253,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) return -1; } + if (comm_list && !strlist__has_entry(comm_list, thread->comm)) + return 0; + if (event->header.misc & PERF_EVENT_MISC_KERNEL) { show = SHOW_KERNEL; level = 'k'; @@ -1667,6 +1670,8 @@ static const struct option options[] = { OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"), OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]", "only consider symbols in these dsos"), + OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]", + "only consider symbols in these comms"), OPT_END() }; @@ -1685,6 +1690,19 @@ static void setup_sorting(void) free(str); } +static void setup_list(struct strlist **list, const char *list_str, + const char *list_name) +{ + if (list_str) { + *list = strlist__new(true, list_str); + if (!*list) { + fprintf(stderr, "problems parsing %s list\n", + list_name); + exit(129); + } + } +} + int cmd_report(int argc, const char **argv, const char *prefix) { symbol__init(); @@ -1706,13 +1724,8 @@ int cmd_report(int argc, const char **argv, const char *prefix) if (argc) usage_with_options(report_usage, options); - 
if (dso_list_str) { - dso_list = strlist__new(true, dso_list_str); - if (!dso_list) { - fprintf(stderr, "problems parsing dso list\n"); - exit(129); - } - } + setup_list(&dso_list, dso_list_str, "dso"); + setup_list(&comm_list, comm_list_str, "comm"); setup_pager(); From 7bec7a9134c25cecb0d7029199b59f7b1bef35b8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Jun 2009 19:01:22 -0300 Subject: [PATCH 215/741] perf report: Add --symbols parameter So that we can filter by symbol name. The 'pfunct' utility in the 'dwarves' package can be used to create a file with the functions one wants. Example: [acme@doppio pahole]$ pfunct /usr/lib/debug/usr/lib64/libdw-0.141.so.debug | grep dwarf > /tmp/dwarf.symbols [acme@doppio pahole]$ wc -l /tmp/dwarf.symbols 93 /tmp/dwarf.symbols [acme@doppio pahole]$ head -3 /tmp/dwarf.symbols dwfl_addrdwarf dwfl_module_getdwarf dwfl_getdwarf [acme@doppio pahole]$ perf report --sort comm,dso,symbol --comms pahole --dsos /usr/lib64/libdw-0.141.so --symbols file:///tmp/dwarf.symbols 33.99% pahole /usr/lib64/libdw-0.141.so [.] dwarf_tag 29.07% pahole /usr/lib64/libdw-0.141.so [.] dwarf_decl_file 27.71% pahole /usr/lib64/libdw-0.141.so [.] dwarf_getsrclines 4.54% pahole /usr/lib64/libdw-0.141.so 0x00000000007400 3.93% pahole /usr/lib64/libdw-0.141.so [.] dwarf_decl_line 0.46% pahole /usr/lib64/libdw-0.141.so [.] dwarf_getlocation 0.18% pahole /usr/lib64/libdw-0.141.so [.] __libdwarf_next_prime 0.13% pahole /usr/lib64/libdw-0.141.so [.] 
dwarf_diecu [acme@doppio pahole]$ Signed-off-by: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1246399282-20934-4-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-report.txt | 4 ++++ tools/perf/builtin-report.c | 10 ++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 4c44ef1747b9..8aa3f8c88707 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -28,6 +28,10 @@ OPTIONS --comms=:: Only consider symbols in these comms. CSV that understands file://filename entries. +-S:: +--symbols=:: + Only consider these symbols. CSV that understands + file://filename entries. SEE ALSO -------- diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8143477b7ef7..135b7837e6bf 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -33,8 +33,8 @@ static char *vmlinux = NULL; static char default_sort_order[] = "comm,dso"; static char *sort_order = default_sort_order; -static char *dso_list_str, *comm_list_str; -static struct strlist *dso_list, *comm_list; +static char *dso_list_str, *comm_list_str, *sym_list_str; +static struct strlist *dso_list, *comm_list, *sym_list; static int input; static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; @@ -1281,6 +1281,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name)) return 0; + if (sym_list && sym && !strlist__has_entry(sym_list, sym->name)) + return 0; + if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { eprintf("problem incrementing symbol count, skipping event\n"); return -1; @@ -1672,6 +1675,8 @@ static const struct option options[] = { "only consider symbols in these dsos"), OPT_STRING('C', "comms", 
&comm_list_str, "comm[,comm...]", "only consider symbols in these comms"), + OPT_STRING('S', "symbols", &sym_list_str, "symbol[,symbol...]", + "only consider these symbols"), OPT_END() }; @@ -1726,6 +1731,7 @@ int cmd_report(int argc, const char **argv, const char *prefix) setup_list(&dso_list, dso_list_str, "dso"); setup_list(&comm_list, comm_list_str, "comm"); + setup_list(&sym_list, sym_list_str, "symbol"); setup_pager(); From 789d03f584484af85dbdc64935270c8e45f36ef7 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 30 Jun 2009 11:52:23 +0100 Subject: [PATCH 216/741] x86: Fix fixmap ordering The merge of the 32- and 64-bit fixmap headers made a latent bug on x86-64 a real one: with the right config settings it is possible for FIX_OHCI1394_BASE to overlap the FIX_BTMAP_* range. Signed-off-by: Jan Beulich Cc: # for 2.6.30.x LKML-Reference: <4A4A0A8702000078000082E8@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fixmap.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 2d81af3974a0..3eb0f79a5320 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -114,9 +114,6 @@ enum fixed_addresses { FIX_TEXT_POKE0, /* reserve 2 pages for text_poke() */ FIX_TEXT_POKE1, __end_of_permanent_fixed_addresses, -#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT - FIX_OHCI1394_BASE, -#endif /* * 256 temporary boot-time mappings, used by early_ioremap(), * before ioremap() is functional. 
@@ -129,6 +126,9 @@ enum fixed_addresses { FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - (__end_of_permanent_fixed_addresses & 255), FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, +#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT + FIX_OHCI1394_BASE, +#endif #ifdef CONFIG_X86_32 FIX_WP_TEST, #endif From 1f208ea67821703fd4de056ea6f0baa81f4ad4a5 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 1 Jul 2009 09:00:44 +1000 Subject: [PATCH 217/741] perf report: Fix -z option Fix a copy and paste error, -z was setting the group option. Signed-off-by: Anton Blanchard Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org LKML-Reference: <20090630230140.714204656@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index cf0d21f1ae10..5c2965562c5d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -675,7 +675,7 @@ static const struct option options[] = { "put the counters into a counter group"), OPT_STRING('s', "sym-filter", &sym_filter, "pattern", "only display symbols matchig this pattern"), - OPT_BOOLEAN('z', "zero", &group, + OPT_BOOLEAN('z', "zero", &zero, "zero history across updates"), OPT_INTEGER('F', "freq", &freq, "profile at this frequency"), From 6717534ddc328ae5cdf89f1ef802db83fc451f19 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 1 Jul 2009 09:00:45 +1000 Subject: [PATCH 218/741] perf_counter tools: Remove zlib dependency The zlib devel libraries may not be installed and since we aren't using zlib we may as well remove it. 
Signed-off-by: Anton Blanchard Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org LKML-Reference: <20090630230140.802078956@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 9c6d0ae3708e..f572c90f610e 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -381,12 +381,6 @@ ifndef CC_LD_DYNPATH endif endif -ifdef ZLIB_PATH - BASIC_CFLAGS += -I$(ZLIB_PATH)/include - EXTLIBS += -L$(ZLIB_PATH)/$(lib) $(CC_LD_DYNPATH)$(ZLIB_PATH)/$(lib) -endif -EXTLIBS += -lz - ifdef NEEDS_SOCKET EXTLIBS += -lsocket endif From 2ab52083ffc057014e502cf3473adc41436922fa Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 1 Jul 2009 09:00:46 +1000 Subject: [PATCH 219/741] perf top: Move skip symbols to an array Move the list of symbols we skip into an array, making it easier to add new ones. Signed-off-by: Anton Blanchard Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org LKML-Reference: <20090630230140.904782938@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5c2965562c5d..731ec6d79c1c 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -286,11 +286,22 @@ static void *display_thread(void *arg) return NULL; } +/* Tag samples to be skipped. */ +char *skip_symbols[] = { + "default_idle", + "cpu_idle", + "enter_idle", + "exit_idle", + "mwait_idle", + NULL +}; + static int symbol_filter(struct dso *self, struct symbol *sym) { static int filter_match; struct sym_entry *syme; const char *name = sym->name; + int i; if (!strcmp(name, "_text") || !strcmp(name, "_etext") || @@ -302,13 +313,12 @@ static int symbol_filter(struct dso *self, struct symbol *sym) return 1; syme = dso__sym_priv(self, sym); - /* Tag samples to be skipped. 
*/ - if (!strcmp("default_idle", name) || - !strcmp("cpu_idle", name) || - !strcmp("enter_idle", name) || - !strcmp("exit_idle", name) || - !strcmp("mwait_idle", name)) - syme->skip = 1; + for (i = 0; skip_symbols[i]; i++) { + if (!strcmp(skip_symbols[i], name)) { + syme->skip = 1; + break; + } + } if (filter_match == 1) { filter_end = sym->start; From 3a3393ef75a14ae259a82f3f38624efa17884168 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 1 Jul 2009 09:00:47 +1000 Subject: [PATCH 220/741] perf top: Add ppc64 specific skip symbols and strip ppc64 . prefix Filter out some ppc64 specific idle loop functions and remove leading '.' on ppc64 text symbols. Signed-off-by: Anton Blanchard Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org LKML-Reference: <20090630230140.995643441@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 731ec6d79c1c..0506cd6e04cc 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -293,6 +293,8 @@ char *skip_symbols[] = { "enter_idle", "exit_idle", "mwait_idle", + "ppc64_runlatch_off", + "pseries_dedicated_idle_sleep", NULL }; @@ -303,6 +305,13 @@ static int symbol_filter(struct dso *self, struct symbol *sym) const char *name = sym->name; int i; + /* + * ppc64 uses function descriptors and appends a '.' to the + * start of every instruction address. Remove it. + */ + if (name[0] == '.') + name++; + if (!strcmp(name, "_text") || !strcmp(name, "_etext") || !strcmp(name, "_sinittext") || From d8db1b57d31a6b30ea2f0df318eab50fc92b38d6 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 1 Jul 2009 09:00:48 +1000 Subject: [PATCH 221/741] perf report: Fix reporting of hypervisor PERF_EVENT_MISC_* is not a bitmask, so we have to mask and compare. 
Signed-off-by: Anton Blanchard Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org LKML-Reference: <20090630230141.088394681@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 135b7837e6bf..88e88c510ae5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1213,6 +1213,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) struct map *map = NULL; void *more_data = event->ip.__more_data; struct ip_callchain *chain = NULL; + int cpumode; if (sample_type & PERF_SAMPLE_PERIOD) { period = *(u64 *)more_data; @@ -1256,7 +1257,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (comm_list && !strlist__has_entry(comm_list, thread->comm)) return 0; - if (event->header.misc & PERF_EVENT_MISC_KERNEL) { + cpumode = event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK; + + if (cpumode == PERF_EVENT_MISC_KERNEL) { show = SHOW_KERNEL; level = 'k'; @@ -1264,7 +1267,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) dprintf(" ...... dso: %s\n", dso->name); - } else if (event->header.misc & PERF_EVENT_MISC_USER) { + } else if (cpumode == PERF_EVENT_MISC_USER) { show = SHOW_USER; level = '.'; From fb9c818873a788c5c01c9868cc6050df96e2c7df Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 1 Jul 2009 09:00:49 +1000 Subject: [PATCH 222/741] perf report: Add hypervisor dso Add a dso for hypervisor samples. We don't get any symbol information on the ppc64 hypervisor but this at least gives us a high level summary of the time spent in there. 
Signed-off-by: Anton Blanchard Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org LKML-Reference: <20090630230141.182536873@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 88e88c510ae5..3f5d8ea05ff0 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -121,6 +121,7 @@ typedef union event_union { static LIST_HEAD(dsos); static struct dso *kernel_dso; static struct dso *vdso; +static struct dso *hypervisor_dso; static void dsos__add(struct dso *dso) { @@ -202,6 +203,11 @@ static int load_kernel(void) dsos__add(vdso); + hypervisor_dso = dso__new("[hypervisor]", 0); + if (!hypervisor_dso) + return -1; + dsos__add(hypervisor_dso); + return err; } @@ -640,7 +646,8 @@ sort__sym_print(FILE *fp, struct hist_entry *self) if (self->sym) { ret += fprintf(fp, "[%c] %s", - self->dso == kernel_dso ? 'k' : '.', self->sym->name); + self->dso == kernel_dso ? 'k' : + self->dso == hypervisor_dso ? 'h' : '.', self->sym->name); } else { ret += fprintf(fp, "%#016llx", (u64)self->ip); } @@ -963,6 +970,9 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, } switch (context) { + case PERF_CONTEXT_HV: + dso = hypervisor_dso; + break; case PERF_CONTEXT_KERNEL: dso = kernel_dso; break; @@ -1275,6 +1285,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) } else { show = SHOW_HV; level = 'H'; + + dso = hypervisor_dso; + dprintf(" ...... dso: [hypervisor]\n"); } From 944c54e7fc5ccf961bef2b5449958436b85de459 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Jul 2009 00:10:16 +0200 Subject: [PATCH 223/741] ia64/PCI: adjust section annotation for pcibios_setup() Should be __init. 
Acked-by: Tony Luck Signed-off-by: Jesse Barnes --- arch/ia64/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 729298f4b234..7de76dd352fe 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -537,7 +537,7 @@ pcibios_align_resource (void *data, struct resource *res, /* * PCI BIOS setup, always defaults to SAL interface */ -char * __devinit +char * __init pcibios_setup (char *str) { return str; From 8f6c2e4b325a8e9f8f47febb2fd0ed4fae7d45a9 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 1 Jul 2009 11:13:45 +1000 Subject: [PATCH 224/741] md: Use new topology calls to indicate alignment and I/O sizes Switch MD over to the new disk_stack_limits() function which checks for alignment and adjusts preferred I/O sizes when stacking. Also indicate preferred I/O sizes where applicable. Signed-off-by: Martin K. Petersen Signed-off-by: Mike Snitzer Signed-off-by: NeilBrown --- drivers/md/linear.c | 4 ++-- drivers/md/multipath.c | 7 ++++--- drivers/md/raid0.c | 9 +++++++-- drivers/md/raid1.c | 9 ++++----- drivers/md/raid10.c | 19 +++++++++++++------ drivers/md/raid5.c | 10 +++++++++- 6 files changed, 39 insertions(+), 19 deletions(-) diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 15c8b7b25a9b..5810fa906af0 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -166,8 +166,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) rdev->sectors = sectors * mddev->chunk_sectors; } - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_sector to one PAGE, as * a one page request is never in violation.
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index cbe368fa6598..237fe3fd235c 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -294,7 +294,8 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) for (path = first; path <= last; path++) if ((p=conf->multipaths+path)->rdev == NULL) { q = rdev->bdev->bd_disk->queue; - blk_queue_stack_limits(mddev->queue, q); + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_sector to one PAGE, as @@ -463,9 +464,9 @@ static int multipath_run (mddev_t *mddev) disk = conf->multipaths + disk_idx; disk->rdev = rdev; + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); /* as we don't honour merge_bvec_fn, we must never risk * violating it, not that we ever expect a device with * a merge_bvec_fn to be involved in multipath */ diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index ab4a489d8695..335f490dcad6 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -170,8 +170,8 @@ static int create_strip_zones(mddev_t *mddev) } dev[j] = rdev1; - blk_queue_stack_limits(mddev->queue, - rdev1->bdev->bd_disk->queue); + disk_stack_limits(mddev->gendisk, rdev1->bdev, + rdev1->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_sector to one PAGE, as * a one page request is never in violation. 
@@ -250,6 +250,11 @@ static int create_strip_zones(mddev_t *mddev) mddev->chunk_sectors << 9); goto abort; } + + blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); + blk_queue_io_opt(mddev->queue, + (mddev->chunk_sectors << 9) * mddev->raid_disks); + printk(KERN_INFO "raid0: done.\n"); mddev->private = conf; return 0; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 89939a7aef57..0569efba0c02 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1123,8 +1123,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) for (mirror = first; mirror <= last; mirror++) if ( !(p=conf->mirrors+mirror)->rdev) { - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_sector to one PAGE, as * a one page request is never in violation. @@ -1988,9 +1988,8 @@ static int run(mddev_t *mddev) disk = conf->mirrors + disk_idx; disk->rdev = rdev; - - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_sector to one PAGE, as * a one page request is never in violation. diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index ae12ceafe10c..7298a5e5a183 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1151,8 +1151,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) for ( ; mirror <= last ; mirror++) if ( !(p=conf->mirrors+mirror)->rdev) { - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_sector to one PAGE, as * a one page request is never in violation. 
@@ -2044,7 +2044,7 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks) static int run(mddev_t *mddev) { conf_t *conf; - int i, disk_idx; + int i, disk_idx, chunk_size; mirror_info_t *disk; mdk_rdev_t *rdev; int nc, fc, fo; @@ -2130,6 +2130,14 @@ static int run(mddev_t *mddev) spin_lock_init(&conf->device_lock); mddev->queue->queue_lock = &conf->device_lock; + chunk_size = mddev->chunk_sectors << 9; + blk_queue_io_min(mddev->queue, chunk_size); + if (conf->raid_disks % conf->near_copies) + blk_queue_io_opt(mddev->queue, chunk_size * conf->raid_disks); + else + blk_queue_io_opt(mddev->queue, chunk_size * + (conf->raid_disks / conf->near_copies)); + list_for_each_entry(rdev, &mddev->disks, same_set) { disk_idx = rdev->raid_disk; if (disk_idx >= mddev->raid_disks @@ -2138,9 +2146,8 @@ static int run(mddev_t *mddev) disk = conf->mirrors + disk_idx; disk->rdev = rdev; - - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_sector to one PAGE, as * a one page request is never in violation. 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f9f991e6e138..92ef9b6abfc7 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4452,7 +4452,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) static int run(mddev_t *mddev) { raid5_conf_t *conf; - int working_disks = 0; + int working_disks = 0, chunk_size; mdk_rdev_t *rdev; if (mddev->recovery_cp != MaxSector) @@ -4607,6 +4607,14 @@ static int run(mddev_t *mddev) md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); + chunk_size = mddev->chunk_sectors << 9; + blk_queue_io_min(mddev->queue, chunk_size); + blk_queue_io_opt(mddev->queue, chunk_size * + (conf->raid_disks - conf->max_degraded)); + + list_for_each_entry(rdev, &mddev->disks, same_set) + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); return 0; abort: From b8d966efd9a46a9a35beac50cbff6e30565125ef Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 1 Jul 2009 11:14:04 +1000 Subject: [PATCH 225/741] md: avoid dereferencing NULL pointer when accessing suspend_* sysfs attributes. If we try to modify one of the md/ sysfs files suspend_lo or suspend_hi when the array is not active, we dereference a NULL. Protect against that. 
Cc: stable@kernel.org Signed-off-by: NeilBrown --- drivers/md/md.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 09be637d52cb..2166af8a7654 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3573,7 +3573,8 @@ suspend_lo_store(mddev_t *mddev, const char *buf, size_t len) char *e; unsigned long long new = simple_strtoull(buf, &e, 10); - if (mddev->pers->quiesce == NULL) + if (mddev->pers == NULL || + mddev->pers->quiesce == NULL) return -EINVAL; if (buf == e || (*e && *e != '\n')) return -EINVAL; @@ -3601,7 +3602,8 @@ suspend_hi_store(mddev_t *mddev, const char *buf, size_t len) char *e; unsigned long long new = simple_strtoull(buf, &e, 10); - if (mddev->pers->quiesce == NULL) + if (mddev->pers == NULL || + mddev->pers->quiesce == NULL) return -EINVAL; if (buf == e || (*e && *e != '\n')) return -EINVAL; From 133890103b9de08904f909995973e4b5c08a780e Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Tue, 30 Jun 2009 11:41:11 -0700 Subject: [PATCH 226/741] eventfd: revised interface and cleanups Change the eventfd interface to de-couple the eventfd memory context from the file pointer instance. Without such change, there is no clean, race-free way to handle the POLLHUP event sent when the last instance of the file* goes away. Also, now the internal eventfd APIs are using the eventfd context instead of the file*. This patch is required by KVM's IRQfd code, which is still under development.
Signed-off-by: Davide Libenzi Cc: Gregory Haskins Cc: Rusty Russell Cc: Benjamin LaHaise Cc: Avi Kivity Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/lguest/lg.h | 2 +- drivers/lguest/lguest_user.c | 4 +- fs/aio.c | 24 ++----- fs/eventfd.c | 122 +++++++++++++++++++++++++++++++---- include/linux/aio.h | 4 +- include/linux/eventfd.h | 35 +++++++--- 6 files changed, 149 insertions(+), 42 deletions(-) diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index d4e8979735cb..9c3138265f8e 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -82,7 +82,7 @@ struct lg_cpu { struct lg_eventfd { unsigned long addr; - struct file *event; + struct eventfd_ctx *event; }; struct lg_eventfd_map { diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 32e297121058..9f9a2953b383 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -50,7 +50,7 @@ static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) /* Now append new entry. */ new->map[new->num].addr = addr; - new->map[new->num].event = eventfd_fget(fd); + new->map[new->num].event = eventfd_ctx_fdget(fd); if (IS_ERR(new->map[new->num].event)) { kfree(new); return PTR_ERR(new->map[new->num].event); @@ -357,7 +357,7 @@ static int close(struct inode *inode, struct file *file) /* Release any eventfds they registered. 
*/ for (i = 0; i < lg->eventfds->num; i++) - fput(lg->eventfds->map[i].event); + eventfd_ctx_put(lg->eventfds->map[i].event); kfree(lg->eventfds); /* If lg->dead doesn't contain an error code it will be NULL or a diff --git a/fs/aio.c b/fs/aio.c index 76da12537956..d065b2c3273e 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -485,6 +485,8 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req) { assert_spin_locked(&ctx->ctx_lock); + if (req->ki_eventfd != NULL) + eventfd_ctx_put(req->ki_eventfd); if (req->ki_dtor) req->ki_dtor(req); if (req->ki_iovec != &req->ki_inline_vec) @@ -509,8 +511,6 @@ static void aio_fput_routine(struct work_struct *data) /* Complete the fput(s) */ if (req->ki_filp != NULL) __fput(req->ki_filp); - if (req->ki_eventfd != NULL) - __fput(req->ki_eventfd); /* Link the iocb into the context's free list */ spin_lock_irq(&ctx->ctx_lock); @@ -528,8 +528,6 @@ static void aio_fput_routine(struct work_struct *data) */ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) { - int schedule_putreq = 0; - dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n", req, atomic_long_read(&req->ki_filp->f_count)); @@ -549,24 +547,16 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) * we would not be holding the last reference to the file*, so * this function will be executed w/out any aio kthread wakeup. 
*/ - if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) - schedule_putreq++; - else - req->ki_filp = NULL; - if (req->ki_eventfd != NULL) { - if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count))) - schedule_putreq++; - else - req->ki_eventfd = NULL; - } - if (unlikely(schedule_putreq)) { + if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) { get_ioctx(ctx); spin_lock(&fput_lock); list_add(&req->ki_list, &fput_head); spin_unlock(&fput_lock); queue_work(aio_wq, &fput_work); - } else + } else { + req->ki_filp = NULL; really_put_req(ctx, req); + } return 1; } @@ -1622,7 +1612,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, * an eventfd() fd, and will be signaled for each completed * event using the eventfd_signal() function. */ - req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd); + req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd); if (IS_ERR(req->ki_eventfd)) { ret = PTR_ERR(req->ki_eventfd); req->ki_eventfd = NULL; diff --git a/fs/eventfd.c b/fs/eventfd.c index 3f0e1974abdc..31d12de83a2a 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -14,35 +14,44 @@ #include #include #include -#include #include #include +#include +#include struct eventfd_ctx { + struct kref kref; wait_queue_head_t wqh; /* * Every time that a write(2) is performed on an eventfd, the * value of the __u64 being written is added to "count" and a * wakeup is performed on "wqh". A read(2) will return the "count" * value to userspace, and will reset "count" to zero. The kernel - * size eventfd_signal() also, adds to the "count" counter and + * side eventfd_signal() also, adds to the "count" counter and * issue a wakeup. */ __u64 count; unsigned int flags; }; -/* - * Adds "n" to the eventfd counter "count". Returns "n" in case of - * success, or a value lower then "n" in case of coutner overflow. - * This function is supposed to be called by the kernel in paths - * that do not allow sleeping. 
In this function we allow the counter - * to reach the ULLONG_MAX value, and we signal this as overflow - * condition by returining a POLLERR to poll(2). +/** + * eventfd_signal - Adds @n to the eventfd counter. + * @ctx: [in] Pointer to the eventfd context. + * @n: [in] Value of the counter to be added to the eventfd internal counter. + * The value cannot be negative. + * + * This function is supposed to be called by the kernel in paths that do not + * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX + * value, and we signal this as overflow condition by returining a POLLERR + * to poll(2). + * + * Returns @n in case of success, a non-negative number lower than @n in case + * of overflow, or the following error codes: + * + * -EINVAL : The value of @n is negative. */ -int eventfd_signal(struct file *file, int n) +int eventfd_signal(struct eventfd_ctx *ctx, int n) { - struct eventfd_ctx *ctx = file->private_data; unsigned long flags; if (n < 0) @@ -59,9 +68,45 @@ int eventfd_signal(struct file *file, int n) } EXPORT_SYMBOL_GPL(eventfd_signal); +static void eventfd_free(struct kref *kref) +{ + struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref); + + kfree(ctx); +} + +/** + * eventfd_ctx_get - Acquires a reference to the internal eventfd context. + * @ctx: [in] Pointer to the eventfd context. + * + * Returns: In case of success, returns a pointer to the eventfd context. + */ +struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx) +{ + kref_get(&ctx->kref); + return ctx; +} +EXPORT_SYMBOL_GPL(eventfd_ctx_get); + +/** + * eventfd_ctx_put - Releases a reference to the internal eventfd context. + * @ctx: [in] Pointer to eventfd context. + * + * The eventfd context reference must have been previously acquired either + * with eventfd_ctx_get() or eventfd_ctx_fdget()). 
+ */ +void eventfd_ctx_put(struct eventfd_ctx *ctx) +{ + kref_put(&ctx->kref, eventfd_free); +} +EXPORT_SYMBOL_GPL(eventfd_ctx_put); + static int eventfd_release(struct inode *inode, struct file *file) { - kfree(file->private_data); + struct eventfd_ctx *ctx = file->private_data; + + wake_up_poll(&ctx->wqh, POLLHUP); + eventfd_ctx_put(ctx); return 0; } @@ -185,6 +230,16 @@ static const struct file_operations eventfd_fops = { .write = eventfd_write, }; +/** + * eventfd_fget - Acquire a reference of an eventfd file descriptor. + * @fd: [in] Eventfd file descriptor. + * + * Returns a pointer to the eventfd file structure in case of success, or the + * following error pointer: + * + * -EBADF : Invalid @fd file descriptor. + * -EINVAL : The @fd file descriptor is not an eventfd file. + */ struct file *eventfd_fget(int fd) { struct file *file; @@ -201,6 +256,48 @@ struct file *eventfd_fget(int fd) } EXPORT_SYMBOL_GPL(eventfd_fget); +/** + * eventfd_ctx_fdget - Acquires a reference to the internal eventfd context. + * @fd: [in] Eventfd file descriptor. + * + * Returns a pointer to the internal eventfd context, otherwise the error + * pointers returned by the following functions: + * + * eventfd_fget + */ +struct eventfd_ctx *eventfd_ctx_fdget(int fd) +{ + struct file *file; + struct eventfd_ctx *ctx; + + file = eventfd_fget(fd); + if (IS_ERR(file)) + return (struct eventfd_ctx *) file; + ctx = eventfd_ctx_get(file->private_data); + fput(file); + + return ctx; +} +EXPORT_SYMBOL_GPL(eventfd_ctx_fdget); + +/** + * eventfd_ctx_fileget - Acquires a reference to the internal eventfd context. + * @file: [in] Eventfd file pointer. + * + * Returns a pointer to the internal eventfd context, otherwise the error + * pointer: + * + * -EINVAL : The @fd file descriptor is not an eventfd file. 
+ */ +struct eventfd_ctx *eventfd_ctx_fileget(struct file *file) +{ + if (file->f_op != &eventfd_fops) + return ERR_PTR(-EINVAL); + + return eventfd_ctx_get(file->private_data); +} +EXPORT_SYMBOL_GPL(eventfd_ctx_fileget); + SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) { int fd; @@ -217,6 +314,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) if (!ctx) return -ENOMEM; + kref_init(&ctx->kref); init_waitqueue_head(&ctx->wqh); ctx->count = count; ctx->flags = flags; diff --git a/include/linux/aio.h b/include/linux/aio.h index b16a957030f8..47f7d932a01d 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -121,9 +121,9 @@ struct kiocb { /* * If the aio_resfd field of the userspace iocb is not zero, - * this is the underlying file* to deliver event to. + * this is the underlying eventfd context to deliver events to. */ - struct file *ki_eventfd; + struct eventfd_ctx *ki_eventfd; }; #define is_sync_kiocb(iocb) ((iocb)->ki_key == KIOCB_SYNC_KEY) diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index f45a8ae5f828..3b85ba6479f4 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -8,10 +8,8 @@ #ifndef _LINUX_EVENTFD_H #define _LINUX_EVENTFD_H -#ifdef CONFIG_EVENTFD - -/* For O_CLOEXEC and O_NONBLOCK */ #include +#include /* * CAREFUL: Check include/asm-generic/fcntl.h when defining @@ -27,16 +25,37 @@ #define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) #define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE) +#ifdef CONFIG_EVENTFD + +struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx); +void eventfd_ctx_put(struct eventfd_ctx *ctx); struct file *eventfd_fget(int fd); -int eventfd_signal(struct file *file, int n); +struct eventfd_ctx *eventfd_ctx_fdget(int fd); +struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); +int eventfd_signal(struct eventfd_ctx *ctx, int n); #else /* CONFIG_EVENTFD */ -#define eventfd_fget(fd) ERR_PTR(-ENOSYS) -static inline int eventfd_signal(struct file 
*file, int n) -{ return 0; } +/* + * Ugly ugly ugly error layer to support modules that uses eventfd but + * pretend to work in !CONFIG_EVENTFD configurations. Namely, AIO. + */ +static inline struct eventfd_ctx *eventfd_ctx_fdget(int fd) +{ + return ERR_PTR(-ENOSYS); +} -#endif /* CONFIG_EVENTFD */ +static inline int eventfd_signal(struct eventfd_ctx *ctx, int n) +{ + return -ENOSYS; +} + +static inline void eventfd_ctx_put(struct eventfd_ctx *ctx) +{ + +} + +#endif #endif /* _LINUX_EVENTFD_H */ From 2a2325e6e8a3782795fb520220c36fd805775972 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 30 Jun 2009 11:41:13 -0700 Subject: [PATCH 227/741] gcov: fix __ctors_start alignment The ctors section for each object file is eight byte aligned (on 64 bit). However the __ctors_start symbol starts at an arbitrary address dependent on the size of the previous sections. Therefore the linker may add some zeroes after __ctors_start to make sure the ctors contents are properly aligned. However the extra zeroes at the beginning aren't expected by the code. When walking the functions pointers contained in there and extra zeroes are added this may result in random jumps. So make sure that the __ctors_start symbol is always aligned as well. 
Fixes this crash on an allyesconfig on s390: [ 0.582482] Kernel BUG at 0000000000000012 [verbose debug info unavailable] [ 0.582489] illegal operation: 0001 [#1] SMP DEBUG_PAGEALLOC [ 0.582496] Modules linked in: [ 0.582501] CPU: 0 Tainted: G W 2.6.31-rc1-dirty #273 [ 0.582506] Process swapper (pid: 1, task: 000000003f218000, ksp: 000000003f2238e8) [ 0.582510] Krnl PSW : 0704200180000000 0000000000000012 (0x12) [ 0.582518] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:0 CC:2 PM:0 EA:3 [ 0.582524] Krnl GPRS: 0000000000036727 0000000000000010 0000000000000001 0000000000000001 [ 0.582529] 00000000001dfefa 0000000000000000 0000000000000000 0000000000000040 [ 0.582534] 0000000001fff0f0 0000000001790628 0000000002296048 0000000002296048 [ 0.582540] 00000000020c438e 0000000001786000 0000000002014a66 000000003f223e60 [ 0.582553] Krnl Code:>0000000000000012: 0000 unknown [ 0.582559] 0000000000000014: 0000 unknown [ 0.582564] 0000000000000016: 0000 unknown [ 0.582570] 0000000000000018: 0000 unknown [ 0.582575] 000000000000001a: 0000 unknown [ 0.582580] 000000000000001c: 0000 unknown [ 0.582585] 000000000000001e: 0000 unknown [ 0.582591] 0000000000000020: 0000 unknown [ 0.582596] Call Trace: [ 0.582599] ([<0000000002014a46>] kernel_init+0x622/0x7a0) [ 0.582607] [<0000000000113e22>] kernel_thread_starter+0x6/0xc [ 0.582615] [<0000000000113e1c>] kernel_thread_starter+0x0/0xc [ 0.582621] INFO: lockdep is turned off. 
[ 0.582624] Last Breaking-Event-Address: [ 0.582627] [<0000000002014a64>] kernel_init+0x640/0x7a0 Cc: Peter Oberparleiter Cc: Ingo Molnar Cc: Martin Schwidefsky Signed-off-by: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/vmlinux.lds.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 92b73b6140ff..dccdbed05848 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -441,7 +441,8 @@ } #ifdef CONFIG_CONSTRUCTORS -#define KERNEL_CTORS() VMLINUX_SYMBOL(__ctors_start) = .; \ +#define KERNEL_CTORS() . = ALIGN(8); \ + VMLINUX_SYMBOL(__ctors_start) = .; \ *(.ctors) \ VMLINUX_SYMBOL(__ctors_end) = .; #else From c15e504bd008aedfcd2219051055b66bffdb6148 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 30 Jun 2009 11:41:13 -0700 Subject: [PATCH 228/741] MAINTAINERS: update EDAC-I82975X As per Ranganathan's request. Signed-off-by: Joe Perches Cc: Ranganathan Desikan Cc: Arvind R. Cc: Doug Thompson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 05a9a563042f..92fe0796cd1b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2130,9 +2130,9 @@ F: drivers/edac/i5400_edac.c EDAC-I82975X P: Ranganathan Desikan -M: rdesikan@jetzbroadband.com +M: ravi@jetztechnologies.com P: Arvind R. 
-M: arvind@acarlab.com +M: arvind@jetztechnologies.com L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) W: bluesmoke.sourceforge.net S: Maintained From 15e3252464432a29c5461325cb5243471bd2a219 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 30 Jun 2009 11:41:15 -0700 Subject: [PATCH 229/741] fbdev: work around old compiler bug When building with a 4.1.x compiler on powerpc64 (at least) we get this error: drivers/video/logo/logo_linux_mono.c:81: error: logo_linux_mono causes a section type conflict This was introduced by commit ae52bb2384f721562f15f719de1acb8e934733cb ("fbdev: move logo externs to header file"). This is a partial revert of that commit sufficient to not hit the compiler bug. Also convert _clut arrays from __initconst to __initdata. Sam said: Al analysed this some time ago. When we say something is const then _sometimes_ gcc annotate the section as const(?) - sometimes not. So if we have two variables/functions annotated __*const and gcc decides to annotate the section const only in one case we get a section type conflict. 
Signed-off-by: Stephen Rothwell Cc: Sam Ravnborg Cc: Krzysztof Helt Cc: Geert Uytterhoeven Cc: Kyle McMartin Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/pnmtologo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/pnmtologo.c b/scripts/pnmtologo.c index 64f5ddb09ea6..5c113123ed9f 100644 --- a/scripts/pnmtologo.c +++ b/scripts/pnmtologo.c @@ -237,7 +237,7 @@ static void write_header(void) fprintf(out, " * Linux logo %s\n", logoname); fputs(" */\n\n", out); fputs("#include \n\n", out); - fprintf(out, "static const unsigned char %s_data[] __initconst = {\n", + fprintf(out, "static unsigned char %s_data[] __initdata = {\n", logoname); } @@ -374,7 +374,7 @@ static void write_logo_clut224(void) fputs("\n};\n\n", out); /* write logo clut */ - fprintf(out, "static const unsigned char %s_clut[] __initconst = {\n", + fprintf(out, "static unsigned char %s_clut[] __initdata = {\n", logoname); write_hex_cnt = 0; for (i = 0; i < logo_clutsize; i++) { From b01e8dc34379f4ba2f454390e340a025edbaaa7e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 30 Jun 2009 11:41:18 -0700 Subject: [PATCH 230/741] alpha: fix percpu build breakage alpha percpu access requires custom SHIFT_PERCPU_PTR() definition for modules to work around addressing range limitation. This is done via generating inline assembly using C preprocessing which forces the assembler to generate external reference. This happens behind the compiler's back and makes the compiler think that static percpu variables in modules are unused. This used to be worked around by using __used attribute for percpu variables which prevents the compiler from omitting the variable; however, recent declare/definition attribute unification change broke this as __used can't be used for declaration. Also, in the process, PER_CPU_ATTRIBUTES definition in alpha percpu.h got broken.
This patch adds PER_CPU_DEF_ATTRIBUTES which is only used for definitions and make alpha use it to add __used for percpu variables in modules. This also fixes the PER_CPU_ATTRIBUTES double definition bug. Signed-off-by: Tejun Heo Tested-by: maximilian attems Acked-by: Ivan Kokshaysky Cc: Richard Henderson Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/percpu.h | 6 +++--- include/asm-generic/percpu.h | 4 ++++ include/linux/percpu-defs.h | 3 ++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/alpha/include/asm/percpu.h b/arch/alpha/include/asm/percpu.h index 06c5c7a4afd3..b663f1f10b6a 100644 --- a/arch/alpha/include/asm/percpu.h +++ b/arch/alpha/include/asm/percpu.h @@ -30,7 +30,7 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #ifndef MODULE #define SHIFT_PERCPU_PTR(var, offset) RELOC_HIDE(&per_cpu_var(var), (offset)) -#define PER_CPU_ATTRIBUTES +#define PER_CPU_DEF_ATTRIBUTES #else /* * To calculate addresses of locally defined variables, GCC uses 32-bit @@ -49,7 +49,7 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; : "=&r"(__ptr), "=&r"(tmp_gp)); \ (typeof(&per_cpu_var(var)))(__ptr + (offset)); }) -#define PER_CPU_ATTRIBUTES __used +#define PER_CPU_DEF_ATTRIBUTES __used #endif /* MODULE */ @@ -71,7 +71,7 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define __get_cpu_var(var) per_cpu_var(var) #define __raw_get_cpu_var(var) per_cpu_var(var) -#define PER_CPU_ATTRIBUTES +#define PER_CPU_DEF_ATTRIBUTES #endif /* SMP */ diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index d7d50d7ee51e..aa00800adacc 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -97,4 +97,8 @@ extern void setup_per_cpu_areas(void); #define PER_CPU_ATTRIBUTES #endif +#ifndef PER_CPU_DEF_ATTRIBUTES +#define PER_CPU_DEF_ATTRIBUTES +#endif + #endif /* _ASM_GENERIC_PERCPU_H_ */ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 
8f921d74f49f..68438e18fff4 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -24,7 +24,8 @@ #define DEFINE_PER_CPU_SECTION(type, name, section) \ __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + PER_CPU_ATTRIBUTES PER_CPU_DEF_ATTRIBUTES \ + __typeof__(type) per_cpu__##name /* * Variant on the per-CPU variable declaration/definition theme used for From 972c71a3183ab41c0b1a9e50842be7e3e980954f Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Tue, 30 Jun 2009 11:41:20 -0700 Subject: [PATCH 231/741] gcov: fix documentation Commonly available versions of cp and tar don't work well with special files created using seq_file. Mention this problem in the gcov documentation and update the helper script example to work around these problems. Signed-off-by: Peter Oberparleiter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/gcov.txt | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/Documentation/gcov.txt b/Documentation/gcov.txt index e716aadb3a33..40ec63352760 100644 --- a/Documentation/gcov.txt +++ b/Documentation/gcov.txt @@ -188,13 +188,18 @@ Solution: Exclude affected source files from profiling by specifying GCOV_PROFILE := n or GCOV_PROFILE_basename.o := n in the corresponding Makefile. +Problem: Files copied from sysfs appear empty or incomplete. +Cause: Due to the way seq_file works, some tools such as cp or tar + may not correctly copy files from sysfs. +Solution: Use 'cat' to read .gcda files and 'cp -d' to copy links. + Alternatively use the mechanism shown in Appendix B. 
+ Appendix A: gather_on_build.sh ============================== Sample script to gather coverage meta files on the build machine (see 6a): - #!/bin/bash KSRC=$1 @@ -226,7 +231,7 @@ Appendix B: gather_on_test.sh Sample script to gather coverage data files on the test machine (see 6b): -#!/bin/bash +#!/bin/bash -e DEST=$1 GCDA=/sys/kernel/debug/gcov @@ -236,11 +241,13 @@ if [ -z "$DEST" ] ; then exit 1 fi -find $GCDA -name '*.gcno' -o -name '*.gcda' | tar cfz $DEST -T - +TEMPDIR=$(mktemp -d) +echo Collecting data.. +find $GCDA -type d -exec mkdir -p $TEMPDIR/\{\} \; +find $GCDA -name '*.gcda' -exec sh -c 'cat < $0 > '$TEMPDIR'/$0' {} \; +find $GCDA -name '*.gcno' -exec sh -c 'cp -d $0 '$TEMPDIR'/$0' {} \; +tar czf $DEST -C $TEMPDIR sys +rm -rf $TEMPDIR -if [ $? -eq 0 ] ; then - echo "$DEST successfully created, copy to build system and unpack with:" - echo " tar xfz $DEST" -else - echo "Could not create file $DEST" -fi +echo "$DEST successfully created, copy to build system and unpack with:" +echo " tar xfz $DEST" From c4285b47b0514e2103584ee829246f813e7ae323 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Tue, 30 Jun 2009 11:41:21 -0700 Subject: [PATCH 232/741] parport/serial: add support for NetMos 9901 Multi-IO card Add support for the PCI-Express NetMos 9901 Multi-IO card. 
0001:06:00.0 Serial controller [0700]: NetMos Technology Device [9710:9901] (prog-if 02 [16550]) Subsystem: Device [a000:1000] Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx- Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- Kernel driver in use: serial Kernel modules: 8250_pci 0001:06:00.1 Serial controller [0700]: NetMos Technology Device [9710:9901] (prog-if 02 [16550]) Subsystem: Device [a000:1000] Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx- Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- Kernel driver in use: serial Kernel modules: 8250_pci 0001:06:00.2 Parallel controller [0701]: NetMos Technology Device [9710:9901] (prog-if 03 [IEEE1284]) Subsystem: Device [a000:2000] Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx- Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- Region 2: Memory at 80101000 (32-bit, non-prefetchable) [size=4K] Region 4: Memory at 80100000 (32-bit, non-prefetchable) [size=4K] Capabilities: Kernel driver in use: parport_pc Kernel modules: parport_pc [ 16.760181] PCI parallel port detected: 416c:0100, I/O at 0x812010(0x0), IRQ 65 [ 16.760225] parport0: PC-style at 0x812010, irq 65 [PCSPP,TRISTATE,EPP] [ 16.851842] serial 0001:06:00.0: enabling device (0004 -> 0007) [ 16.883776] 0001:06:00.0: ttyS0 at I/O 0x812030 (irq = 65) is a ST16650V2 [ 16.893832] serial 0001:06:00.1: enabling device (0004 -> 0007) [ 16.926537] 0001:06:00.1: ttyS1 at I/O 0x812020 (irq = 65) is a ST16650V2 Signed-off-by: Michael Buesch Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/parport/parport_pc.c | 5 ++++- drivers/serial/8250_pci.c | 6 ++++++ include/linux/pci_ids.h | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 
1032d5fdbd42..2597145a066e 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -2907,6 +2907,7 @@ enum parport_pc_pci_cards { netmos_9755, netmos_9805, netmos_9815, + netmos_9901, quatech_sppxp100, }; @@ -2987,7 +2988,7 @@ static struct parport_pc_pci { /* netmos_9755 */ { 2, { { 0, 1 }, { 2, 3 },} }, /* netmos_9805 */ { 1, { { 0, -1 }, } }, /* netmos_9815 */ { 2, { { 0, -1 }, { 2, -1 }, } }, - + /* netmos_9901 */ { 1, { { 0, -1 }, } }, /* quatech_sppxp100 */ { 1, { { 0, 1 }, } }, }; @@ -3089,6 +3090,8 @@ static const struct pci_device_id parport_pc_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, netmos_9805 }, { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9815, PCI_ANY_ID, PCI_ANY_ID, 0, 0, netmos_9815 }, + { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9901, + 0xA000, 0x2000, 0, 0, netmos_9901 }, /* Quatech SPPXP-100 Parallel port PCI ExpressCard */ { PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_SPPXP_100, PCI_ANY_ID, PCI_ANY_ID, 0, 0, quatech_sppxp100 }, diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index a07015d646dd..6160e03f410c 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -759,6 +759,8 @@ static int pci_netmos_init(struct pci_dev *dev) /* subdevice 0x00PS means

parallel, serial */ unsigned int num_serial = dev->subsystem_device & 0xf; + if (dev->device == PCI_DEVICE_ID_NETMOS_9901) + return 0; if (dev->subsystem_vendor == PCI_VENDOR_ID_IBM && dev->subsystem_device == 0x0299) return 0; @@ -3557,6 +3559,10 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_VENDOR_ID_IBM, 0x0299, 0, 0, pbn_b0_bt_2_115200 }, + { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9901, + 0xA000, 0x1000, + 0, 0, pbn_b0_1_115200 }, + /* * These entries match devices with class COMMUNICATION_SERIAL, * COMMUNICATION_MODEM or COMMUNICATION_MULTISERIAL diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index a3b000365795..73b46b6b904f 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2645,6 +2645,7 @@ #define PCI_DEVICE_ID_NETMOS_9835 0x9835 #define PCI_DEVICE_ID_NETMOS_9845 0x9845 #define PCI_DEVICE_ID_NETMOS_9855 0x9855 +#define PCI_DEVICE_ID_NETMOS_9901 0x9901 #define PCI_VENDOR_ID_3COM_2 0xa727 From b1cfebc9231a69d46d66982a2c856ba41ef6d6b9 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 30 Jun 2009 11:41:22 -0700 Subject: [PATCH 233/741] edac: add DDR3 memory type for MPC85xx EDAC Since some new MPC85xx SOCs support DDR3 memory now, so add DDR3 memory type for MPC85xx EDAC. 
Signed-off-by: Yang Shi Cc: Doug Thompson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/edac/edac_core.h | 4 ++++ drivers/edac/edac_mc_sysfs.c | 4 +++- drivers/edac/mpc85xx_edac.c | 6 ++++++ drivers/edac/mpc85xx_edac.h | 1 + 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index 3493c6bdb820..871c13b4c148 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h @@ -150,6 +150,8 @@ enum mem_type { MEM_FB_DDR2, /* fully buffered DDR2 */ MEM_RDDR2, /* Registered DDR2 RAM */ MEM_XDR, /* Rambus XDR */ + MEM_DDR3, /* DDR3 RAM */ + MEM_RDDR3, /* Registered DDR3 RAM */ }; #define MEM_FLAG_EMPTY BIT(MEM_EMPTY) @@ -167,6 +169,8 @@ enum mem_type { #define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2) #define MEM_FLAG_RDDR2 BIT(MEM_RDDR2) #define MEM_FLAG_XDR BIT(MEM_XDR) +#define MEM_FLAG_DDR3 BIT(MEM_DDR3) +#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3) /* chipset Error Detection and Correction capabilities and mode */ enum edac_type { diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index ad218fe4942d..e1d4ce083481 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -94,7 +94,9 @@ static const char *mem_types[] = { [MEM_DDR2] = "Unbuffered-DDR2", [MEM_FB_DDR2] = "FullyBuffered-DDR2", [MEM_RDDR2] = "Registered-DDR2", - [MEM_XDR] = "XDR" + [MEM_XDR] = "XDR", + [MEM_DDR3] = "Unbuffered-DDR3", + [MEM_RDDR3] = "Registered-DDR3" }; static const char *dev_types[] = { diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index 7c8c2d72916f..3f2ccfc6407c 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -757,6 +757,9 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci) case DSC_SDTYPE_DDR2: mtype = MEM_RDDR2; break; + case DSC_SDTYPE_DDR3: + mtype = MEM_RDDR3; + break; default: mtype = MEM_UNKNOWN; break; @@ -769,6 +772,9 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info 
*mci) case DSC_SDTYPE_DDR2: mtype = MEM_DDR2; break; + case DSC_SDTYPE_DDR3: + mtype = MEM_DDR3; + break; default: mtype = MEM_UNKNOWN; break; diff --git a/drivers/edac/mpc85xx_edac.h b/drivers/edac/mpc85xx_edac.h index 135b3539a030..52432ee7c4b9 100644 --- a/drivers/edac/mpc85xx_edac.h +++ b/drivers/edac/mpc85xx_edac.h @@ -53,6 +53,7 @@ #define DSC_SDTYPE_DDR 0x02000000 #define DSC_SDTYPE_DDR2 0x03000000 +#define DSC_SDTYPE_DDR3 0x07000000 #define DSC_X32_EN 0x00000020 /* Err_Int_En */ From 341c87bf346f57748230628c5ad6ee69219250e8 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 30 Jun 2009 11:41:23 -0700 Subject: [PATCH 234/741] elf: limit max map count to safe value With ELF, at generating coredump, some more headers other than used vmas are added. When max_map_count == 65536, a core generated by following kinds of code can be unreadable because the number of ELF's program header is written in 16bit in Ehdr (please see elf.h) and the number overflows. == ... = mmap(); (munmap, mprotect, etc...) if (failed) abort(); == This can happen in mmap/munmap/mprotect/etc...which calls split_vma(). I think 65536 is not safe as _default_ and reduce it to 65530 is good for avoiding unexpected corrupted core. Anyway, max_map_count can be enlarged by sysctl if a user is brave.. Signed-off-by: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Jakub Jelinek Acked-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 5 ++++- include/linux/sched.h | 16 ++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 9fa212b014a5..f1867900e459 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1929,7 +1929,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un elf = kmalloc(sizeof(*elf), GFP_KERNEL); if (!elf) goto out; - + /* + * The number of segs are recored into ELF header as 16bit value. 
+ * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here. + */ segs = current->mm->map_count; #ifdef ELF_CORE_EXTRA_PHDRS segs += ELF_CORE_EXTRA_PHDRS; diff --git a/include/linux/sched.h b/include/linux/sched.h index 4d0754269884..0085d758d645 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -349,8 +349,20 @@ extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner); struct nsproxy; struct user_namespace; -/* Maximum number of active map areas.. This is a random (large) number */ -#define DEFAULT_MAX_MAP_COUNT 65536 +/* + * Default maximum number of active map areas, this limits the number of vmas + * per mm struct. Users can overwrite this number by sysctl but there is a + * problem. + * + * When a program's coredump is generated as ELF format, a section is created + * per a vma. In ELF, the number of sections is represented in unsigned short. + * This means the number of sections should be smaller than 65535 at coredump. + * Because the kernel adds some informative sections to a image of program at + * generating coredump, we need some margin. The number of extra sections is + * 1-3 now and depends on arch. We use "5" as safe margin, here. + */ +#define MAPCOUNT_ELF_CORE_MARGIN (5) +#define DEFAULT_MAX_MAP_COUNT (USHORT_MAX - MAPCOUNT_ELF_CORE_MARGIN) extern int sysctl_max_map_count; From 4d6c13f87db12ae1ce35ea6a15688ac72419b133 Mon Sep 17 00:00:00 2001 From: Bryan Donlan Date: Tue, 30 Jun 2009 11:41:24 -0700 Subject: [PATCH 235/741] ext2: return -EIO not -ESTALE on directory traversal through deleted inode ext2_iget() returns -ESTALE if invoked on a deleted inode, in order to report errors to NFS properly. However, in ext[234]_lookup(), this -ESTALE can be propagated to userspace if the filesystem is corrupted such that a directory entry references a deleted inode. This leads to a misleading error message - "Stale NFS file handle" - and confusion on the part of the admin. 
The bug can be easily reproduced by creating a new filesystem, making a link to an unused inode using debugfs, then mounting and attempting to ls -l said link. This patch thus changes ext2_lookup to return -EIO if it receives -ESTALE from ext2_iget(), as ext2 does for other filesystem metadata corruption; and also invokes the appropriate ext*_error functions when this case is detected. Signed-off-by: Bryan Donlan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext2/namei.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 6524ecaebb7a..e1dedb0f7873 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -66,8 +66,16 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str inode = NULL; if (ino) { inode = ext2_iget(dir->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); + if (unlikely(IS_ERR(inode))) { + if (PTR_ERR(inode) == -ESTALE) { + ext2_error(dir->i_sb, __func__, + "deleted inode referenced: %lu", + ino); + return ERR_PTR(-EIO); + } else { + return ERR_CAST(inode); + } + } } return d_splice_alias(inode, dentry); } From c49568235dd7b4a2ffad63aa950562f4ffb9455f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 30 Jun 2009 11:41:25 -0700 Subject: [PATCH 236/741] dmapools: protect page_list walk in show_pools() show_pools() walks the page_list of a pool w/o protection against the list modifications in alloc/free. Take pool->lock to avoid stomping into nirvana. 
Signed-off-by: Thomas Gleixner Signed-off-by: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/dmapool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/dmapool.c b/mm/dmapool.c index b1f0885dda22..3df063706f53 100644 --- a/mm/dmapool.c +++ b/mm/dmapool.c @@ -86,10 +86,12 @@ show_pools(struct device *dev, struct device_attribute *attr, char *buf) unsigned pages = 0; unsigned blocks = 0; + spin_lock_irq(&pool->lock); list_for_each_entry(page, &pool->page_list, page_list) { pages++; blocks += page->in_use; } + spin_unlock_irq(&pool->lock); /* per-pool info, no real statistics yet */ temp = scnprintf(next, size, "%-16s %4u %4Zu %4Zu %2u\n", From b55f627feeb9d48fdbde3835e18afbc76712e49b Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 30 Jun 2009 11:41:26 -0700 Subject: [PATCH 237/741] spi: new spi->mode bits Add two new spi_device.mode bits to accommodate more protocol options, and pass them through to usermode drivers: * SPI_NO_CS ... a second 3-wire variant, where the chipselect line is removed instead of a data line; transfers are still full duplex. This obviously has STRONG protocol implications since the chipselect transitions can't be used to synchronize state transitions with the SPI master. * SPI_READY ... defines open drain signal that's pulled low to pause the clock. This defines a 5-wire variant (normal 4-wire SPI plus READY) and two 4-wire variants (READY plus each of the 3-wire flavors). Such hardware flow control can be a big win. There are ADC converters and flash chips that expose READY signals, but not many host controllers support it today. The spi_bitbang code should be changed to use SPI_NO_CS instead of its current nonportable hack. That's a mode most hardware can easily support (unlike SPI_READY).
Signed-off-by: David Brownell Cc: "Paulraj, Sandeep" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/spi/spidev_test.c | 10 +++++++++- drivers/spi/spidev.c | 17 +++++++++++------ include/linux/spi/spi.h | 2 ++ include/linux/spi/spidev.h | 2 ++ 4 files changed, 24 insertions(+), 7 deletions(-) diff --git a/Documentation/spi/spidev_test.c b/Documentation/spi/spidev_test.c index cf0e3ce0d526..c1a5aad3c75a 100644 --- a/Documentation/spi/spidev_test.c +++ b/Documentation/spi/spidev_test.c @@ -99,11 +99,13 @@ void parse_opts(int argc, char *argv[]) { "lsb", 0, 0, 'L' }, { "cs-high", 0, 0, 'C' }, { "3wire", 0, 0, '3' }, + { "no-cs", 0, 0, 'N' }, + { "ready", 0, 0, 'R' }, { NULL, 0, 0, 0 }, }; int c; - c = getopt_long(argc, argv, "D:s:d:b:lHOLC3", lopts, NULL); + c = getopt_long(argc, argv, "D:s:d:b:lHOLC3NR", lopts, NULL); if (c == -1) break; @@ -139,6 +141,12 @@ void parse_opts(int argc, char *argv[]) case '3': mode |= SPI_3WIRE; break; + case 'N': + mode |= SPI_NO_CS; + break; + case 'R': + mode |= SPI_READY; + break; default: print_usage(argv[0]); break; diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 5d869c4d3eb2..606e7a40a8da 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -58,15 +58,20 @@ static unsigned long minors[N_SPI_MINORS / BITS_PER_LONG]; /* Bit masks for spi_device.mode management. Note that incorrect - * settings for CS_HIGH and 3WIRE can cause *lots* of trouble for other - * devices on a shared bus: CS_HIGH, because this device will be - * active when it shouldn't be; 3WIRE, because when active it won't - * behave as it should. + * settings for some settings can cause *lots* of trouble for other + * devices on a shared bus: * - * REVISIT should changing those two modes be privileged? + * - CS_HIGH ... this device will be active when it shouldn't be + * - 3WIRE ... when active, it won't behave as it should + * - NO_CS ... 
there will be no explicit message boundaries; this + * is completely incompatible with the shared bus model + * - READY ... transfers may proceed when they shouldn't. + * + * REVISIT should changing those flags be privileged? */ #define SPI_MODE_MASK (SPI_CPHA | SPI_CPOL | SPI_CS_HIGH \ - | SPI_LSB_FIRST | SPI_3WIRE | SPI_LOOP) + | SPI_LSB_FIRST | SPI_3WIRE | SPI_LOOP \ + | SPI_NO_CS | SPI_READY) struct spidev_data { dev_t devt; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 9c4cd27f4685..743c933ac4e7 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -80,6 +80,8 @@ struct spi_device { #define SPI_LSB_FIRST 0x08 /* per-word bits-on-wire */ #define SPI_3WIRE 0x10 /* SI/SO signals shared */ #define SPI_LOOP 0x20 /* loopback mode */ +#define SPI_NO_CS 0x40 /* 1 dev/bus, no chipselect */ +#define SPI_READY 0x80 /* slave pulls low to pause */ u8 bits_per_word; int irq; void *controller_state; diff --git a/include/linux/spi/spidev.h b/include/linux/spi/spidev.h index 95251ccd5a07..bf0570a84f7a 100644 --- a/include/linux/spi/spidev.h +++ b/include/linux/spi/spidev.h @@ -40,6 +40,8 @@ #define SPI_LSB_FIRST 0x08 #define SPI_3WIRE 0x10 #define SPI_LOOP 0x20 +#define SPI_NO_CS 0x40 +#define SPI_READY 0x80 /*---------------------------------------------------------------------------*/ From 70d6027ff2bc8bab180273b77e7ab3e8a62cca51 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 30 Jun 2009 11:41:27 -0700 Subject: [PATCH 238/741] spi: add spi_master flag word Add a new spi_master.flags word listing constraints relevant to that controller. Define the first constraint bit: a half duplex restriction. Include that constraint in the OMAP1 MicroWire controller driver. Have the mmc_spi host be the first customer of this flag. Its coding relies heavily on full duplex transfers, so it must fail when the underlying controller driver won't perform them. 
(The spi_write_then_read routine could use it too: use the temporarily-withdrawn full-duplex speedup unless this flag is set, in which case the existing code applies. Similarly, any spi_master implementing only SPI_3WIRE should set the flag.) Signed-off-by: David Brownell Cc: Marek Szyprowski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mmc/host/mmc_spi.c | 6 ++++++ drivers/spi/omap_uwire.c | 2 ++ include/linux/spi/spi.h | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 240608cc7ae9..a461017ce5ce 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1313,6 +1313,12 @@ static int mmc_spi_probe(struct spi_device *spi) struct mmc_spi_host *host; int status; + /* We rely on full duplex transfers, mostly to reduce + * per-transfer overheads (by making fewer transfers). + */ + if (spi->master->flags & SPI_MASTER_HALF_DUPLEX) + return -EINVAL; + /* MMC and SD specs only seem to care that sampling is on the * rising edge ... meaning SPI modes 0 or 3. So either SPI mode * should be legit. 
We'll use mode 0 since the steady state is 0, diff --git a/drivers/spi/omap_uwire.c b/drivers/spi/omap_uwire.c index aa90ddb37066..8980a5640bd9 100644 --- a/drivers/spi/omap_uwire.c +++ b/drivers/spi/omap_uwire.c @@ -514,6 +514,8 @@ static int __init uwire_probe(struct platform_device *pdev) /* the spi->mode bits understood by this driver: */ master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; + master->flags = SPI_MASTER_HALF_DUPLEX; + master->bus_num = 2; /* "official" */ master->num_chipselect = 4; master->setup = uwire_setup; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 743c933ac4e7..c47c4b4da97e 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -250,6 +250,10 @@ struct spi_master { /* spi_device.mode flags understood by this controller driver */ u16 mode_bits; + /* other constraints relevant to this driver */ + u16 flags; +#define SPI_MASTER_HALF_DUPLEX BIT(0) /* can't do full duplex */ + /* Setup mode and clock, etc (spi driver may call many times). * * IMPORTANT: this may be called when transfers to another From 537a1bf059fa312355696fa6db80726e655e7f17 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Tue, 30 Jun 2009 11:41:29 -0700 Subject: [PATCH 239/741] fbdev: add mutex for fb_mmap locking Add a mutex to avoid a circular locking problem between the mm layer semaphore and fbdev ioctl mutex through the fb_mmap() call. Also, add mutex to all places where smem_start and smem_len fields change so the mutex inside the fb_mmap() is actually used. Changing of these fields before calling the framebuffer_register() are not mutexed. This is 2.6.31 material. It removes one lockdep (fb_mmap() and register_framebuffer()) but there is still another one (fb_release() and register_framebuffer()). It also cleans up handling of the smem_start and smem_len fields used by mutexed section of the fb_mmap(). Signed-off-by: Krzysztof Helt Cc: Peter Zijlstra Cc: "Rafael J. 
Wysocki" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/atafb.c | 7 ++++++- drivers/video/atmel_lcdfb.c | 2 ++ drivers/video/fbmem.c | 13 +++++-------- drivers/video/fsl-diu-fb.c | 14 +++++++++----- drivers/video/i810/i810_main.c | 2 ++ drivers/video/matrox/matroxfb_base.c | 3 +++ drivers/video/matrox/matroxfb_crtc2.c | 5 ++++- drivers/video/mx3fb.c | 17 +++++++++++------ drivers/video/omap/omapfb_main.c | 4 ++++ drivers/video/platinumfb.c | 2 ++ drivers/video/pxafb.c | 2 ++ drivers/video/sh7760fb.c | 19 ++++++------------- drivers/video/sis/sis_main.c | 2 ++ drivers/video/sm501fb.c | 21 +++++++++++++-------- drivers/video/w100fb.c | 2 ++ include/linux/fb.h | 1 + 16 files changed, 74 insertions(+), 42 deletions(-) diff --git a/drivers/video/atafb.c b/drivers/video/atafb.c index 018850c116c6..497ff8af03ed 100644 --- a/drivers/video/atafb.c +++ b/drivers/video/atafb.c @@ -2414,7 +2414,10 @@ static int atafb_get_fix(struct fb_fix_screeninfo *fix, struct fb_info *info) if (err) return err; memset(fix, 0, sizeof(struct fb_fix_screeninfo)); - return fbhw->encode_fix(fix, &par); + mutex_lock(&info->mm_lock); + err = fbhw->encode_fix(fix, &par); + mutex_unlock(&info->mm_lock); + return err; } static int atafb_get_var(struct fb_var_screeninfo *var, struct fb_info *info) @@ -2743,7 +2746,9 @@ static int atafb_set_par(struct fb_info *info) /* Decode wanted screen parameters */ fbhw->decode_var(&info->var, par); + mutex_lock(&info->mm_lock); fbhw->encode_fix(&info->fix, par); + mutex_unlock(&info->mm_lock); /* Set new videomode */ ata_set_par(par); diff --git a/drivers/video/atmel_lcdfb.c b/drivers/video/atmel_lcdfb.c index 5afd64482f55..cb88394ba995 100644 --- a/drivers/video/atmel_lcdfb.c +++ b/drivers/video/atmel_lcdfb.c @@ -270,7 +270,9 @@ static int atmel_lcdfb_alloc_video_memory(struct atmel_lcdfb_info *sinfo) smem_len = (var->xres_virtual * var->yres_virtual * ((var->bits_per_pixel + 7) / 8)); + mutex_lock(&info->mm_lock); 
info->fix.smem_len = max(smem_len, sinfo->smem_len); + mutex_unlock(&info->mm_lock); info->screen_base = dma_alloc_writecombine(info->device, info->fix.smem_len, (dma_addr_t *)&info->fix.smem_start, GFP_KERNEL); diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index f8a09bf8d0cd..53ea05645ff8 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -1310,8 +1310,6 @@ static long fb_compat_ioctl(struct file *file, unsigned int cmd, static int fb_mmap(struct file *file, struct vm_area_struct * vma) -__acquires(&info->lock) -__releases(&info->lock) { int fbidx = iminor(file->f_path.dentry->d_inode); struct fb_info *info = registered_fb[fbidx]; @@ -1325,16 +1323,14 @@ __releases(&info->lock) off = vma->vm_pgoff << PAGE_SHIFT; if (!fb) return -ENODEV; + mutex_lock(&info->mm_lock); if (fb->fb_mmap) { int res; - mutex_lock(&info->lock); res = fb->fb_mmap(info, vma); - mutex_unlock(&info->lock); + mutex_unlock(&info->mm_lock); return res; } - mutex_lock(&info->lock); - /* frame buffer memory */ start = info->fix.smem_start; len = PAGE_ALIGN((start & ~PAGE_MASK) + info->fix.smem_len); @@ -1342,13 +1338,13 @@ __releases(&info->lock) /* memory mapped io */ off -= len; if (info->var.accel_flags) { - mutex_unlock(&info->lock); + mutex_unlock(&info->mm_lock); return -EINVAL; } start = info->fix.mmio_start; len = PAGE_ALIGN((start & ~PAGE_MASK) + info->fix.mmio_len); } - mutex_unlock(&info->lock); + mutex_unlock(&info->mm_lock); start &= PAGE_MASK; if ((vma->vm_end - vma->vm_start + off) > len) return -EINVAL; @@ -1518,6 +1514,7 @@ register_framebuffer(struct fb_info *fb_info) break; fb_info->node = i; mutex_init(&fb_info->lock); + mutex_init(&fb_info->mm_lock); fb_info->dev = device_create(fb_class, fb_info->device, MKDEV(FB_MAJOR, i), NULL, "fb%d", i); diff --git a/drivers/video/fsl-diu-fb.c b/drivers/video/fsl-diu-fb.c index f153c581cbd7..0bf2190928d0 100644 --- a/drivers/video/fsl-diu-fb.c +++ b/drivers/video/fsl-diu-fb.c @@ -750,24 +750,26 @@ static void 
update_lcdc(struct fb_info *info) static int map_video_memory(struct fb_info *info) { phys_addr_t phys; + u32 smem_len = info->fix.line_length * info->var.yres_virtual; pr_debug("info->var.xres_virtual = %d\n", info->var.xres_virtual); pr_debug("info->var.yres_virtual = %d\n", info->var.yres_virtual); pr_debug("info->fix.line_length = %d\n", info->fix.line_length); + pr_debug("MAP_VIDEO_MEMORY: smem_len = %u\n", smem_len); - info->fix.smem_len = info->fix.line_length * info->var.yres_virtual; - pr_debug("MAP_VIDEO_MEMORY: smem_len = %d\n", info->fix.smem_len); - info->screen_base = fsl_diu_alloc(info->fix.smem_len, &phys); + info->screen_base = fsl_diu_alloc(smem_len, &phys); if (info->screen_base == NULL) { printk(KERN_ERR "Unable to allocate fb memory\n"); return -ENOMEM; } + mutex_lock(&info->mm_lock); info->fix.smem_start = (unsigned long) phys; + info->fix.smem_len = smem_len; + mutex_unlock(&info->mm_lock); info->screen_size = info->fix.smem_len; pr_debug("Allocated fb @ paddr=0x%08lx, size=%d.\n", - info->fix.smem_start, - info->fix.smem_len); + info->fix.smem_start, info->fix.smem_len); pr_debug("screen base %p\n", info->screen_base); return 0; @@ -776,9 +778,11 @@ static int map_video_memory(struct fb_info *info) static void unmap_video_memory(struct fb_info *info) { fsl_diu_free(info->screen_base, info->fix.smem_len); + mutex_lock(&info->mm_lock); info->screen_base = NULL; info->fix.smem_start = 0; info->fix.smem_len = 0; + mutex_unlock(&info->mm_lock); } /* diff --git a/drivers/video/i810/i810_main.c b/drivers/video/i810/i810_main.c index 2e940199fc89..71960672d721 100644 --- a/drivers/video/i810/i810_main.c +++ b/drivers/video/i810/i810_main.c @@ -1090,8 +1090,10 @@ static int encode_fix(struct fb_fix_screeninfo *fix, struct fb_info *info) memset(fix, 0, sizeof(struct fb_fix_screeninfo)); strcpy(fix->id, "I810"); + mutex_lock(&info->mm_lock); fix->smem_start = par->fb.physical; fix->smem_len = par->fb.size; + mutex_unlock(&info->mm_lock); fix->type = 
FB_TYPE_PACKED_PIXELS; fix->type_aux = 0; fix->xpanstep = 8; diff --git a/drivers/video/matrox/matroxfb_base.c b/drivers/video/matrox/matroxfb_base.c index 8e7a275df50c..59c3a2e14913 100644 --- a/drivers/video/matrox/matroxfb_base.c +++ b/drivers/video/matrox/matroxfb_base.c @@ -724,8 +724,10 @@ static void matroxfb_update_fix(WPMINFO2) struct fb_fix_screeninfo *fix = &ACCESS_FBINFO(fbcon).fix; DBG(__func__) + mutex_lock(&ACCESS_FBINFO(fbcon).mm_lock); fix->smem_start = ACCESS_FBINFO(video.base) + ACCESS_FBINFO(curr.ydstorg.bytes); fix->smem_len = ACCESS_FBINFO(video.len_usable) - ACCESS_FBINFO(curr.ydstorg.bytes); + mutex_unlock(&ACCESS_FBINFO(fbcon).mm_lock); } static int matroxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) @@ -2081,6 +2083,7 @@ static int matroxfb_probe(struct pci_dev* pdev, const struct pci_device_id* dumm spin_lock_init(&ACCESS_FBINFO(lock.accel)); init_rwsem(&ACCESS_FBINFO(crtc2.lock)); init_rwsem(&ACCESS_FBINFO(altout.lock)); + mutex_init(&ACCESS_FBINFO(fbcon).mm_lock); ACCESS_FBINFO(irq_flags) = 0; init_waitqueue_head(&ACCESS_FBINFO(crtc1.vsync.wait)); init_waitqueue_head(&ACCESS_FBINFO(crtc2.vsync.wait)); diff --git a/drivers/video/matrox/matroxfb_crtc2.c b/drivers/video/matrox/matroxfb_crtc2.c index 7ac4c5f6145d..909e10a11898 100644 --- a/drivers/video/matrox/matroxfb_crtc2.c +++ b/drivers/video/matrox/matroxfb_crtc2.c @@ -289,13 +289,16 @@ static int matroxfb_dh_release(struct fb_info* info, int user) { #undef m2info } -static void matroxfb_dh_init_fix(struct matroxfb_dh_fb_info *m2info) { +static void matroxfb_dh_init_fix(struct matroxfb_dh_fb_info *m2info) +{ struct fb_fix_screeninfo *fix = &m2info->fbcon.fix; strcpy(fix->id, "MATROX DH"); + mutex_lock(&m2info->fbcon.mm_lock); fix->smem_start = m2info->video.base; fix->smem_len = m2info->video.len_usable; + mutex_unlock(&m2info->fbcon.mm_lock); fix->ypanstep = 1; fix->ywrapstep = 0; fix->xpanstep = 8; /* TBD */ diff --git a/drivers/video/mx3fb.c 
b/drivers/video/mx3fb.c index b7af5256e887..567fb944bd2a 100644 --- a/drivers/video/mx3fb.c +++ b/drivers/video/mx3fb.c @@ -669,7 +669,7 @@ static uint32_t bpp_to_pixfmt(int bpp) } static int mx3fb_blank(int blank, struct fb_info *fbi); -static int mx3fb_map_video_memory(struct fb_info *fbi); +static int mx3fb_map_video_memory(struct fb_info *fbi, unsigned int mem_len); static int mx3fb_unmap_video_memory(struct fb_info *fbi); /** @@ -742,8 +742,7 @@ static int mx3fb_set_par(struct fb_info *fbi) if (fbi->fix.smem_start) mx3fb_unmap_video_memory(fbi); - fbi->fix.smem_len = mem_len; - if (mx3fb_map_video_memory(fbi) < 0) { + if (mx3fb_map_video_memory(fbi, mem_len) < 0) { mutex_unlock(&mx3_fbi->mutex); return -ENOMEM; } @@ -1198,6 +1197,7 @@ static int mx3fb_resume(struct platform_device *pdev) /** * mx3fb_map_video_memory() - allocates the DRAM memory for the frame buffer. * @fbi: framebuffer information pointer + * @mem_len: length of mapped memory * @return: Error code indicating success or failure * * This buffer is remapped into a non-cached, non-buffered, memory region to @@ -1205,23 +1205,26 @@ static int mx3fb_resume(struct platform_device *pdev) * area is remapped, all virtual memory access to the video memory should occur * at the new region. 
*/ -static int mx3fb_map_video_memory(struct fb_info *fbi) +static int mx3fb_map_video_memory(struct fb_info *fbi, unsigned int mem_len) { int retval = 0; dma_addr_t addr; fbi->screen_base = dma_alloc_writecombine(fbi->device, - fbi->fix.smem_len, + mem_len, &addr, GFP_DMA); if (!fbi->screen_base) { dev_err(fbi->device, "Cannot allocate %u bytes framebuffer memory\n", - fbi->fix.smem_len); + mem_len); retval = -EBUSY; goto err0; } + mutex_lock(&fbi->mm_lock); fbi->fix.smem_start = addr; + fbi->fix.smem_len = mem_len; + mutex_unlock(&fbi->mm_lock); dev_dbg(fbi->device, "allocated fb @ p=0x%08x, v=0x%p, size=%d.\n", (uint32_t) fbi->fix.smem_start, fbi->screen_base, fbi->fix.smem_len); @@ -1251,8 +1254,10 @@ static int mx3fb_unmap_video_memory(struct fb_info *fbi) fbi->screen_base, fbi->fix.smem_start); fbi->screen_base = 0; + mutex_lock(&fbi->mm_lock); fbi->fix.smem_start = 0; fbi->fix.smem_len = 0; + mutex_unlock(&fbi->mm_lock); return 0; } diff --git a/drivers/video/omap/omapfb_main.c b/drivers/video/omap/omapfb_main.c index 060d72fe57cb..4ea99bfc37b4 100644 --- a/drivers/video/omap/omapfb_main.c +++ b/drivers/video/omap/omapfb_main.c @@ -393,8 +393,10 @@ static void set_fb_fix(struct fb_info *fbi) rg = &plane->fbdev->mem_desc.region[plane->idx]; fbi->screen_base = rg->vaddr; + mutex_lock(&fbi->mm_lock); fix->smem_start = rg->paddr; fix->smem_len = rg->size; + mutex_unlock(&fbi->mm_lock); fix->type = FB_TYPE_PACKED_PIXELS; bpp = var->bits_per_pixel; @@ -886,8 +888,10 @@ static int omapfb_setup_mem(struct fb_info *fbi, struct omapfb_mem_info *mi) * plane memory is dealloce'd, the other * screen parameters in var / fix are invalid. 
*/ + mutex_lock(&fbi->mm_lock); fbi->fix.smem_start = 0; fbi->fix.smem_len = 0; + mutex_unlock(&fbi->mm_lock); } } } diff --git a/drivers/video/platinumfb.c b/drivers/video/platinumfb.c index 03b3670130a0..bacfabd9ce16 100644 --- a/drivers/video/platinumfb.c +++ b/drivers/video/platinumfb.c @@ -141,7 +141,9 @@ static int platinumfb_set_par (struct fb_info *info) offset = 0x10; info->screen_base = pinfo->frame_buffer + init->fb_offset + offset; + mutex_lock(&info->mm_lock); info->fix.smem_start = (pinfo->frame_buffer_phys) + init->fb_offset + offset; + mutex_unlock(&info->mm_lock); info->fix.visual = (pinfo->cmode == CMODE_8) ? FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_DIRECTCOLOR; info->fix.line_length = vmode_attrs[pinfo->vmode-1].hres * (1<cmode) diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c index 0889d50c3288..6506117c134b 100644 --- a/drivers/video/pxafb.c +++ b/drivers/video/pxafb.c @@ -815,8 +815,10 @@ static int overlayfb_map_video_memory(struct pxafb_layer *ofb) ofb->video_mem_phys = virt_to_phys(ofb->video_mem); ofb->video_mem_size = size; + mutex_lock(&ofb->fb.mm_lock); ofb->fb.fix.smem_start = ofb->video_mem_phys; ofb->fb.fix.smem_len = ofb->fb.fix.line_length * var->yres_virtual; + mutex_unlock(&ofb->fb.mm_lock); ofb->fb.screen_base = ofb->video_mem; return 0; } diff --git a/drivers/video/sh7760fb.c b/drivers/video/sh7760fb.c index 653bdfee3057..9f6d6e61f0cc 100644 --- a/drivers/video/sh7760fb.c +++ b/drivers/video/sh7760fb.c @@ -120,18 +120,6 @@ static int sh7760_setcolreg (u_int regno, return 0; } -static void encode_fix(struct fb_fix_screeninfo *fix, struct fb_info *info, - unsigned long stride) -{ - memset(fix, 0, sizeof(struct fb_fix_screeninfo)); - strcpy(fix->id, "sh7760-lcdc"); - - fix->smem_start = (unsigned long)info->screen_base; - fix->smem_len = info->screen_size; - - fix->line_length = stride; -} - static int sh7760fb_get_color_info(struct device *dev, u16 lddfr, int *bpp, int *gray) { @@ -334,7 +322,8 @@ static int 
sh7760fb_set_par(struct fb_info *info) iowrite32(ldsarl, par->base + LDSARL); /* mem for lower half of DSTN */ - encode_fix(&info->fix, info, stride); + info->fix.line_length = stride; + sh7760fb_check_var(&info->var, info); sh7760fb_blank(FB_BLANK_UNBLANK, info); /* panel on! */ @@ -435,6 +424,8 @@ static int sh7760fb_alloc_mem(struct fb_info *info) info->screen_base = fbmem; info->screen_size = vram; + info->fix.smem_start = (unsigned long)info->screen_base; + info->fix.smem_len = info->screen_size; return 0; } @@ -520,6 +511,8 @@ static int __devinit sh7760fb_probe(struct platform_device *pdev) info->var.transp.length = 0; info->var.transp.msb_right = 0; + strcpy(info->fix.id, "sh7760-lcdc"); + /* set the DON2 bit now, before cmap allocation, as it will randomize * palette memory. */ diff --git a/drivers/video/sis/sis_main.c b/drivers/video/sis/sis_main.c index 7072d19080d5..fd33455389b8 100644 --- a/drivers/video/sis/sis_main.c +++ b/drivers/video/sis/sis_main.c @@ -1847,8 +1847,10 @@ sisfb_get_fix(struct fb_fix_screeninfo *fix, int con, struct fb_info *info) strcpy(fix->id, ivideo->myid); + mutex_lock(&info->mm_lock); fix->smem_start = ivideo->video_base + ivideo->video_offset; fix->smem_len = ivideo->sisfb_mem; + mutex_unlock(&info->mm_lock); fix->type = FB_TYPE_PACKED_PIXELS; fix->type_aux = 0; fix->visual = (ivideo->video_bpp == 8) ? 
FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR; diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c index eb5d73a06702..98f24f0ec00d 100644 --- a/drivers/video/sm501fb.c +++ b/drivers/video/sm501fb.c @@ -145,7 +145,7 @@ static inline void sm501fb_sync_regs(struct sm501fb_info *info) #define SM501_MEMF_ACCEL (8) static int sm501_alloc_mem(struct sm501fb_info *inf, struct sm501_mem *mem, - unsigned int why, size_t size) + unsigned int why, size_t size, u32 smem_len) { struct sm501fb_par *par; struct fb_info *fbi; @@ -172,7 +172,7 @@ static int sm501_alloc_mem(struct sm501fb_info *inf, struct sm501_mem *mem, if (ptr > 0) ptr &= ~(PAGE_SIZE - 1); - if (fbi && ptr < fbi->fix.smem_len) + if (fbi && ptr < smem_len) return -ENOMEM; break; @@ -197,7 +197,7 @@ static int sm501_alloc_mem(struct sm501fb_info *inf, struct sm501_mem *mem, case SM501_MEMF_ACCEL: fbi = inf->fb[HEAD_CRT]; - ptr = fbi ? fbi->fix.smem_len : 0; + ptr = fbi ? smem_len : 0; fbi = inf->fb[HEAD_PANEL]; if (fbi) { @@ -413,6 +413,7 @@ static int sm501fb_set_par_common(struct fb_info *info, unsigned int mem_type; unsigned int clock_type; unsigned int head_addr; + unsigned int smem_len; dev_dbg(fbi->dev, "%s: %dx%d, bpp = %d, virtual %dx%d\n", __func__, var->xres, var->yres, var->bits_per_pixel, @@ -453,18 +454,20 @@ static int sm501fb_set_par_common(struct fb_info *info, /* allocate fb memory within 501 */ info->fix.line_length = (var->xres_virtual * var->bits_per_pixel)/8; - info->fix.smem_len = info->fix.line_length * var->yres_virtual; + smem_len = info->fix.line_length * var->yres_virtual; dev_dbg(fbi->dev, "%s: line length = %u\n", __func__, info->fix.line_length); - if (sm501_alloc_mem(fbi, &par->screen, mem_type, - info->fix.smem_len)) { + if (sm501_alloc_mem(fbi, &par->screen, mem_type, smem_len, smem_len)) { dev_err(fbi->dev, "no memory available\n"); return -ENOMEM; } + mutex_lock(&info->mm_lock); info->fix.smem_start = fbi->fbmem_res->start + par->screen.sm_addr; + info->fix.smem_len = 
smem_len; + mutex_unlock(&info->mm_lock); info->screen_base = fbi->fbmem + par->screen.sm_addr; info->screen_size = info->fix.smem_len; @@ -637,7 +640,8 @@ static int sm501fb_set_par_crt(struct fb_info *info) if ((control & SM501_DC_CRT_CONTROL_SEL) == 0) { /* the head is displaying panel data... */ - sm501_alloc_mem(fbi, &par->screen, SM501_MEMF_CRT, 0); + sm501_alloc_mem(fbi, &par->screen, SM501_MEMF_CRT, 0, + info->fix.smem_len); goto out_update; } @@ -1289,7 +1293,8 @@ static int sm501_init_cursor(struct fb_info *fbi, unsigned int reg_base) par->cursor_regs = info->regs + reg_base; - ret = sm501_alloc_mem(info, &par->cursor, SM501_MEMF_CURSOR, 1024); + ret = sm501_alloc_mem(info, &par->cursor, SM501_MEMF_CURSOR, 1024, + fbi->fix.smem_len); if (ret < 0) return ret; diff --git a/drivers/video/w100fb.c b/drivers/video/w100fb.c index d0674f1e3f10..8a141c2c637b 100644 --- a/drivers/video/w100fb.c +++ b/drivers/video/w100fb.c @@ -523,6 +523,7 @@ static int w100fb_set_par(struct fb_info *info) info->fix.ywrapstep = 0; info->fix.line_length = par->xres * BITS_PER_PIXEL / 8; + mutex_lock(&info->mm_lock); if ((par->xres*par->yres*BITS_PER_PIXEL/8) > (MEM_INT_SIZE+1)) { par->extmem_active = 1; info->fix.smem_len = par->mach->mem->size+1; @@ -530,6 +531,7 @@ static int w100fb_set_par(struct fb_info *info) par->extmem_active = 0; info->fix.smem_len = MEM_INT_SIZE+1; } + mutex_unlock(&info->mm_lock); w100fb_activate_var(par); } diff --git a/include/linux/fb.h b/include/linux/fb.h index dd68358996b7..f847df9e99b6 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -819,6 +819,7 @@ struct fb_info { int node; int flags; struct mutex lock; /* Lock for open/release/ioctl funcs */ + struct mutex mm_lock; /* Lock for fb_mmap and smem_* fields */ struct fb_var_screeninfo var; /* Current var */ struct fb_fix_screeninfo fix; /* Current fix */ struct fb_monspecs monspecs; /* Current Monitor specs */ From 529ba0d9669386157457a1cb96294d2fe79b3f88 Mon Sep 17 00:00:00 2001 From: 
David Brownell Date: Tue, 30 Jun 2009 11:41:30 -0700 Subject: [PATCH 240/741] spi: bitbang bugfix in message setup Bugfix to spi_bitbang infrastructure: make sure to always set transfer parameters on the first pass through the message's per-transfer loop. This can matter with drivers that replace the per-word or per-buffer transfer primitives, on busses with multiple SPI devices. Previously, this could have started messages using the settings left after previous messages. The problem was observed when a high speed chip (m25p80 type flash) was running very slowly because a low speed device (avr8 microcontroller) had previously used the bus. Similar faults could have driven the low speed device too fast, or used an unexpected word size. Acked-by: Steven A. Falco Signed-off-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/spi/spi_bitbang.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/spi/spi_bitbang.c b/drivers/spi/spi_bitbang.c index 2a5abc08e857..f1db395dd889 100644 --- a/drivers/spi/spi_bitbang.c +++ b/drivers/spi/spi_bitbang.c @@ -258,6 +258,11 @@ static void bitbang_work(struct work_struct *work) struct spi_bitbang *bitbang = container_of(work, struct spi_bitbang, work); unsigned long flags; + int do_setup = -1; + int (*setup_transfer)(struct spi_device *, + struct spi_transfer *); + + setup_transfer = bitbang->setup_transfer; spin_lock_irqsave(&bitbang->lock, flags); bitbang->busy = 1; @@ -269,8 +274,6 @@ static void bitbang_work(struct work_struct *work) unsigned tmp; unsigned cs_change; int status; - int (*setup_transfer)(struct spi_device *, - struct spi_transfer *); m = container_of(bitbang->queue.next, struct spi_message, queue); @@ -287,19 +290,19 @@ static void bitbang_work(struct work_struct *work) tmp = 0; cs_change = 1; status = 0; - setup_transfer = NULL; list_for_each_entry (t, &m->transfers, transfer_list) { - /* override or restore speed and wordsize */ 
- if (t->speed_hz || t->bits_per_word) { - setup_transfer = bitbang->setup_transfer; + /* override speed or wordsize? */ + if (t->speed_hz || t->bits_per_word) + do_setup = 1; + + /* init (-1) or override (1) transfer params */ + if (do_setup != 0) { if (!setup_transfer) { status = -ENOPROTOOPT; break; } - } - if (setup_transfer) { status = setup_transfer(spi, t); if (status < 0) break; @@ -363,9 +366,10 @@ static void bitbang_work(struct work_struct *work) m->status = status; m->complete(m->context); - /* restore speed and wordsize */ - if (setup_transfer) + /* restore speed and wordsize if it was overridden */ + if (do_setup == 1) setup_transfer(spi, NULL); + do_setup = 0; /* normally deactivate chipselect ... unless no error and * cs_change has hinted that the next message will probably From 8bc1ad7dd301b7ca7454013519fa92e8c53655ff Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 30 Jun 2009 11:41:31 -0700 Subject: [PATCH 241/741] kernel/resource.c: fix sign extension in reserve_setup() When the 32-bit signed quantities get assigned to the u64 resource_size_t, they are incorrectly sign-extended. 
Addresses http://bugzilla.kernel.org/show_bug.cgi?id=13253 Addresses http://bugzilla.kernel.org/show_bug.cgi?id=9905 Signed-off-by: Zhang Rui Reported-by: Leann Ogasawara Cc: Pierre Ossman Reported-by: Tested-by: Cc: Cc: Jesse Barnes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/resource.c b/kernel/resource.c index ac5f3a36923f..78b087221c15 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -787,7 +787,7 @@ static int __init reserve_setup(char *str) static struct resource reserve[MAXRESERVE]; for (;;) { - int io_start, io_num; + unsigned int io_start, io_num; int x = reserved; if (get_option (&str, &io_start) != 2) From b4f90189dc2c7a7e6926ea480ae3404ec3de4581 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 30 Jun 2009 11:41:32 -0700 Subject: [PATCH 242/741] MAINTAINERS: STARFIRE/DURALAN update Ion's cs.columbia.edu email address no longer works. Signed-off-by: Joe Perches Acked-by: Ion Badulescu Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 92fe0796cd1b..6a89e312a7b4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5576,8 +5576,8 @@ F: drivers/staging/ STARFIRE/DURALAN NETWORK DRIVER P: Ion Badulescu -M: ionut@cs.columbia.edu -S: Maintained +M: ionut@badula.org +S: Odd Fixes F: drivers/net/starfire* STARMODE RADIO IP (STRIP) PROTOCOL DRIVER From df279ca8966c3de83105428e3391ab17690802a9 Mon Sep 17 00:00:00 2001 From: Renaud Lottiaux Date: Tue, 30 Jun 2009 11:41:34 -0700 Subject: [PATCH 243/741] bsdacct: fix access to invalid filp in acct_on() The file opened in acct_on and freshly stored in the ns->bacct struct can be closed in acct_file_reopen by a concurrent call after we release acct_lock and before we call mntput(file->f_path.mnt). 
Record file->f_path.mnt in a local variable and use this variable only. Signed-off-by: Renaud Lottiaux Signed-off-by: Louis Rilling Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/acct.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/acct.c b/kernel/acct.c index 7afa31564162..9f3391090b3e 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -215,6 +215,7 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file, static int acct_on(char *name) { struct file *file; + struct vfsmount *mnt; int error; struct pid_namespace *ns; struct bsd_acct_struct *acct = NULL; @@ -256,11 +257,12 @@ static int acct_on(char *name) acct = NULL; } - mnt_pin(file->f_path.mnt); + mnt = file->f_path.mnt; + mnt_pin(mnt); acct_file_reopen(ns->bacct, file, ns); spin_unlock(&acct_lock); - mntput(file->f_path.mnt); /* it's pinned, now give up active reference */ + mntput(mnt); /* it's pinned, now give up active reference */ kfree(acct); return 0; From d7831a0bdf06b9f722b947bb0c205ff7d77cebd8 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Tue, 30 Jun 2009 11:41:35 -0700 Subject: [PATCH 244/741] mm: prevent balance_dirty_pages() from doing too much work balance_dirty_pages can overreact and move all of the dirty pages to writeback unnecessarily. balance_dirty_pages makes its decision to throttle based on the number of dirty plus writeback pages that are over the calculated limit, so it will continue to move pages even when there are plenty of pages in writeback and less than the threshold still dirty. This allows it to overshoot its limits and move all the dirty pages to writeback while waiting for the drives to catch up and empty the writeback list. A simple fio test easily demonstrates this problem.
fio --name=f1 --directory=/disk1 --size=2G -rw=write --name=f2 --directory=/disk2 --size=1G --rw=write --startdelay=10 This is the simplest fix I could find, but I'm not entirely sure that it alone will be enough for all cases. But it certainly is an improvement on my desktop machine writing to 2 disks. Do we need something more for machines with large arrays where bdi_threshold * number_of_drives is greater than the dirty_ratio ? Signed-off-by: Richard Kennedy Acked-by: Peter Zijlstra Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page-writeback.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7b0dcea4935b..7687879253b9 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -541,8 +541,11 @@ static void balance_dirty_pages(struct address_space *mapping) * filesystems (i.e. NFS) in which data may have been * written to the server's write cache, but has not yet * been flushed to permanent storage. + * Only move pages to writeback if this bdi is over its + * threshold otherwise wait until the disk writes catch + * up. */ - if (bdi_nr_reclaimable) { + if (bdi_nr_reclaimable > bdi_thresh) { writeback_inodes(&wbc); pages_written += write_chunk - wbc.nr_to_write; get_dirty_limits(&background_thresh, &dirty_thresh, From b37f2d4de6dfce4bfd6df311af80e4d61458ee1e Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Tue, 30 Jun 2009 11:41:36 -0700 Subject: [PATCH 245/741] cpusets: document adding/removing cpus to cpuset elaborately By writing a task's pid to the file, a process adds that task to that cgroup/cpuset. But to add a cpu/mem to a cpuset, the new list of cpus should be written to the cpuset.mems file which would replace the old list of cpus. Make this clearer in the documentation.
Signed-off-by: Nikanth Karthikesan Signed-off-by: Li Zefan Acked-by: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/cpusets.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt index f9ca389dddf4..1d7e9784439a 100644 --- a/Documentation/cgroups/cpusets.txt +++ b/Documentation/cgroups/cpusets.txt @@ -777,6 +777,18 @@ in cpuset directories: # /bin/echo 1-4 > cpus -> set cpus list to cpus 1,2,3,4 # /bin/echo 1,2,3,4 > cpus -> set cpus list to cpus 1,2,3,4 +To add a CPU to a cpuset, write the new list of CPUs including the +CPU to be added. To add 6 to the above cpuset: + +# /bin/echo 1-4,6 > cpus -> set cpus list to cpus 1,2,3,4,6 + +Similarly to remove a CPU from a cpuset, write the new list of CPUs +without the CPU to be removed. + +To remove all the CPUs: + +# /bin/echo "" > cpus -> clear cpus list + 2.3 Setting flags ----------------- From 66918dcdf91ad101194c749c18099e836ba3de2b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 30 Jun 2009 11:41:37 -0700 Subject: [PATCH 246/741] x86: only clear node_states for 64bit Nathan reported that | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74 | Author: Yinghai Lu | Date: Tue Jun 16 15:33:00 2009 -0700 | | page-allocator: clear N_HIGH_MEMORY map before we set it again | | SRAT tables may contains nodes of very small size. The arch code may | decide to not activate such a node. However, currently the early boot | code sets N_HIGH_MEMORY for such nodes. These nodes therefore seem to be | active although these nodes have no present pages. | | For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too unintentionally and incorrectly clears the cpuset.mems cgroup attribute on an i386 kvm guest, meaning that cpuset.mems can not be used. Fix this by only clearing node_states[N_NORMAL_MEMORY] for 64bit only. 
and need to do save/restore for that in find_zone_movable_pfn Reported-by: Nathan Lynch Tested-by: Nathan Lynch Signed-off-by: Yinghai Lu Cc: Christoph Lameter Cc: Ingo Molnar , Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init_64.c | 2 ++ mm/page_alloc.c | 13 +++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index c4378f4fd4a5..b177652251a4 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -598,6 +598,8 @@ void __init paging_init(void) sparse_memory_present_with_active_regions(MAX_NUMNODES); sparse_init(); + /* clear the default setting with node 0 */ + nodes_clear(node_states[N_NORMAL_MEMORY]); free_area_init_nodes(max_zone_pfns); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5d714f8fb303..e0f2cdf9d8b1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4032,6 +4032,8 @@ static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn) int i, nid; unsigned long usable_startpfn; unsigned long kernelcore_node, kernelcore_remaining; + /* save the state before borrow the nodemask */ + nodemask_t saved_node_state = node_states[N_HIGH_MEMORY]; unsigned long totalpages = early_calculate_totalpages(); int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]); @@ -4059,7 +4061,7 @@ static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn) /* If kernelcore was not specified, there is no ZONE_MOVABLE */ if (!required_kernelcore) - return; + goto out; /* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */ find_usable_zone_for_movable(); @@ -4158,6 +4160,10 @@ restart: for (nid = 0; nid < MAX_NUMNODES; nid++) zone_movable_pfn[nid] = roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES); + +out: + /* restore the node_state */ + node_states[N_HIGH_MEMORY] = saved_node_state; } /* Any regular memory on that node ? 
*/ @@ -4242,11 +4248,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) early_node_map[i].start_pfn, early_node_map[i].end_pfn); - /* - * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init - * that node_mask, clear it at first - */ - nodes_clear(node_states[N_HIGH_MEMORY]); /* Initialise every node */ mminit_verify_pageflags_layout(); setup_nr_node_ids(); From 79d7f4ee23d41571d9e4663521b5e6604c55729a Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Tue, 30 Jun 2009 11:41:38 -0700 Subject: [PATCH 247/741] gpio: pl061: fix probe error handling code Note that IRQ has not been initialized when kmalloc() fails. Also, use DECLARE_BITMAP() to make the code clearer. Signed-off-by: Baruch Siach Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/pl061.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/pl061.c b/drivers/gpio/pl061.c index aa8e7cb020d9..80e483986699 100644 --- a/drivers/gpio/pl061.c +++ b/drivers/gpio/pl061.c @@ -221,7 +221,7 @@ static int __init pl061_probe(struct amba_device *dev, struct amba_id *id) struct pl061_gpio *chip; struct list_head *chip_list; int ret, irq, i; - static unsigned long init_irq[BITS_TO_LONGS(NR_IRQS)]; + static DECLARE_BITMAP(init_irq, NR_IRQS); pdata = dev->dev.platform_data; if (pdata == NULL) @@ -280,6 +280,7 @@ static int __init pl061_probe(struct amba_device *dev, struct amba_id *id) if (!test_and_set_bit(irq, init_irq)) { /* list initialized? */ chip_list = kmalloc(sizeof(*chip_list), GFP_KERNEL); if (chip_list == NULL) { + clear_bit(irq, init_irq); ret = -ENOMEM; goto iounmap; } From 50efacf6711e6c75595afd9b92aa15c1e4f7c79d Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Tue, 30 Jun 2009 11:41:39 -0700 Subject: [PATCH 248/741] gpio: pl061: fix IRQ handling for GPIOs >= PL061_GPIO_NR IRQ handling is wrong for any GPIO >= PL061_GPIO_NR. Fix this by implementing and using a proper .to_irq method. 
Signed-off-by: Baruch Siach Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/pl061.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/pl061.c b/drivers/gpio/pl061.c index 80e483986699..4ee4c8367a3f 100644 --- a/drivers/gpio/pl061.c +++ b/drivers/gpio/pl061.c @@ -109,6 +109,16 @@ static void pl061_set_value(struct gpio_chip *gc, unsigned offset, int value) writeb(!!value << offset, chip->base + (1 << (offset + 2))); } +static int pl061_to_irq(struct gpio_chip *gc, unsigned offset) +{ + struct pl061_gpio *chip = container_of(gc, struct pl061_gpio, gc); + + if (chip->irq_base == (unsigned) -1) + return -EINVAL; + + return chip->irq_base + offset; +} + /* * PL061 GPIO IRQ */ @@ -200,7 +210,7 @@ static void pl061_irq_handler(unsigned irq, struct irq_desc *desc) desc->chip->ack(irq); list_for_each(ptr, chip_list) { unsigned long pending; - int gpio; + int offset; chip = list_entry(ptr, struct pl061_gpio, list); pending = readb(chip->base + GPIOMIS); @@ -209,8 +219,8 @@ static void pl061_irq_handler(unsigned irq, struct irq_desc *desc) if (pending == 0) continue; - for_each_bit(gpio, &pending, PL061_GPIO_NR) - generic_handle_irq(gpio_to_irq(gpio)); + for_each_bit(offset, &pending, PL061_GPIO_NR) + generic_handle_irq(pl061_to_irq(&chip->gc, offset)); } desc->chip->unmask(irq); } @@ -251,6 +261,7 @@ static int __init pl061_probe(struct amba_device *dev, struct amba_id *id) chip->gc.direction_output = pl061_direction_output; chip->gc.get = pl061_get_value; chip->gc.set = pl061_set_value; + chip->gc.to_irq = pl061_to_irq; chip->gc.base = pdata->gpio_base; chip->gc.ngpio = PL061_GPIO_NR; chip->gc.label = dev_name(&dev->dev); From eafad22a05fdaca60f06433ffe8810aaa920d539 Mon Sep 17 00:00:00 2001 From: Ville Syrjala Date: Tue, 30 Jun 2009 11:41:40 -0700 Subject: [PATCH 249/741] atyfb: fix HP OmniBook 500 reboot hang Apparently HP OmniBook 500's BIOS doesn't like the way atyfb 
reprograms the hardware. The BIOS will simply hang after a reboot. Fix the problem by restoring the hardware to its original state on reboot. Signed-off-by: Ville Syrjala Cc: Mikulas Patocka Cc: Krzysztof Helt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/aty/atyfb.h | 2 + drivers/video/aty/atyfb_base.c | 89 ++++++++++++++++++++++++++++++---- 2 files changed, 82 insertions(+), 9 deletions(-) diff --git a/drivers/video/aty/atyfb.h b/drivers/video/aty/atyfb.h index 7691e73823d3..0369653b5d88 100644 --- a/drivers/video/aty/atyfb.h +++ b/drivers/video/aty/atyfb.h @@ -187,6 +187,8 @@ struct atyfb_par { int mtrr_reg; #endif u32 mem_cntl; + struct crtc saved_crtc; + union aty_pll saved_pll; }; /* diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c index 1207c208a30b..06782906daf5 100644 --- a/drivers/video/aty/atyfb_base.c +++ b/drivers/video/aty/atyfb_base.c @@ -66,6 +66,8 @@ #include #include #include +#include +#include #include #include @@ -249,8 +251,6 @@ static int aty_init(struct fb_info *info); static int store_video_par(char *videopar, unsigned char m64_num); #endif -static struct crtc saved_crtc; -static union aty_pll saved_pll; static void aty_get_crtc(const struct atyfb_par *par, struct crtc *crtc); static void aty_set_crtc(const struct atyfb_par *par, const struct crtc *crtc); @@ -261,6 +261,8 @@ static void set_off_pitch(struct atyfb_par *par, const struct fb_info *info); static int read_aty_sense(const struct atyfb_par *par); #endif +static DEFINE_MUTEX(reboot_lock); +static struct fb_info *reboot_info; /* * Interface used by the world @@ -2390,9 +2392,9 @@ static int __devinit aty_init(struct fb_info *info) #endif /* CONFIG_FB_ATY_CT */ /* save previous video mode */ - aty_get_crtc(par, &saved_crtc); + aty_get_crtc(par, &par->saved_crtc); if(par->pll_ops->get_pll) - par->pll_ops->get_pll(info, &saved_pll); + par->pll_ops->get_pll(info, &par->saved_pll); par->mem_cntl = aty_ld_le32(MEM_CNTL, par);
gtb_memsize = M64_HAS(GTB_DSP); @@ -2667,8 +2669,8 @@ static int __devinit aty_init(struct fb_info *info) aty_init_exit: /* restore video mode */ - aty_set_crtc(par, &saved_crtc); - par->pll_ops->set_pll(info, &saved_pll); + aty_set_crtc(par, &par->saved_crtc); + par->pll_ops->set_pll(info, &par->saved_pll); #ifdef CONFIG_MTRR if (par->mtrr_reg >= 0) { @@ -3502,6 +3504,11 @@ static int __devinit atyfb_pci_probe(struct pci_dev *pdev, const struct pci_devi par->mmap_map[1].prot_flag = _PAGE_E; #endif /* __sparc__ */ + mutex_lock(&reboot_lock); + if (!reboot_info) + reboot_info = info; + mutex_unlock(&reboot_lock); + return 0; err_release_io: @@ -3614,8 +3621,8 @@ static void __devexit atyfb_remove(struct fb_info *info) struct atyfb_par *par = (struct atyfb_par *) info->par; /* restore video mode */ - aty_set_crtc(par, &saved_crtc); - par->pll_ops->set_pll(info, &saved_pll); + aty_set_crtc(par, &par->saved_crtc); + par->pll_ops->set_pll(info, &par->saved_pll); unregister_framebuffer(info); @@ -3661,6 +3668,11 @@ static void __devexit atyfb_pci_remove(struct pci_dev *pdev) { struct fb_info *info = pci_get_drvdata(pdev); + mutex_lock(&reboot_lock); + if (reboot_info == info) + reboot_info = NULL; + mutex_unlock(&reboot_lock); + atyfb_remove(info); } @@ -3808,6 +3820,56 @@ static int __init atyfb_setup(char *options) } #endif /* MODULE */ +static int atyfb_reboot_notify(struct notifier_block *nb, + unsigned long code, void *unused) +{ + struct atyfb_par *par; + + if (code != SYS_RESTART) + return NOTIFY_DONE; + + mutex_lock(&reboot_lock); + + if (!reboot_info) + goto out; + + if (!lock_fb_info(reboot_info)) + goto out; + + par = reboot_info->par; + + /* + * HP OmniBook 500's BIOS doesn't like the state of the + * hardware after atyfb has been used. Restore the hardware + * to the original state to allow successful reboots. 
+ */ + aty_set_crtc(par, &par->saved_crtc); + par->pll_ops->set_pll(reboot_info, &par->saved_pll); + + unlock_fb_info(reboot_info); + out: + mutex_unlock(&reboot_lock); + + return NOTIFY_DONE; +} + +static struct notifier_block atyfb_reboot_notifier = { + .notifier_call = atyfb_reboot_notify, +}; + +static const struct dmi_system_id atyfb_reboot_ids[] = { + { + .ident = "HP OmniBook 500", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP OmniBook PC"), + DMI_MATCH(DMI_PRODUCT_VERSION, "HP OmniBook 500 FA"), + }, + }, + + { } +}; + static int __init atyfb_init(void) { int err1 = 1, err2 = 1; @@ -3826,11 +3888,20 @@ static int __init atyfb_init(void) err2 = atyfb_atari_probe(); #endif - return (err1 && err2) ? -ENODEV : 0; + if (err1 && err2) + return -ENODEV; + + if (dmi_check_system(atyfb_reboot_ids)) + register_reboot_notifier(&atyfb_reboot_notifier); + + return 0; } static void __exit atyfb_exit(void) { + if (dmi_check_system(atyfb_reboot_ids)) + unregister_reboot_notifier(&atyfb_reboot_notifier); + #ifdef CONFIG_PCI pci_unregister_driver(&atyfb_driver); #endif From ee905d0c58a440a5bd10c845e8305f6f7f706be2 Mon Sep 17 00:00:00 2001 From: Ville Syrjala Date: Tue, 30 Jun 2009 11:41:42 -0700 Subject: [PATCH 250/741] atyfb: fix alignment for block writes Block writes require 64 byte alignment. Since block writes could be used with SGRAM or WRAM also refine the memory type detection to check for either type before deciding to use the 64 byte alignment. 
Signed-off-by: Ville Syrjala Tested-by: Mikulas Patocka Cc: Krzysztof Helt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/aty/atyfb.h | 1 + drivers/video/aty/atyfb_base.c | 52 ++++++++++++++++++++++++-------- drivers/video/aty/mach64_accel.c | 7 +++-- 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/drivers/video/aty/atyfb.h b/drivers/video/aty/atyfb.h index 0369653b5d88..1f39a62f899b 100644 --- a/drivers/video/aty/atyfb.h +++ b/drivers/video/aty/atyfb.h @@ -219,6 +219,7 @@ struct atyfb_par { #define M64F_XL_DLL 0x00080000 #define M64F_MFB_FORCE_4 0x00100000 #define M64F_HW_TRIPLE 0x00200000 +#define M64F_XL_MEM 0x00400000 /* * Register access */ diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c index 06782906daf5..63d3739d43a8 100644 --- a/drivers/video/aty/atyfb_base.c +++ b/drivers/video/aty/atyfb_base.c @@ -363,8 +363,8 @@ static unsigned long phys_guiregbase[FB_MAX] __devinitdata = { 0, }; #define ATI_CHIP_264GTPRO (ATI_MODERN_SET | M64F_SDRAM_MAGIC_PLL | M64F_HW_TRIPLE | M64F_FIFO_32 | M64F_RESET_3D) #define ATI_CHIP_264LTPRO (ATI_MODERN_SET | M64F_HW_TRIPLE | M64F_FIFO_32 | M64F_RESET_3D) -#define ATI_CHIP_264XL (ATI_MODERN_SET | M64F_HW_TRIPLE | M64F_FIFO_32 | M64F_RESET_3D | M64F_XL_DLL | M64F_MFB_FORCE_4) -#define ATI_CHIP_MOBILITY (ATI_MODERN_SET | M64F_HW_TRIPLE | M64F_FIFO_32 | M64F_RESET_3D | M64F_XL_DLL | M64F_MFB_FORCE_4 | M64F_MOBIL_BUS) +#define ATI_CHIP_264XL (ATI_MODERN_SET | M64F_HW_TRIPLE | M64F_FIFO_32 | M64F_RESET_3D | M64F_XL_DLL | M64F_MFB_FORCE_4 | M64F_XL_MEM) +#define ATI_CHIP_MOBILITY (ATI_MODERN_SET | M64F_HW_TRIPLE | M64F_FIFO_32 | M64F_RESET_3D | M64F_XL_DLL | M64F_MFB_FORCE_4 | M64F_XL_MEM | M64F_MOBIL_BUS) static struct { u16 pci_id; @@ -541,6 +541,7 @@ static char ram_edo[] __devinitdata = "EDO"; static char ram_sdram[] __devinitdata = "SDRAM (1:1)"; static char ram_sgram[] __devinitdata = "SGRAM (1:1)"; static char ram_sdram32[] __devinitdata = "SDRAM (2:1) 
(32-bit)"; +static char ram_wram[] __devinitdata = "WRAM"; static char ram_off[] __devinitdata = "OFF"; #endif /* CONFIG_FB_ATY_CT */ @@ -554,6 +555,10 @@ static char *aty_gx_ram[8] __devinitdata = { #ifdef CONFIG_FB_ATY_CT static char *aty_ct_ram[8] __devinitdata = { + ram_off, ram_dram, ram_edo, ram_edo, + ram_sdram, ram_sgram, ram_wram, ram_resv +}; +static char *aty_xl_ram[8] __devinitdata = { ram_off, ram_dram, ram_edo, ram_edo, ram_sdram, ram_sgram, ram_sdram32, ram_resv }; @@ -762,6 +767,17 @@ static void aty_set_crtc(const struct atyfb_par *par, const struct crtc *crtc) #endif /* CONFIG_FB_ATY_GENERIC_LCD */ } +static u32 calc_line_length(struct atyfb_par *par, u32 vxres, u32 bpp) +{ + u32 line_length = vxres * bpp / 8; + + if (par->ram_type == SGRAM || + (!M64_HAS(XL_MEM) && par->ram_type == WRAM)) + line_length = (line_length + 63) & ~63; + + return line_length; +} + static int aty_var_to_crtc(const struct fb_info *info, const struct fb_var_screeninfo *var, struct crtc *crtc) { @@ -771,13 +787,14 @@ static int aty_var_to_crtc(const struct fb_info *info, u32 h_total, h_disp, h_sync_strt, h_sync_end, h_sync_dly, h_sync_wid, h_sync_pol; u32 v_total, v_disp, v_sync_strt, v_sync_end, v_sync_wid, v_sync_pol, c_sync; u32 pix_width, dp_pix_width, dp_chain_mask; + u32 line_length; /* input */ - xres = var->xres; + xres = (var->xres + 7) & ~7; yres = var->yres; - vxres = var->xres_virtual; + vxres = (var->xres_virtual + 7) & ~7; vyres = var->yres_virtual; - xoffset = var->xoffset; + xoffset = (var->xoffset + 7) & ~7; yoffset = var->yoffset; bpp = var->bits_per_pixel; if (bpp == 16) @@ -829,7 +846,9 @@ static int aty_var_to_crtc(const struct fb_info *info, } else FAIL("invalid bpp"); - if (vxres * vyres * bpp / 8 > info->fix.smem_len) + line_length = calc_line_length(par, vxres, bpp); + + if (vyres * line_length > info->fix.smem_len) FAIL("not enough video RAM"); h_sync_pol = sync & FB_SYNC_HOR_HIGH_ACT ? 
0 : 1; @@ -971,7 +990,9 @@ static int aty_var_to_crtc(const struct fb_info *info, crtc->xoffset = xoffset; crtc->yoffset = yoffset; crtc->bpp = bpp; - crtc->off_pitch = ((yoffset*vxres+xoffset)*bpp/64) | (vxres<<19); + crtc->off_pitch = + ((yoffset * line_length + xoffset * bpp / 8) / 8) | + ((line_length / bpp) << 22); crtc->vline_crnt_vline = 0; crtc->h_tot_disp = h_total | (h_disp<<16); @@ -1396,7 +1417,9 @@ static int atyfb_set_par(struct fb_info *info) } aty_st_8(DAC_MASK, 0xff, par); - info->fix.line_length = var->xres_virtual * var->bits_per_pixel/8; + info->fix.line_length = calc_line_length(par, var->xres_virtual, + var->bits_per_pixel); + info->fix.visual = var->bits_per_pixel <= 8 ? FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_DIRECTCOLOR; @@ -1507,10 +1530,12 @@ static void set_off_pitch(struct atyfb_par *par, const struct fb_info *info) { u32 xoffset = info->var.xoffset; u32 yoffset = info->var.yoffset; - u32 vxres = par->crtc.vxres; + u32 line_length = info->fix.line_length; u32 bpp = info->var.bits_per_pixel; - par->crtc.off_pitch = ((yoffset * vxres + xoffset) * bpp / 64) | (vxres << 19); + par->crtc.off_pitch = + ((yoffset * line_length + xoffset * bpp / 8) / 8) | + ((line_length / bpp) << 22); } @@ -2203,7 +2228,7 @@ static void __devinit aty_calc_mem_refresh(struct atyfb_par *par, int xclk) const int *refresh_tbl; int i, size; - if (IS_XL(par->pci_id) || IS_MOBILITY(par->pci_id)) { + if (M64_HAS(XL_MEM)) { refresh_tbl = ragexl_tbl; size = ARRAY_SIZE(ragexl_tbl); } else { @@ -2337,7 +2362,10 @@ static int __devinit aty_init(struct fb_info *info) par->pll_ops = &aty_pll_ct; par->bus_type = PCI; par->ram_type = (aty_ld_le32(CNFG_STAT0, par) & 0x07); - ramname = aty_ct_ram[par->ram_type]; + if (M64_HAS(XL_MEM)) + ramname = aty_xl_ram[par->ram_type]; + else + ramname = aty_ct_ram[par->ram_type]; /* for many chips, the mclk is 67 MHz for SDRAM, 63 MHz otherwise */ if (par->pll_limits.mclk == 67 && par->ram_type < SDRAM) par->pll_limits.mclk = 63; diff --git 
a/drivers/video/aty/mach64_accel.c b/drivers/video/aty/mach64_accel.c index 0cc9724e61a2..51fcc0a2c94a 100644 --- a/drivers/video/aty/mach64_accel.c +++ b/drivers/video/aty/mach64_accel.c @@ -63,14 +63,17 @@ static void reset_GTC_3D_engine(const struct atyfb_par *par) void aty_init_engine(struct atyfb_par *par, struct fb_info *info) { u32 pitch_value; + u32 vxres; /* determine modal information from global mode structure */ - pitch_value = info->var.xres_virtual; + pitch_value = info->fix.line_length / (info->var.bits_per_pixel / 8); + vxres = info->var.xres_virtual; if (info->var.bits_per_pixel == 24) { /* In 24 bpp, the engine is in 8 bpp - this requires that all */ /* horizontal coordinates and widths must be adjusted */ pitch_value *= 3; + vxres *= 3; } /* On GTC (RagePro), we need to reset the 3D engine before */ @@ -133,7 +136,7 @@ void aty_init_engine(struct atyfb_par *par, struct fb_info *info) aty_st_le32(SC_LEFT, 0, par); aty_st_le32(SC_TOP, 0, par); aty_st_le32(SC_BOTTOM, par->crtc.vyres - 1, par); - aty_st_le32(SC_RIGHT, pitch_value - 1, par); + aty_st_le32(SC_RIGHT, vxres - 1, par); /* set background color to minimum value (usually BLACK) */ aty_st_le32(DP_BKGD_CLR, 0, par); From 9980060bad5607ca6db7fb8683de671b522e56a4 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 30 Jun 2009 11:41:43 -0700 Subject: [PATCH 251/741] bfin: delay IRQ registration until driver is ready Make sure we do not actually request the RTC IRQ until the device driver is fully ready to handle and process any interrupt. This way a spurious interrupt won't crash the system (which may happen if the bootloader was poking the RTC right before booting Linux). 
Signed-off-by: Mike Frysinger Signed-off-by: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-bfin.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/rtc/rtc-bfin.c b/drivers/rtc/rtc-bfin.c index aafd3e6ebb0d..a118eb0f1e67 100644 --- a/drivers/rtc/rtc-bfin.c +++ b/drivers/rtc/rtc-bfin.c @@ -1,8 +1,8 @@ /* * Blackfin On-Chip Real Time Clock Driver - * Supports BF52[257]/BF53[123]/BF53[467]/BF54[24789] + * Supports BF51x/BF52x/BF53[123]/BF53[467]/BF54x * - * Copyright 2004-2008 Analog Devices Inc. + * Copyright 2004-2009 Analog Devices Inc. * * Enter bugs at http://blackfin.uclinux.org/ * @@ -363,7 +363,7 @@ static int __devinit bfin_rtc_probe(struct platform_device *pdev) struct bfin_rtc *rtc; struct device *dev = &pdev->dev; int ret = 0; - unsigned long timeout; + unsigned long timeout = jiffies + HZ; dev_dbg_stamp(dev); @@ -374,32 +374,32 @@ static int __devinit bfin_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc); device_init_wakeup(dev, 1); + /* Register our RTC with the RTC framework */ + rtc->rtc_dev = rtc_device_register(pdev->name, dev, &bfin_rtc_ops, + THIS_MODULE); + if (unlikely(IS_ERR(rtc->rtc_dev))) { + ret = PTR_ERR(rtc->rtc_dev); + goto err; + } + /* Grab the IRQ and init the hardware */ ret = request_irq(IRQ_RTC, bfin_rtc_interrupt, IRQF_SHARED, pdev->name, dev); if (unlikely(ret)) - goto err; + goto err_reg; /* sometimes the bootloader touched things, but the write complete was not * enabled, so let's just do a quick timeout here since the IRQ will not fire ... 
*/ - timeout = jiffies + HZ; while (bfin_read_RTC_ISTAT() & RTC_ISTAT_WRITE_PENDING) if (time_after(jiffies, timeout)) break; bfin_rtc_reset(dev, RTC_ISTAT_WRITE_COMPLETE); bfin_write_RTC_SWCNT(0); - /* Register our RTC with the RTC framework */ - rtc->rtc_dev = rtc_device_register(pdev->name, dev, &bfin_rtc_ops, THIS_MODULE); - if (unlikely(IS_ERR(rtc->rtc_dev))) { - ret = PTR_ERR(rtc->rtc_dev); - goto err_irq; - } - return 0; - err_irq: - free_irq(IRQ_RTC, dev); - err: +err_reg: + rtc_device_unregister(rtc->rtc_dev); +err: kfree(rtc); return ret; } From 8516a500029890a72622d245f8ed32c4e30969b7 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 30 Jun 2009 11:41:44 -0700 Subject: [PATCH 252/741] floppy: fix lock imbalance A crappy macro prevents us unlocking on a fail path. Expand the macro and unlock appropriately. Signed-off-by: Jiri Slaby Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/floppy.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 862b40c90181..91b753013780 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3327,7 +3327,10 @@ static inline int set_geometry(unsigned int cmd, struct floppy_struct *g, if (!capable(CAP_SYS_ADMIN)) return -EPERM; mutex_lock(&open_lock); - LOCK_FDC(drive, 1); + if (lock_fdc(drive, 1)) { + mutex_unlock(&open_lock); + return -EINTR; + } floppy_type[type] = *g; floppy_type[type].name = "user format"; for (cnt = type << 2; cnt < (type << 2) + 4; cnt++) From 752fa51e4c5182c3c257f1cede90577a7e213c58 Mon Sep 17 00:00:00 2001 From: Wolfgang Illmeyer Date: Tue, 30 Jun 2009 11:41:44 -0700 Subject: [PATCH 253/741] hostfs: set maximum filesize in superblock for proper LFS support Maximum file size for hostfs mounts defaults to 2GB, so bigger files cannot be read/written through hostfs. This patch initializes the maximum file size to MAX_LFS_FILESIZE. 
Addresses http://bugzilla.kernel.org/show_bug.cgi?id=13531 Signed-off-by: Wolfgang Illmeyer Cc: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hostfs/hostfs_kern.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index fe02ad4740e7..032604e5ef2c 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -972,6 +972,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) sb->s_blocksize_bits = 10; sb->s_magic = HOSTFS_SUPER_MAGIC; sb->s_op = &hostfs_sbops; + sb->s_maxbytes = MAX_LFS_FILESIZE; /* NULL is printed as by sprintf: avoid that. */ if (req_root == NULL) From aee3ff1b413cff44e7d91dd1901cacd8988ce9cf Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jun 2009 22:24:54 +0100 Subject: [PATCH 254/741] FRV: Wire up new syscalls Wire up new syscalls rt_tgsigqueueinfo and perf_counter_open. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- arch/frv/include/asm/unistd.h | 4 +++- arch/frv/kernel/entry.S | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h index 96d78d5d2c41..4a8fb427ce0a 100644 --- a/arch/frv/include/asm/unistd.h +++ b/arch/frv/include/asm/unistd.h @@ -341,10 +341,12 @@ #define __NR_inotify_init1 332 #define __NR_preadv 333 #define __NR_pwritev 334 +#define __NR_rt_tgsigqueueinfo 335 +#define __NR_perf_counter_open 336 #ifdef __KERNEL__ -#define NR_syscalls 335 +#define NR_syscalls 337 #define __ARCH_WANT_IPC_PARSE_VERSION /* #define __ARCH_WANT_OLD_READDIR */ diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S index 356e0e327a89..fde1e446b440 100644 --- a/arch/frv/kernel/entry.S +++ b/arch/frv/kernel/entry.S @@ -1524,5 +1524,7 @@ sys_call_table: .long sys_inotify_init1 .long sys_preadv .long sys_pwritev + .long sys_rt_tgsigqueueinfo /* 335 */ + .long sys_perf_counter_open syscall_table_size = (. 
- sys_call_table) From 6086071005674eb982d898c75269c931240154cf Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jun 2009 22:33:15 +0100 Subject: [PATCH 255/741] MN10300: Wire up new syscalls Wire up new syscalls rt_tgsigqueueinfo and perf_counter_open. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- arch/mn10300/include/asm/unistd.h | 4 +++- arch/mn10300/kernel/entry.S | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h index fef5b434dadc..fad68616af32 100644 --- a/arch/mn10300/include/asm/unistd.h +++ b/arch/mn10300/include/asm/unistd.h @@ -346,10 +346,12 @@ #define __NR_inotify_init1 333 #define __NR_preadv 334 #define __NR_pwritev 335 +#define __NR_rt_tgsigqueueinfo 336 +#define __NR_perf_counter_open 337 #ifdef __KERNEL__ -#define NR_syscalls 326 +#define NR_syscalls 338 /* * specify the deprecated syscalls we want to support on this arch diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S index 7408a27199f3..e0d2563af4f2 100644 --- a/arch/mn10300/kernel/entry.S +++ b/arch/mn10300/kernel/entry.S @@ -722,6 +722,8 @@ ENTRY(sys_call_table) .long sys_inotify_init1 .long sys_preadv .long sys_pwritev /* 335 */ + .long sys_rt_tgsigqueueinfo + .long sys_perf_counter_open nr_syscalls=(.-sys_call_table)/4 From 1ec22eb2b4a2e1a763106bce36b11c02eaa84e61 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 1 Jul 2009 12:27:21 +1000 Subject: [PATCH 256/741] md: fix error path when duplicate name is found on md device creation. When an md device is created by name (rather than number) we need to check that the name is not already in use. If this check finds a duplicate, we return an error without dropping the lock or freeing the newly created mddev. This patch fixes that. 
Cc: stable@kernel.org Found-by: Jiri Slaby Signed-off-by: NeilBrown --- drivers/md/md.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 2166af8a7654..58bee2366ea8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3862,6 +3862,8 @@ static int md_alloc(dev_t dev, char *name) if (mddev2->gendisk && strcmp(mddev2->gendisk->disk_name, name) == 0) { spin_unlock(&all_mddevs_lock); + mutex_unlock(&disks_mutex); + mddev_put(mddev); return -EEXIST; } spin_unlock(&all_mddevs_lock); From 0909dc448c98ed5021c87ffdfc09fb473aa464ab Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 1 Jul 2009 12:27:21 +1000 Subject: [PATCH 257/741] md: tidy up error paths in md_alloc As the recent bug in md_alloc showed, having a single exit path for unlocking and putting is a good idea. So restructure md_alloc to have a single mutex_unlock and mddev_put, and use gotos where necessary. Found-by: Jiri Slaby Signed-off-by: NeilBrown --- drivers/md/md.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 58bee2366ea8..65fe35b5e34a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3846,11 +3846,9 @@ static int md_alloc(dev_t dev, char *name) flush_scheduled_work(); mutex_lock(&disks_mutex); - if (mddev->gendisk) { - mutex_unlock(&disks_mutex); - mddev_put(mddev); - return -EEXIST; - } + error = -EEXIST; + if (mddev->gendisk) + goto abort; if (name) { /* Need to ensure that 'name' is not a duplicate. 
@@ -3862,19 +3860,15 @@ static int md_alloc(dev_t dev, char *name) if (mddev2->gendisk && strcmp(mddev2->gendisk->disk_name, name) == 0) { spin_unlock(&all_mddevs_lock); - mutex_unlock(&disks_mutex); - mddev_put(mddev); - return -EEXIST; + goto abort; } spin_unlock(&all_mddevs_lock); } + error = -ENOMEM; mddev->queue = blk_alloc_queue(GFP_KERNEL); - if (!mddev->queue) { - mutex_unlock(&disks_mutex); - mddev_put(mddev); - return -ENOMEM; - } + if (!mddev->queue) + goto abort; mddev->queue->queuedata = mddev; /* Can be unlocked because the queue is new: no concurrency */ @@ -3884,11 +3878,9 @@ static int md_alloc(dev_t dev, char *name) disk = alloc_disk(1 << shift); if (!disk) { - mutex_unlock(&disks_mutex); blk_cleanup_queue(mddev->queue); mddev->queue = NULL; - mddev_put(mddev); - return -ENOMEM; + goto abort; } disk->major = MAJOR(mddev->unit); disk->first_minor = unit << shift; @@ -3910,16 +3902,22 @@ static int md_alloc(dev_t dev, char *name) mddev->gendisk = disk; error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk_to_dev(disk)->kobj, "%s", "md"); - mutex_unlock(&disks_mutex); - if (error) + if (error) { + /* This isn't possible, but as kobject_init_and_add is marked + * __must_check, we must do something with the result + */ printk(KERN_WARNING "md: cannot register %s/md - name in use\n", disk->disk_name); - else { + error = 0; + } + abort: + mutex_unlock(&disks_mutex); + if (!error) { kobject_uevent(&mddev->kobj, KOBJ_ADD); mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state"); } mddev_put(mddev); - return 0; + return error; } static struct kobject *md_probe(dev_t dev, int *part, void *data) From eaea43abf30c8ccb447c190e7c94b46b5f75eae6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 29 Jun 2009 16:49:40 +0000 Subject: [PATCH 258/741] cdc_eem: Use netdev stats structure Now that netdev has its own stats structure we should use that instead. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- drivers/net/usb/cdc_eem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_eem.c b/drivers/net/usb/cdc_eem.c index 80e01778dd3b..cd35d50e46d4 100644 --- a/drivers/net/usb/cdc_eem.c +++ b/drivers/net/usb/cdc_eem.c @@ -319,7 +319,7 @@ static int eem_rx_fixup(struct usbnet *dev, struct sk_buff *skb) return crc == crc2; if (unlikely(crc != crc2)) { - dev->stats.rx_errors++; + dev->net->stats.rx_errors++; dev_kfree_skb_any(skb2); } else usbnet_skb_return(dev, skb2); From 9612101cb33862cc160069cc8423926d61db51f8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 29 Jun 2009 16:50:51 +0000 Subject: [PATCH 259/741] dm9601: Use netdev stats structure Now that netdev has its own stats structure we should use that instead. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/usb/dm9601.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 7ae82446b93a..1d3730d6690f 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -513,11 +513,11 @@ static int dm9601_rx_fixup(struct usbnet *dev, struct sk_buff *skb) len = (skb->data[1] | (skb->data[2] << 8)) - 4; if (unlikely(status & 0xbf)) { - if (status & 0x01) dev->stats.rx_fifo_errors++; - if (status & 0x02) dev->stats.rx_crc_errors++; - if (status & 0x04) dev->stats.rx_frame_errors++; - if (status & 0x20) dev->stats.rx_missed_errors++; - if (status & 0x90) dev->stats.rx_length_errors++; + if (status & 0x01) dev->net->stats.rx_fifo_errors++; + if (status & 0x02) dev->net->stats.rx_crc_errors++; + if (status & 0x04) dev->net->stats.rx_frame_errors++; + if (status & 0x20) dev->net->stats.rx_missed_errors++; + if (status & 0x90) dev->net->stats.rx_length_errors++; return 0; } From a22d2b36a2c4ca58c5914072a88704377bbd34f8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 29 Jun 2009 16:51:40 +0000 Subject: [PATCH 260/741] net1080: Use netdev stats structure Now that 
netdev has its own stats structure we should use that instead. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/usb/net1080.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/net1080.c b/drivers/net/usb/net1080.c index 034e8a73ca6b..aeb1ab03a9ee 100644 --- a/drivers/net/usb/net1080.c +++ b/drivers/net/usb/net1080.c @@ -433,7 +433,7 @@ static int net1080_rx_fixup(struct usbnet *dev, struct sk_buff *skb) dbg("rx framesize %d range %d..%d mtu %d", skb->len, net->hard_header_len, dev->hard_mtu, net->mtu); #endif - dev->stats.rx_frame_errors++; + dev->net->stats.rx_frame_errors++; nc_ensure_sync(dev); return 0; } @@ -442,12 +442,12 @@ static int net1080_rx_fixup(struct usbnet *dev, struct sk_buff *skb) hdr_len = le16_to_cpup(&header->hdr_len); packet_len = le16_to_cpup(&header->packet_len); if (FRAMED_SIZE(packet_len) > NC_MAX_PACKET) { - dev->stats.rx_frame_errors++; + dev->net->stats.rx_frame_errors++; dbg("packet too big, %d", packet_len); nc_ensure_sync(dev); return 0; } else if (hdr_len < MIN_HEADER) { - dev->stats.rx_frame_errors++; + dev->net->stats.rx_frame_errors++; dbg("header too short, %d", hdr_len); nc_ensure_sync(dev); return 0; @@ -465,21 +465,21 @@ static int net1080_rx_fixup(struct usbnet *dev, struct sk_buff *skb) if ((packet_len & 0x01) == 0) { if (skb->data [packet_len] != PAD_BYTE) { - dev->stats.rx_frame_errors++; + dev->net->stats.rx_frame_errors++; dbg("bad pad"); return 0; } skb_trim(skb, skb->len - 1); } if (skb->len != packet_len) { - dev->stats.rx_frame_errors++; + dev->net->stats.rx_frame_errors++; dbg("bad packet len %d (expected %d)", skb->len, packet_len); nc_ensure_sync(dev); return 0; } if (header->packet_id != get_unaligned(&trailer->packet_id)) { - dev->stats.rx_fifo_errors++; + dev->net->stats.rx_fifo_errors++; dbg("(2+ dropped) rx packet_id mismatch 0x%x 0x%x", le16_to_cpu(header->packet_id), le16_to_cpu(trailer->packet_id)); From 
58e2e7d5913e7a2a6d87ef30d3b52e66b88e6e1d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 29 Jun 2009 16:52:26 +0000 Subject: [PATCH 261/741] rndis_host: Use netdev stats structure Now that netdev has its own stats structure we should use that instead. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/usb/rndis_host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index 1bf243ef950e..2232232b7989 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -487,7 +487,7 @@ int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb) if (unlikely(hdr->msg_type != RNDIS_MSG_PACKET || skb->len < msg_len || (data_offset + data_len + 8) > msg_len)) { - dev->stats.rx_frame_errors++; + dev->net->stats.rx_frame_errors++; devdbg(dev, "bad rndis message %d/%d/%d/%d, len %d", le32_to_cpu(hdr->msg_type), msg_len, data_offset, data_len, skb->len); From 80667ac13a6cf2c3a3ff275a2a72809671299acb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 29 Jun 2009 16:53:00 +0000 Subject: [PATCH 262/741] smsc95xx: Use netdev stats structure Now that netdev has its own stats structure we should use that instead. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- drivers/net/usb/smsc95xx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 89a91f8c22de..fe045896406b 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1108,18 +1108,18 @@ static int smsc95xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb) if (unlikely(header & RX_STS_ES_)) { if (netif_msg_rx_err(dev)) devdbg(dev, "Error header=0x%08x", header); - dev->stats.rx_errors++; - dev->stats.rx_dropped++; + dev->net->stats.rx_errors++; + dev->net->stats.rx_dropped++; if (header & RX_STS_CRC_) { - dev->stats.rx_crc_errors++; + dev->net->stats.rx_crc_errors++; } else { if (header & (RX_STS_TL_ | RX_STS_RF_)) - dev->stats.rx_frame_errors++; + dev->net->stats.rx_frame_errors++; if ((header & RX_STS_LE_) && (!(header & RX_STS_FT_))) - dev->stats.rx_length_errors++; + dev->net->stats.rx_length_errors++; } } else { /* ETH_FRAME_LEN + 4(CRC) + 2(COE) + 4(Vlan) */ From 7963837f933df8a8ada56fa8f8205ebab40f84d0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 29 Jun 2009 16:53:28 +0000 Subject: [PATCH 263/741] usbnet: Use netdev stats structure Now that netdev has its own stats structure we should use that instead. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- drivers/net/usb/usbnet.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 22c0585a0319..edfd9e10ceba 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -234,8 +234,8 @@ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb) int status; skb->protocol = eth_type_trans (skb, dev->net); - dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + dev->net->stats.rx_packets++; + dev->net->stats.rx_bytes += skb->len; if (netif_msg_rx_status (dev)) devdbg (dev, "< rx, len %zu, type 0x%x", @@ -397,7 +397,7 @@ static inline void rx_process (struct usbnet *dev, struct sk_buff *skb) if (netif_msg_rx_err (dev)) devdbg (dev, "drop"); error: - dev->stats.rx_errors++; + dev->net->stats.rx_errors++; skb_queue_tail (&dev->done, skb); } } @@ -420,8 +420,8 @@ static void rx_complete (struct urb *urb) case 0: if (skb->len < dev->net->hard_header_len) { entry->state = rx_cleanup; - dev->stats.rx_errors++; - dev->stats.rx_length_errors++; + dev->net->stats.rx_errors++; + dev->net->stats.rx_length_errors++; if (netif_msg_rx_err (dev)) devdbg (dev, "rx length %d", skb->len); } @@ -433,7 +433,7 @@ static void rx_complete (struct urb *urb) * storm, recovering as needed. */ case -EPIPE: - dev->stats.rx_errors++; + dev->net->stats.rx_errors++; usbnet_defer_kevent (dev, EVENT_RX_HALT); // FALLTHROUGH @@ -451,7 +451,7 @@ static void rx_complete (struct urb *urb) case -EPROTO: case -ETIME: case -EILSEQ: - dev->stats.rx_errors++; + dev->net->stats.rx_errors++; if (!timer_pending (&dev->delay)) { mod_timer (&dev->delay, jiffies + THROTTLE_JIFFIES); if (netif_msg_link (dev)) @@ -465,12 +465,12 @@ block: /* data overrun ... flush fifo? 
*/ case -EOVERFLOW: - dev->stats.rx_over_errors++; + dev->net->stats.rx_over_errors++; // FALLTHROUGH default: entry->state = rx_cleanup; - dev->stats.rx_errors++; + dev->net->stats.rx_errors++; if (netif_msg_rx_err (dev)) devdbg (dev, "rx status %d", urb_status); break; @@ -583,8 +583,8 @@ int usbnet_stop (struct net_device *net) if (netif_msg_ifdown (dev)) devinfo (dev, "stop stats: rx/tx %ld/%ld, errs %ld/%ld", - dev->stats.rx_packets, dev->stats.tx_packets, - dev->stats.rx_errors, dev->stats.tx_errors + net->stats.rx_packets, net->stats.tx_packets, + net->stats.rx_errors, net->stats.tx_errors ); // ensure there are no more active urbs @@ -891,10 +891,10 @@ static void tx_complete (struct urb *urb) struct usbnet *dev = entry->dev; if (urb->status == 0) { - dev->stats.tx_packets++; - dev->stats.tx_bytes += entry->length; + dev->net->stats.tx_packets++; + dev->net->stats.tx_bytes += entry->length; } else { - dev->stats.tx_errors++; + dev->net->stats.tx_errors++; switch (urb->status) { case -EPIPE: @@ -1020,7 +1020,7 @@ int usbnet_start_xmit (struct sk_buff *skb, struct net_device *net) devdbg (dev, "drop, code %d", retval); drop: retval = NET_XMIT_SUCCESS; - dev->stats.tx_dropped++; + dev->net->stats.tx_dropped++; if (skb) dev_kfree_skb_any (skb); usb_free_urb (urb); From d9d62f3f2c6fa609883714f6fd6cd710a83d307f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 29 Jun 2009 16:54:12 +0000 Subject: [PATCH 264/741] usbnet: Remove private stats structure Now that nothing uses the private stats structure we can remove it. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/linux/usb/usbnet.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 5d44059f6d63..310e18a880ff 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -42,7 +42,6 @@ struct usbnet { /* protocol/interface state */ struct net_device *net; - struct net_device_stats stats; int msg_enable; unsigned long data [5]; u32 xid; From 88d2b81f4ee8f9ea3798dbe6105beb5609844317 Mon Sep 17 00:00:00 2001 From: Don Skidmore Date: Tue, 30 Jun 2009 11:43:55 +0000 Subject: [PATCH 265/741] ixgbe: Fix SFP log messages We had a wide range of log messages for the same sort of SFP failure. This patch makes them all more similar and less confusing along with converting them to dev_err. Signed-off-by: Don Skidmore Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_main.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index e756e220db32..30d8c0e41a9d 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -2701,7 +2701,10 @@ static int ixgbe_up_complete(struct ixgbe_adapter *adapter) */ err = hw->phy.ops.identify(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { - DPRINTK(PROBE, ERR, "PHY not supported on this NIC %d\n", err); + dev_err(&adapter->pdev->dev, "failed to initialize because " + "an unsupported SFP+ module type was detected.\n" + "Reload the driver after installing a supported " + "module.\n"); ixgbe_down(adapter); return err; } @@ -3720,10 +3723,11 @@ static void ixgbe_sfp_task(struct work_struct *work) goto reschedule; ret = hw->phy.ops.reset(hw); if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) { - DPRINTK(PROBE, ERR, "failed to initialize because an " - "unsupported SFP+ module type was detected.\n" - "Reload the driver after installing a " - "supported module.\n"); + dev_err(&adapter->pdev->dev, "failed to initialize " 
+ "because an unsupported SFP+ module type " + "was detected.\n" + "Reload the driver after installing a " + "supported module.\n"); unregister_netdev(adapter->netdev); } else { DPRINTK(PROBE, INFO, "detected SFP+: %d\n", @@ -4526,7 +4530,10 @@ static void ixgbe_sfp_config_module_task(struct work_struct *work) adapter->flags |= IXGBE_FLAG_IN_SFP_MOD_TASK; err = hw->phy.ops.identify_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { - DPRINTK(PROBE, ERR, "PHY not supported on this NIC %d\n", err); + dev_err(&adapter->pdev->dev, "failed to initialize because " + "an unsupported SFP+ module type was detected.\n" + "Reload the driver after installing a supported " + "module.\n"); ixgbe_down(adapter); return; } @@ -5513,8 +5520,10 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, round_jiffies(jiffies + (2 * HZ))); err = 0; } else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { - dev_err(&adapter->pdev->dev, "failed to load because an " - "unsupported SFP+ module type was detected.\n"); + dev_err(&adapter->pdev->dev, "failed to initialize because " + "an unsupported SFP+ module type was detected.\n" + "Reload the driver after installing a supported " + "module.\n"); goto err_sw_init; } else if (err) { dev_err(&adapter->pdev->dev, "HW Init failed: %d\n", err); From a380137900fca5c79e6daa9500bdb6ea5649188e Mon Sep 17 00:00:00 2001 From: Mallikarjuna R Chilakala Date: Tue, 30 Jun 2009 11:44:16 +0000 Subject: [PATCH 266/741] ixgbe: Fix device capabilities of 82599 single speed fiber NICs. 82599 single speed fiber modules only support 10G/Full. Return proper device capabilities while querrying the adapter and error while changing device advertisement/speed/duplex capabilities. Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/ixgbe/ixgbe_ethtool.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c index 86f4f3e36f27..0f7b6a3a2e68 100644 --- a/drivers/net/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ixgbe/ixgbe_ethtool.c @@ -139,7 +139,7 @@ static int ixgbe_get_settings(struct net_device *netdev, ecmd->autoneg = AUTONEG_ENABLE; ecmd->transceiver = XCVR_EXTERNAL; if ((hw->phy.media_type == ixgbe_media_type_copper) || - (hw->mac.type == ixgbe_mac_82599EB)) { + (hw->phy.multispeed_fiber)) { ecmd->supported |= (SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg); @@ -217,7 +217,7 @@ static int ixgbe_set_settings(struct net_device *netdev, s32 err = 0; if ((hw->phy.media_type == ixgbe_media_type_copper) || - (hw->mac.type == ixgbe_mac_82599EB)) { + (hw->phy.multispeed_fiber)) { /* 10000/copper and 1000/copper must autoneg * this function does not support any duplex forcing, but can * limit the advertising of the adapter to only 10000 or 1000 */ @@ -245,6 +245,7 @@ static int ixgbe_set_settings(struct net_device *netdev, } else { /* in this case we currently only support 10Gb/FULL */ if ((ecmd->autoneg == AUTONEG_ENABLE) || + (ecmd->advertising != ADVERTISED_10000baseT_Full) || (ecmd->speed + ecmd->duplex != SPEED_10000 + DUPLEX_FULL)) return -EINVAL; } From a1f25324b93ecdab1cbb27d3e9c4cafecb06ceda Mon Sep 17 00:00:00 2001 From: Mallikarjuna R Chilakala Date: Tue, 30 Jun 2009 11:44:36 +0000 Subject: [PATCH 267/741] ixgbe: Fix link capabilities during adapter resets Adapter link advertisement capabilities were not persistent during adapter resets. While configuring multispeed fiber link check for phy autoneg_advertised settings before overwriting with default link capabilities Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/ixgbe/ixgbe_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 30d8c0e41a9d..fce2ef49b3a7 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -4506,7 +4506,8 @@ static void ixgbe_multispeed_fiber_task(struct work_struct *work) u32 autoneg; adapter->flags |= IXGBE_FLAG_IN_SFP_LINK_TASK; - if (hw->mac.ops.get_link_capabilities) + autoneg = hw->phy.autoneg_advertised; + if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) hw->mac.ops.get_link_capabilities(hw, &autoneg, &hw->mac.autoneg); if (hw->mac.ops.setup_link_speed) From 4f57ca6e17edfc56ddde5c87eb893e47e0d2d343 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 30 Jun 2009 11:44:56 +0000 Subject: [PATCH 268/741] ixgbe: fix unmap length bug This patch addresses three WARN_ON statements from DMA-API debug code ixgbe is mapping more than it unmaps, reduce the length of the map call and remove the "used once" local variable. found by Joerg Roedel in 2.6.30, so is a candidate for -stable. in addition, fix missing ->dma = 0 after unmap to prevent double free with pci_unmap_single and lastly, don't unmap (half) pages that aren't mapped. Signed-off-by: Jesse Brandeburg CC: Joerg Roedel Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/ixgbe/ixgbe_main.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index fce2ef49b3a7..5588ef493a3d 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -563,7 +563,6 @@ static void ixgbe_alloc_rx_buffers(struct ixgbe_adapter *adapter, union ixgbe_adv_rx_desc *rx_desc; struct ixgbe_rx_buffer *bi; unsigned int i; - unsigned int bufsz = rx_ring->rx_buf_len + NET_IP_ALIGN; i = rx_ring->next_to_use; bi = &rx_ring->rx_buffer_info[i]; @@ -593,7 +592,9 @@ static void ixgbe_alloc_rx_buffers(struct ixgbe_adapter *adapter, if (!bi->skb) { struct sk_buff *skb; - skb = netdev_alloc_skb(adapter->netdev, bufsz); + skb = netdev_alloc_skb(adapter->netdev, + (rx_ring->rx_buf_len + + NET_IP_ALIGN)); if (!skb) { adapter->alloc_rx_buff_failed++; @@ -608,7 +609,8 @@ static void ixgbe_alloc_rx_buffers(struct ixgbe_adapter *adapter, skb_reserve(skb, NET_IP_ALIGN); bi->skb = skb; - bi->dma = pci_map_single(pdev, skb->data, bufsz, + bi->dma = pci_map_single(pdev, skb->data, + rx_ring->rx_buf_len, PCI_DMA_FROMDEVICE); } /* Refresh the desc even if buffer_addrs didn't change because @@ -732,6 +734,7 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, pci_unmap_single(pdev, rx_buffer_info->dma, rx_ring->rx_buf_len, PCI_DMA_FROMDEVICE); + rx_buffer_info->dma = 0; skb_put(skb, len); } @@ -2815,9 +2818,11 @@ static void ixgbe_clean_rx_ring(struct ixgbe_adapter *adapter, } if (!rx_buffer_info->page) continue; - pci_unmap_page(pdev, rx_buffer_info->page_dma, PAGE_SIZE / 2, - PCI_DMA_FROMDEVICE); - rx_buffer_info->page_dma = 0; + if (rx_buffer_info->page_dma) { + pci_unmap_page(pdev, rx_buffer_info->page_dma, + PAGE_SIZE / 2, PCI_DMA_FROMDEVICE); + rx_buffer_info->page_dma = 0; + } put_page(rx_buffer_info->page); rx_buffer_info->page = NULL; rx_buffer_info->page_offset = 0; From 91615f765a2935b6cbae424b9eee1585ed681ae6 Mon Sep 17 
00:00:00 2001 From: Jesse Brandeburg Date: Tue, 30 Jun 2009 12:45:15 +0000 Subject: [PATCH 269/741] igb: fix unmap length bug driver was mixing NET_IP_ALIGN count bytes in map/unmap calls unevenly. Only map the bytes that the hardware might dma into also fix unmap related bug where ->dma was not being cleared after unmap Signed-off-by: Jesse Brandeburg Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/igb/igb_main.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index ea17319624aa..468356d124ea 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -4549,11 +4549,12 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring, cleaned = true; cleaned_count++; + /* this is the fast path for the non-packet split case */ if (!adapter->rx_ps_hdr_size) { pci_unmap_single(pdev, buffer_info->dma, - adapter->rx_buffer_len + - NET_IP_ALIGN, + adapter->rx_buffer_len, PCI_DMA_FROMDEVICE); + buffer_info->dma = 0; skb_put(skb, length); goto send_up; } @@ -4570,8 +4571,9 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring, if (!skb_shinfo(skb)->nr_frags) { pci_unmap_single(pdev, buffer_info->dma, - adapter->rx_ps_hdr_size + NET_IP_ALIGN, + adapter->rx_ps_hdr_size, PCI_DMA_FROMDEVICE); + buffer_info->dma = 0; skb_put(skb, hlen); } @@ -4713,7 +4715,6 @@ static void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, bufsz = adapter->rx_ps_hdr_size; else bufsz = adapter->rx_buffer_len; - bufsz += NET_IP_ALIGN; while (cleaned_count--) { rx_desc = E1000_RX_DESC_ADV(*rx_ring, i); @@ -4737,7 +4738,7 @@ static void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, } if (!buffer_info->skb) { - skb = netdev_alloc_skb(netdev, bufsz); + skb = netdev_alloc_skb(netdev, bufsz + NET_IP_ALIGN); if (!skb) { adapter->alloc_rx_buff_failed++; goto no_buffers; From 679be3ba0c493eb66d22c206273729ce50925e85 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: 
Tue, 30 Jun 2009 12:45:34 +0000 Subject: [PATCH 270/741] e1000: fix unmap bug as reported by kerneloops.org [ 121.781161] ------------[ cut here ]------------ [ 121.781171] WARNING: at lib/dma-debug.c:793 check_unmap+0x14e/0x577() [ 121.781173] Hardware name: S5520HC [ 121.781177] e1000 0000:0a:00.0: DMA-API: device driver tries to free DMA memory it has not allocated [device address=0x00000001d688b0fa] [size=1522 bytes] [ 121.781180] Modules linked in: e1000 mdio dca [last unloaded: ixgbe] [ 121.781187] Pid: 4793, comm: bash Tainted: P 2.6.30-master-06161113 #3 [ 121.781190] Call Trace: [ 121.781195] [] ? check_unmap+0x14e/0x577 [ 121.781201] [] warn_slowpath_common+0x77/0x8f [ 121.781205] [] warn_slowpath_fmt+0x9f/0xa1 [ 121.781212] [] ? _spin_lock_irqsave+0x3f/0x49 [ 121.781216] [] ? get_hash_bucket+0x28/0x33 [ 121.781220] [] check_unmap+0x14e/0x577 [ 121.781225] [] ? check_bytes_and_report+0x38/0xcb [ 121.781230] [] debug_dma_unmap_page+0x80/0x92 [ 121.781234] [] ? unmap_single+0x1a/0x4e [ 121.781239] [] ? 
__kfree_skb+0x74/0x78 [ 121.781250] [] pci_unmap_single+0x64/0x6d [e1000] [ 121.781259] [] e1000_clean_rx_ring+0x4c/0xbf [e1000] [ 121.781268] [] e1000_clean_all_rx_rings+0x28/0x36 [e1000] [ 121.781277] [] e1000_down+0x138/0x141 [e1000] [ 121.781286] [] __e1000_shutdown+0x6b/0x198 [e1000] [ 121.781296] [] e1000_suspend+0x17/0x50 [e1000] [ 121.781301] [] pci_legacy_suspend+0x3b/0xbe [ 121.781305] [] pci_pm_suspend+0x3e/0xf1 [ 121.781310] [] pm_op+0x57/0xde [ 121.781314] [] dpm_suspend_start+0x31e/0x470 [ 121.781319] [] suspend_devices_and_enter+0x3e/0x1a2 [ 121.781323] [] enter_state+0xd1/0x127 [ 121.781327] [] state_store+0xa7/0xc9 [ 121.781332] [] kobj_attr_store+0x17/0x19 [ 121.781336] [] sysfs_write_file+0xe5/0x121 [ 121.781341] [] vfs_write+0xab/0x105 [ 121.781344] [] sys_write+0x47/0x6d [ 121.781349] [] system_call_fastpath+0x16/0x1b [ 121.781352] ---[ end trace 97bacaaac2ed7786 ]--- Fix is to correctly zero out internal ->dma value when unmapping and make sure never to unmap unless there specifically was a mapping done. Signed-off-by: Jesse Brandeburg Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/e1000/e1000_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 5e3356f8eb5a..972e06d984c8 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -2185,12 +2185,16 @@ static void e1000_clean_rx_ring(struct e1000_adapter *adapter, /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { buffer_info = &rx_ring->buffer_info[i]; - if (buffer_info->skb) { + if (buffer_info->dma) { pci_unmap_single(pdev, buffer_info->dma, buffer_info->length, PCI_DMA_FROMDEVICE); + } + buffer_info->dma = 0; + + if (buffer_info->skb) { dev_kfree_skb(buffer_info->skb); buffer_info->skb = NULL; } @@ -4033,6 +4037,7 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, buffer_info->dma, buffer_info->length, PCI_DMA_FROMDEVICE); + buffer_info->dma = 0; length = le16_to_cpu(rx_desc->length); /* !EOP means multiple descriptors were used to store a single @@ -4222,6 +4227,7 @@ map_skb: pci_unmap_single(pdev, buffer_info->dma, adapter->rx_buffer_len, PCI_DMA_FROMDEVICE); + buffer_info->dma = 0; break; /* while !buffer_info->skb */ } From eab633021c26025b34f36f79f0311d3d99f40ceb Mon Sep 17 00:00:00 2001 From: Andre Detsch Date: Tue, 30 Jun 2009 12:46:13 +0000 Subject: [PATCH 271/741] e1000: return PCI_ERS_RESULT_DISCONNECT on permanent error PCI drivers that implement the io_error_detected callback should return PCI_ERS_RESULT_DISCONNECT if the state passed in is pci_channel_io_perm_failure. This state is not checked in many of the network drivers. The patch fixes the omission in the e1000 driver. Based on Mike Mason's similar patch for e1000e. Signed-off-by: Andre Detsch CC: Mike Mason Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/e1000/e1000_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 972e06d984c8..5b8cbdb4b520 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -4823,6 +4823,9 @@ static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, netif_device_detach(netdev); + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + if (netif_running(netdev)) e1000_down(adapter); pci_disable_device(pdev); From c93b5a76d58656158d195a7df507ebc660010969 Mon Sep 17 00:00:00 2001 From: Mike Mason Date: Tue, 30 Jun 2009 12:45:53 +0000 Subject: [PATCH 272/741] e1000e: io_error_detected callback should return PCI_ERS_RESULT_DISCONNECT on permanent failure PCI drivers that implement the io_error_detected callback should return PCI_ERS_RESULT_DISCONNECT if the state passed in is pci_channel_io_perm_failure. This state is not checked in many of the network drivers. This patch fixes the omission in the e1000e driver. Signed-off-by: Mike Mason Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/e1000e/netdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 679885a122b4..63415bb6f48f 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -4785,6 +4785,9 @@ static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, netif_device_detach(netdev); + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + if (netif_running(netdev)) e1000e_down(adapter); pci_disable_device(pdev); From 59ed6eecff4aa00c5c5d18ffd180acac108d596e Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 30 Jun 2009 12:46:34 +0000 Subject: [PATCH 273/741] igb: return PCI_ERS_RESULT_DISCONNECT on permanent error PCI drivers that implement the io_error_detected callback should return PCI_ERS_RESULT_DISCONNECT if the state passed in is pci_channel_io_perm_failure. This patch fixes the issue for igb. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/igb/igb_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index 468356d124ea..be480292aba1 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -5339,6 +5339,9 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, netif_device_detach(netdev); + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + if (netif_running(netdev)) igb_down(adapter); pci_disable_device(pdev); From f8a68e752bc4e39644843403168137663c984524 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 30 Jun 2009 16:27:17 +0000 Subject: [PATCH 274/741] Revert "ipv4: arp announce, arp_proxy and windows ip conflict verification" This reverts commit 73ce7b01b4496a5fbf9caf63033c874be692333f. After discovering that we don't listen to gratuitous arps in 2.6.30 I tracked the failure down to this commit. The patch makes absolutely no sense.
RFC2131, RFC3927 and RFC5227 are all in agreement that an arp request with sip == 0 should be used for the probe (to prevent learning) and an arp request with sip == tip should be used for the gratuitous announcement that people can learn from. It appears the author of the broken patch got those two cases confused and modified the code to drop all gratuitous arp traffic. Ouch! Cc: stable@kernel.org Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/ipv4/arp.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 8a3881e28aca..c29d75d8f1b1 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -801,11 +801,8 @@ static int arp_process(struct sk_buff *skb) * cache. */ - /* - * Special case: IPv4 duplicate address detection packet (RFC2131) - * and Gratuitous ARP/ARP Announce. (RFC3927, Section 2.4) - */ - if (sip == 0 || tip == sip) { + /* Special case: IPv4 duplicate address detection packet (RFC2131) */ + if (sip == 0) { if (arp->ar_op == htons(ARPOP_REQUEST) && inet_addr_type(net, tip) == RTN_LOCAL && !arp_ignore(in_dev, sip, tip)) From a5c308d4d1659b1f4833b863394e3e24cdbdfc6e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 1 Jul 2009 13:15:35 +1000 Subject: [PATCH 275/741] md/raid5: suspend shouldn't affect read requests. md allows writes to regions on an array to be suspended temporarily. This allows user-space to participate in aspects of reshape. In particular, data can be copied with no risk of a race. We should not be blocking read requests though, so don't. Cc: stable@kernel.org Signed-off-by: NeilBrown --- drivers/md/raid5.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 92ef9b6abfc7..1f444ae07f89 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3702,7 +3702,8 @@ static int make_request(struct request_queue *q, struct bio * bi) /* FIXME what if we get a false positive because these * are being updated.
*/ - if (logical_sector >= mddev->suspend_lo && + if (bio_data_dir(bi) == WRITE && + logical_sector >= mddev->suspend_lo && logical_sector < mddev->suspend_hi) { release_stripe(sh); schedule(); From e62e58a5ffdc98ac28d8dbd070c857620d541f99 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 1 Jul 2009 13:15:35 +1000 Subject: [PATCH 276/741] md: use interruptible wait when duration is controlled by userspace. User space can set various limits on an md array so that resync waits when it gets to a certain point, or so that I/O is blocked for a short while. When md is waiting against one of these limits, it should use an interruptible wait so as not to add to the load average, and so as not to trigger a warning if the wait goes on for too long. Signed-off-by: NeilBrown --- drivers/md/md.c | 14 ++++++++++---- drivers/md/raid5.c | 15 +++++++++++---- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 65fe35b5e34a..0f4a70c43ffc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6336,10 +6336,16 @@ void md_do_sync(mddev_t *mddev) sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } - if (j >= mddev->resync_max) - wait_event(mddev->recovery_wait, - mddev->resync_max > j - || kthread_should_stop()); + while (j >= mddev->resync_max && !kthread_should_stop()) { + /* As this condition is controlled by user-space, + * we can block indefinitely, so use '_interruptible' + * to avoid triggering warnings. + */ + flush_signals(current); /* just in case */ + wait_event_interruptible(mddev->recovery_wait, + mddev->resync_max > j + || kthread_should_stop()); + } if (kthread_should_stop()) goto interrupted; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 1f444ae07f89..37835538b58e 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3699,14 +3699,21 @@ static int make_request(struct request_queue *q, struct bio * bi) goto retry; } } - /* FIXME what if we get a false positive because these - * are being updated.
- */ + if (bio_data_dir(bi) == WRITE && logical_sector >= mddev->suspend_lo && logical_sector < mddev->suspend_hi) { release_stripe(sh); - schedule(); + /* As the suspend_* range is controlled by + * userspace, we want an interruptible + * wait. + */ + flush_signals(current); + prepare_to_wait(&conf->wait_for_overlap, + &w, TASK_INTERRUPTIBLE); + if (logical_sector >= mddev->suspend_lo && + logical_sector < mddev->suspend_hi) + schedule(); goto retry; } From 84f7597c116f811cfcb762e645fa258b7e585155 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 1 Jul 2009 04:55:35 +0000 Subject: [PATCH 277/741] sh: fix se7724 ceu names Use "ceu0" and "ceu1" as CEU names instead of "ceu". This fixes "memchunk" kernel command line selection on the solution engine 7724 board. With this patch applied use "memchunk.ceu0=1m" or "memchunk.ceu1=1m" on kernel command line to override physically memory size to one meg for CEU0 or CEU1. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/boards/mach-se/7724/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c index 21d18005fb4a..c050a8d76dfd 100644 --- a/arch/sh/boards/mach-se/7724/setup.c +++ b/arch/sh/boards/mach-se/7724/setup.c @@ -454,7 +454,7 @@ static int __init devices_setup(void) gpio_request(GPIO_FN_VIO0_CLK, NULL); gpio_request(GPIO_FN_VIO0_FLD, NULL); gpio_request(GPIO_FN_VIO0_HD, NULL); - platform_resource_setup_memory(&ceu0_device, "ceu", 4 << 20); + platform_resource_setup_memory(&ceu0_device, "ceu0", 4 << 20); /* enable CEU1 */ gpio_request(GPIO_FN_VIO1_D7, NULL); @@ -469,7 +469,7 @@ static int __init devices_setup(void) gpio_request(GPIO_FN_VIO1_HD, NULL); gpio_request(GPIO_FN_VIO1_VD, NULL); gpio_request(GPIO_FN_VIO1_CLK, NULL); - platform_resource_setup_memory(&ceu1_device, "ceu", 4 << 20); + platform_resource_setup_memory(&ceu1_device, "ceu1", 4 << 20); /* KEYSC */ gpio_request(GPIO_FN_KEYOUT5_IN5, NULL); 
From 0802d9e55cc6be6bd1b13f9dc9ef5aa6d24bca77 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 1 Jul 2009 05:16:31 +0000 Subject: [PATCH 278/741] sh: re-add LCDC fbdev support to the Migo-R defconfig Re-add LCDC fbdev support to the Migo-R defconfig. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/configs/migor_defconfig | 53 +++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig index da627d22c009..b18cfd39cac6 100644 --- a/arch/sh/configs/migor_defconfig +++ b/arch/sh/configs/migor_defconfig @@ -309,7 +309,7 @@ CONFIG_ZERO_PAGE_OFFSET=0x00001000 CONFIG_BOOT_LINK_OFFSET=0x00800000 CONFIG_ENTRY_OFFSET=0x00001000 CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttySC0,115200 earlyprintk=serial ip=on root=/dev/nfs ip=dhcp" +CONFIG_CMDLINE="console=tty0 console=ttySC0,115200 earlyprintk=serial ip=on root=/dev/nfs ip=dhcp" # # Bus options @@ -858,7 +858,35 @@ CONFIG_VIDEO_SH_MOBILE_CEU=y # # CONFIG_VGASTATE is not set # CONFIG_VIDEO_OUTPUT_CONTROL is not set -# CONFIG_FB is not set +CONFIG_FB=y +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB_DDC is not set +# CONFIG_FB_BOOT_VESA_SUPPORT is not set +# CONFIG_FB_CFB_FILLRECT is not set +# CONFIG_FB_CFB_COPYAREA is not set +# CONFIG_FB_CFB_IMAGEBLIT is not set +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +CONFIG_FB_SYS_FILLRECT=y +CONFIG_FB_SYS_COPYAREA=y +CONFIG_FB_SYS_IMAGEBLIT=y +# CONFIG_FB_FOREIGN_ENDIAN is not set +CONFIG_FB_SYS_FOPS=y +CONFIG_FB_DEFERRED_IO=y +# CONFIG_FB_SVGALIB is not set +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +# CONFIG_FB_MODE_HELPERS is not set +# CONFIG_FB_TILEBLITTING is not set + +# +# Frame buffer hardware drivers +# +# CONFIG_FB_S1D13XXX is not set +CONFIG_FB_SH_MOBILE_LCDC=y +# CONFIG_FB_VIRTUAL is not set +# CONFIG_FB_METRONOME is not set +# CONFIG_FB_MB862XX is not set +# CONFIG_FB_BROADSHEET is not set # 
CONFIG_BACKLIGHT_LCD_SUPPORT is not set # @@ -870,6 +898,27 @@ CONFIG_VIDEO_SH_MOBILE_CEU=y # Console display driver support # CONFIG_DUMMY_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set +CONFIG_FONTS=y +# CONFIG_FONT_8x8 is not set +# CONFIG_FONT_8x16 is not set +# CONFIG_FONT_6x11 is not set +# CONFIG_FONT_7x14 is not set +# CONFIG_FONT_PEARL_8x8 is not set +# CONFIG_FONT_ACORN_8x8 is not set +CONFIG_FONT_MINI_4x6=y +# CONFIG_FONT_SUN8x16 is not set +# CONFIG_FONT_SUN12x22 is not set +# CONFIG_FONT_10x18 is not set +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +# CONFIG_LOGO_LINUX_CLUT224 is not set +# CONFIG_LOGO_SUPERH_MONO is not set +CONFIG_LOGO_SUPERH_VGA16=y +# CONFIG_LOGO_SUPERH_CLUT224 is not set # CONFIG_SOUND is not set CONFIG_HID_SUPPORT=y CONFIG_HID=y From 1e1689536f346a431b748dc8ad9ac0828d2c065d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 1 Jul 2009 08:34:32 +0200 Subject: [PATCH 279/741] ALSA: hda - Add missing static to patch_ca0110() Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_ca0110.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_ca0110.c b/sound/pci/hda/patch_ca0110.c index 392d108c3558..019ca7cb56d7 100644 --- a/sound/pci/hda/patch_ca0110.c +++ b/sound/pci/hda/patch_ca0110.c @@ -510,7 +510,7 @@ static int ca0110_parse_auto_config(struct hda_codec *codec) } -int patch_ca0110(struct hda_codec *codec) +static int patch_ca0110(struct hda_codec *codec) { struct ca0110_spec *spec; int err; From 9198aa77b69647d1d91207f8075763abe7dc0bf4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 1 Jul 2009 05:35:13 +0200 Subject: [PATCH 280/741] perf_counter tools: Fix storage size allocation of callchain list Fix a confusion while giving the size of a callchain list during its allocation. We are using the wrong structure size. 
Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246419315-9968-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/callchain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index ad3c28578961..bbf7813fefe0 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -74,7 +74,7 @@ fill_node(struct callchain_node *node, struct ip_callchain *chain, int start) for (i = start; i < chain->nr; i++) { struct callchain_list *call; - call = malloc(sizeof(*chain)); + call = malloc(sizeof(*call)); if (!call) { perror("not enough memory for the code path tree"); return; From 4424961ad6621a02c6b4c9093e801002c1bb9f65 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 1 Jul 2009 05:35:14 +0200 Subject: [PATCH 281/741] perf_counter tools: Resolve symbols in callchains This patch resolves the names, when possible, of each ip present in the callchains while using the -c option with perf report. 
Example: 5.40% [k] __d_lookup 5.37% perf_callchain perf_counter_overflow intel_pmu_handle_irq perf_counter_nmi_handler notifier_call_chain atomic_notifier_call_chain notify_die do_nmi nmi do_lookup __link_path_walk path_walk do_path_lookup user_path_at sys_faccessat sys_access system_call_fastpath 0x7fb609846f77 0.01% perf_callchain perf_counter_overflow intel_pmu_handle_irq perf_counter_nmi_handler notifier_call_chain atomic_notifier_call_chain notify_die do_nmi nmi do_lookup __link_path_walk path_walk do_path_lookup user_path_at sys_faccessat Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246419315-9968-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 102 +++++++++++++++++++++++------------- tools/perf/util/callchain.c | 33 +++++++----- tools/perf/util/callchain.h | 5 +- 3 files changed, 90 insertions(+), 50 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3f5d8ea05ff0..197793051fa5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -794,8 +794,15 @@ callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples) ret += callchain__fprintf(fp, self->parent, total_samples); - list_for_each_entry(chain, &self->val, list) - ret += fprintf(fp, " %p\n", (void *)chain->ip); + list_for_each_entry(chain, &self->val, list) { + if (chain->ip >= PERF_CONTEXT_MAX) + continue; + if (chain->sym) + ret += fprintf(fp, " %s\n", chain->sym->name); + else + ret += fprintf(fp, " %p\n", + (void *)chain->ip); + } return ret; } @@ -930,6 +937,55 @@ static int call__match(struct symbol *sym) return 0; } +static struct symbol ** +resolve_callchain(struct thread *thread, struct map *map, + struct ip_callchain *chain, struct hist_entry *entry) +{ + int i; + struct symbol **syms; + u64 context = PERF_CONTEXT_MAX; + + if (callchain) { + syms = 
calloc(chain->nr, sizeof(*syms)); + if (!syms) { + fprintf(stderr, "Can't allocate memory for symbols\n"); + exit(-1); + } + } + + for (i = 0; i < chain->nr; i++) { + u64 ip = chain->ips[i]; + struct dso *dso = NULL; + struct symbol *sym; + + if (ip >= PERF_CONTEXT_MAX) { + context = ip; + continue; + } + + switch (context) { + case PERF_CONTEXT_KERNEL: + dso = kernel_dso; + break; + default: + break; + } + + sym = resolve_symbol(thread, NULL, &dso, &ip); + + if (sym) { + if (sort__has_parent && call__match(sym) && + !entry->parent) + entry->parent = sym; + if (!callchain) + break; + syms[i] = sym; + } + } + + return syms; +} + /* * collect histogram counts */ @@ -942,6 +998,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, struct rb_node **p = &hist.rb_node; struct rb_node *parent = NULL; struct hist_entry *he; + struct symbol **syms = NULL; struct hist_entry entry = { .thread = thread, .map = map, @@ -955,39 +1012,11 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, }; int cmp; - if (sort__has_parent && chain) { - u64 context = PERF_CONTEXT_MAX; - int i; - - for (i = 0; i < chain->nr; i++) { - u64 ip = chain->ips[i]; - struct dso *dso = NULL; - struct symbol *sym; - - if (ip >= PERF_CONTEXT_MAX) { - context = ip; - continue; - } - - switch (context) { case PERF_CONTEXT_HV: dso = hypervisor_dso; break; - case PERF_CONTEXT_KERNEL: - dso = kernel_dso; - break; - default: - break; - } - - sym = resolve_symbol(thread, NULL, &dso, &ip); - - if (sym && call__match(sym)) { - entry.parent = sym; - break; - } - } - } + if ((sort__has_parent || callchain) && chain) + syms = resolve_callchain(thread, map, chain, &entry); while (*p != NULL) { parent = *p; @@ -997,8 +1026,10 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, if (!cmp) { he->count += count; - if (callchain) - append_chain(&he->callchain, chain); + if (callchain) { + append_chain(&he->callchain, chain, syms); + free(syms); + } return 
0; } @@ -1014,7 +1045,8 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, *he = entry; if (callchain) { callchain_init(&he->callchain); - append_chain(&he->callchain, chain); + append_chain(&he->callchain, chain, syms); + free(syms); } rb_link_node(&he->rb_node, parent, p); rb_insert_color(&he->rb_node, &hist); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index bbf7813fefe0..6568cb198ba6 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -67,7 +67,8 @@ static struct callchain_node *create_child(struct callchain_node *parent) } static void -fill_node(struct callchain_node *node, struct ip_callchain *chain, int start) +fill_node(struct callchain_node *node, struct ip_callchain *chain, int start, + struct symbol **syms) { int i; @@ -80,24 +81,26 @@ fill_node(struct callchain_node *node, struct ip_callchain *chain, int start) return; } call->ip = chain->ips[i]; + call->sym = syms[i]; list_add_tail(&call->list, &node->val); } node->val_nr = i - start; } -static void add_child(struct callchain_node *parent, struct ip_callchain *chain) +static void add_child(struct callchain_node *parent, struct ip_callchain *chain, + struct symbol **syms) { struct callchain_node *new; new = create_child(parent); - fill_node(new, chain, parent->val_nr); + fill_node(new, chain, parent->val_nr, syms); new->hit = 1; } static void split_add_child(struct callchain_node *parent, struct ip_callchain *chain, - struct callchain_list *to_split, int idx) + struct callchain_list *to_split, int idx, struct symbol **syms) { struct callchain_node *new; @@ -109,21 +112,22 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, parent->val_nr = idx; /* create the new one */ - add_child(parent, chain); + add_child(parent, chain, syms); } static int __append_chain(struct callchain_node *root, struct ip_callchain *chain, - int start); + int start, struct symbol **syms); static int 
-__append_chain_children(struct callchain_node *root, struct ip_callchain *chain) +__append_chain_children(struct callchain_node *root, struct ip_callchain *chain, + struct symbol **syms) { struct callchain_node *rnode; /* lookup in childrens */ list_for_each_entry(rnode, &root->children, brothers) { - int ret = __append_chain(rnode, chain, root->val_nr); + int ret = __append_chain(rnode, chain, root->val_nr, syms); if (!ret) return 0; } @@ -132,7 +136,7 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain) static int __append_chain(struct callchain_node *root, struct ip_callchain *chain, - int start) + int start, struct symbol **syms) { struct callchain_list *cnode; int i = start; @@ -154,7 +158,7 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, /* we match only a part of the node. Split it and add the new chain */ if (i < root->val_nr) { - split_add_child(root, chain, cnode, i); + split_add_child(root, chain, cnode, i, syms); return 0; } @@ -164,11 +168,12 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, return 0; } - return __append_chain_children(root, chain); + return __append_chain_children(root, chain, syms); } -void append_chain(struct callchain_node *root, struct ip_callchain *chain) +void append_chain(struct callchain_node *root, struct ip_callchain *chain, + struct symbol **syms) { - if (__append_chain_children(root, chain) == -1) - add_child(root, chain); + if (__append_chain_children(root, chain, syms) == -1) + add_child(root, chain, syms); } diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index fa1cd2f71fd3..c942daa712e6 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -4,6 +4,7 @@ #include "../perf.h" #include "list.h" #include "rbtree.h" +#include "symbol.h" struct callchain_node { @@ -18,6 +19,7 @@ struct callchain_node { struct callchain_list { unsigned long ip; + struct symbol *sym; struct list_head list; }; @@ 
-28,6 +30,7 @@ static inline void callchain_init(struct callchain_node *node) INIT_LIST_HEAD(&node->val); } -void append_chain(struct callchain_node *root, struct ip_callchain *chain); +void append_chain(struct callchain_node *root, struct ip_callchain *chain, + struct symbol **syms); void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node); #endif From deac911cbdcb124fa0cee47c588e0cb0400b23b7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 1 Jul 2009 05:35:15 +0200 Subject: [PATCH 282/741] perf_counter tools: Various fixes for callchains The symbol resolving has of course revealed some bugs in the callchain tree handling. This patch fixes some of them, including: - inherit the children from the parents while splitting a node - fix list range moving - fix indexes setting in callchains - create a child on the current node if the path doesn't match in the existent children (was only done on the root) - compare using symbols when possible so that we can match a function using any ip inside by referring to its start address. The practical effects are: - remove double callchains - fix upside down or any random order of callchains - fix wrong paths - fix bad hits and percentage accounts Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246419315-9968-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/callchain.c | 122 ++++++++++++++++++++++++++---------- 1 file changed, 90 insertions(+), 32 deletions(-) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 6568cb198ba6..440db12c6359 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -4,6 +4,9 @@ * Handle the callchains from the stream in an ad-hoc radix tree and then * sort them in an rbtree. * + * Using a radix for code path provides a fast retrieval and factorizes + * memory use. 
Also that lets us use the paths in a hierarchical graph view. + * */ #include @@ -14,7 +17,8 @@ #include "callchain.h" -static void rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) +static void +rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -49,7 +53,12 @@ void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node) rb_insert_callchain(rb_root, node); } -static struct callchain_node *create_child(struct callchain_node *parent) +/* + * Create a child for a parent. If inherit_children, then the new child + * will become the new parent of it's parent children + */ +static struct callchain_node * +create_child(struct callchain_node *parent, bool inherit_children) { struct callchain_node *new; @@ -61,14 +70,27 @@ static struct callchain_node *create_child(struct callchain_node *parent) new->parent = parent; INIT_LIST_HEAD(&new->children); INIT_LIST_HEAD(&new->val); + + if (inherit_children) { + struct callchain_node *next; + + list_splice(&parent->children, &new->children); + INIT_LIST_HEAD(&parent->children); + + list_for_each_entry(next, &new->children, brothers) + next->parent = new; + } list_add_tail(&new->brothers, &parent->children); return new; } +/* + * Fill the node with callchain values + */ static void -fill_node(struct callchain_node *node, struct ip_callchain *chain, int start, - struct symbol **syms) +fill_node(struct callchain_node *node, struct ip_callchain *chain, + int start, struct symbol **syms) { int i; @@ -84,54 +106,80 @@ fill_node(struct callchain_node *node, struct ip_callchain *chain, int start, call->sym = syms[i]; list_add_tail(&call->list, &node->val); } - node->val_nr = i - start; + node->val_nr = chain->nr - start; + if (!node->val_nr) + printf("Warning: empty node in callchain tree\n"); } -static void add_child(struct callchain_node *parent, struct ip_callchain *chain, - struct symbol **syms) +static 
void +add_child(struct callchain_node *parent, struct ip_callchain *chain, + int start, struct symbol **syms) { struct callchain_node *new; - new = create_child(parent); - fill_node(new, chain, parent->val_nr, syms); + new = create_child(parent, false); + fill_node(new, chain, start, syms); new->hit = 1; } +/* + * Split the parent in two parts (a new child is created) and + * give a part of its callchain to the created child. + * Then create another child to host the given callchain of new branch + */ static void split_add_child(struct callchain_node *parent, struct ip_callchain *chain, - struct callchain_list *to_split, int idx, struct symbol **syms) + struct callchain_list *to_split, int idx_parents, int idx_local, + struct symbol **syms) { struct callchain_node *new; + struct list_head *old_tail; + int idx_total = idx_parents + idx_local; /* split */ - new = create_child(parent); - list_move_tail(&to_split->list, &new->val); - new->hit = parent->hit; - parent->hit = 0; - parent->val_nr = idx; + new = create_child(parent, true); - /* create the new one */ - add_child(parent, chain, syms); + /* split the callchain and move a part to the new child */ + old_tail = parent->val.prev; + list_del_range(&to_split->list, old_tail); + new->val.next = &to_split->list; + new->val.prev = old_tail; + to_split->list.prev = &new->val; + old_tail->next = &new->val; + + /* split the hits */ + new->hit = parent->hit; + new->val_nr = parent->val_nr - idx_local; + parent->val_nr = idx_local; + + /* create a new child for the new branch if any */ + if (idx_total < chain->nr) { + parent->hit = 0; + add_child(parent, chain, idx_total, syms); + } else { + parent->hit = 1; + } } static int __append_chain(struct callchain_node *root, struct ip_callchain *chain, int start, struct symbol **syms); -static int +static void __append_chain_children(struct callchain_node *root, struct ip_callchain *chain, - struct symbol **syms) + struct symbol **syms, int start) { struct callchain_node *rnode; 
/* lookup in childrens */ list_for_each_entry(rnode, &root->children, brothers) { - int ret = __append_chain(rnode, chain, root->val_nr, syms); + int ret = __append_chain(rnode, chain, start, syms); if (!ret) - return 0; + return; } - return -1; + /* nothing in children, add to the current node */ + add_child(root, chain, start, syms); } static int @@ -142,14 +190,22 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, int i = start; bool found = false; - /* lookup in the current node */ + /* + * Lookup in the current node + * If we have a symbol, then compare the start to match + * anywhere inside a function. + */ list_for_each_entry(cnode, &root->val, list) { - if (cnode->ip != chain->ips[i++]) + if (i == chain->nr) + break; + if (cnode->sym && syms[i]) { + if (cnode->sym->start != syms[i]->start) + break; + } else if (cnode->ip != chain->ips[i]) break; if (!found) found = true; - if (i == chain->nr) - break; + i++; } /* matches not, relay on the parent */ @@ -157,23 +213,25 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, return -1; /* we match only a part of the node. 
Split it and add the new chain */ - if (i < root->val_nr) { - split_add_child(root, chain, cnode, i, syms); + if (i - start < root->val_nr) { + split_add_child(root, chain, cnode, start, i - start, syms); return 0; } /* we match 100% of the path, increment the hit */ - if (i == root->val_nr) { + if (i - start == root->val_nr && i == chain->nr) { root->hit++; return 0; } - return __append_chain_children(root, chain, syms); + /* We match the node and still have a part remaining */ + __append_chain_children(root, chain, syms, i); + + return 0; } void append_chain(struct callchain_node *root, struct ip_callchain *chain, struct symbol **syms) { - if (__append_chain_children(root, chain, syms) == -1) - add_child(root, chain, syms); + __append_chain_children(root, chain, syms, 0); } From 0a456fc58fb8ef3c53d18297ab5cd5d2a70d146b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 1 Jul 2009 13:07:01 +1000 Subject: [PATCH 283/741] powerpc/perf_counter: Enable alternate PR/HV bits for POWER7 POWER7 has the same PR/HV bit layout as POWER6, so set the flag. 
Signed-off-by: Anton Blanchard Acked-by: Paul Mackerras Cc: a.p.zijlstra@chello.nl Cc: benh@kernel.crashing.org LKML-Reference: <20090701030701.GI3563@kryten> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/power7-pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 5d755ef7ac8f..5a9f5cbd40a4 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -358,6 +358,7 @@ static struct power_pmu power7_pmu = { .get_constraint = power7_get_constraint, .get_alternatives = power7_get_alternatives, .disable_pmc = power7_disable_pmc, + .flags = PPMU_ALT_SIPR, .n_generic = ARRAY_SIZE(power7_generic_events), .generic_events = power7_generic_events, .cache_events = &power7_cache_events, From 61c45981ddbd718136d49464f00d2f11938aaa6e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 1 Jul 2009 13:04:34 +1000 Subject: [PATCH 284/741] perf_counter tools: Rework event string parsing/syntax This reworks the parser for event descriptors to make it more consistent in what it accepts. It is now structured as a recursive descent parser for the following grammar: events ::= event ( ("," | space) space* event )* event ::= ( raw_event | numeric_event | symbolic_event | generic_hw_event ) [ event_modifier ] raw_event ::= "r" hex_number numeric_event ::= number ":" number number ::= decimal_number | "0x" hex_number | "0" octal_number symbolic_event ::= string_from_event_symbols_array generic_hw_event::= cache_type ( "-" ( cache_op | cache_result ) )* event_modifier ::= ":" ( "u" | "k" | "h" )+ with the extra restriction that you can have at most one cache_op and at most one cache_result. We pass the current string pointer by reference (i.e. as a const char **) to the various parsing functions so that they can advance the pointer to indicate how much they consumed. They return 0 if they didn't recognize the thing at the pointer or 1 if they did (and advance the pointer past it). 
This also fixes parse_aliases to take the longest matching alias from the table, not the first one. Otherwise "l1-data" would match the "l1-d" alias and the "ata" would not be consumed. This allows event modifiers indicating what processor modes to count in to be applied to any event, not just numeric events, and adds a ":h" modifier to indicate counting in hypervisor mode. Specifying ":u" now sets both exclude_kernel and exclude_hv, and so on. Multiple modes can be specified, e.g. ":uh" will count in user or hypervisor mode (i.e. only exclude_kernel will be set). Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <19018.53826.843815.189847@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 242 ++++++++++++++++++++++----------- 1 file changed, 165 insertions(+), 77 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4d042f104cdc..e6b83a3311a5 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -184,16 +184,20 @@ char *event_name(int counter) return "unknown"; } -static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size) +static int parse_aliases(const char **str, char *names[][MAX_ALIASES], int size) { int i, j; + int n, longest = -1; for (i = 0; i < size; i++) { - for (j = 0; j < MAX_ALIASES; j++) { - if (!names[i][j]) - break; - if (strcasestr(str, names[i][j])) - return i; + for (j = 0; j < MAX_ALIASES && names[i][j]; j++) { + n = strlen(names[i][j]); + if (n > longest && !strncasecmp(*str, names[i][j], n)) + longest = n; + } + if (longest > 0) { + *str += longest; + return i; } } @@ -201,30 +205,53 @@ static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size) } static int -parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) +parse_generic_hw_event(const char **str, struct
perf_counter_attr *attr) { - int cache_type = -1, cache_op = 0, cache_result = 0; + const char *s = *str; + int cache_type = -1, cache_op = -1, cache_result = -1; - cache_type = parse_aliases(str, hw_cache, PERF_COUNT_HW_CACHE_MAX); + cache_type = parse_aliases(&s, hw_cache, PERF_COUNT_HW_CACHE_MAX); /* * No fallback - if we cannot get a clear cache type * then bail out: */ if (cache_type == -1) - return -EINVAL; + return 0; + + while ((cache_op == -1 || cache_result == -1) && *s == '-') { + ++s; + + if (cache_op == -1) { + cache_op = parse_aliases(&s, hw_cache_op, + PERF_COUNT_HW_CACHE_OP_MAX); + if (cache_op >= 0) { + if (!is_cache_op_valid(cache_type, cache_op)) + return 0; + continue; + } + } + + if (cache_result == -1) { + cache_result = parse_aliases(&s, hw_cache_result, + PERF_COUNT_HW_CACHE_RESULT_MAX); + if (cache_result >= 0) + continue; + } + + /* + * Can't parse this as a cache op or result, so back up + * to the '-'. + */ + --s; + break; + } - cache_op = parse_aliases(str, hw_cache_op, PERF_COUNT_HW_CACHE_OP_MAX); /* * Fall back to reads: */ if (cache_op == -1) cache_op = PERF_COUNT_HW_CACHE_OP_READ; - if (!is_cache_op_valid(cache_type, cache_op)) - return -EINVAL; - - cache_result = parse_aliases(str, hw_cache_result, - PERF_COUNT_HW_CACHE_RESULT_MAX); /* * Fall back to accesses: */ @@ -234,19 +261,110 @@ parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) attr->config = cache_type | (cache_op << 8) | (cache_result << 16); attr->type = PERF_TYPE_HW_CACHE; - return 0; + *str = s; + return 1; } static int check_events(const char *str, unsigned int i) { - if (!strncmp(str, event_symbols[i].symbol, - strlen(event_symbols[i].symbol))) - return 1; + int n; - if (strlen(event_symbols[i].alias)) - if (!strncmp(str, event_symbols[i].alias, - strlen(event_symbols[i].alias))) + n = strlen(event_symbols[i].symbol); + if (!strncmp(str, event_symbols[i].symbol, n)) + return n; + + n = strlen(event_symbols[i].alias); + if (n) + if 
(!strncmp(str, event_symbols[i].alias, n)) + return n; + return 0; +} + +static int +parse_symbolic_event(const char **strp, struct perf_counter_attr *attr) +{ + const char *str = *strp; + unsigned int i; + int n; + + for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { + n = check_events(str, i); + if (n > 0) { + attr->type = event_symbols[i].type; + attr->config = event_symbols[i].config; + *strp = str + n; return 1; + } + } + return 0; +} + +static int parse_raw_event(const char **strp, struct perf_counter_attr *attr) +{ + const char *str = *strp; + u64 config; + int n; + + if (*str != 'r') + return 0; + n = hex2u64(str + 1, &config); + if (n > 0) { + *strp = str + n + 1; + attr->type = PERF_TYPE_RAW; + attr->config = config; + return 1; + } + return 0; +} + +static int +parse_numeric_event(const char **strp, struct perf_counter_attr *attr) +{ + const char *str = *strp; + char *endp; + unsigned long type; + u64 config; + + type = strtoul(str, &endp, 0); + if (endp > str && type < PERF_TYPE_MAX && *endp == ':') { + str = endp + 1; + config = strtoul(str, &endp, 0); + if (endp > str) { + attr->type = type; + attr->config = config; + *strp = endp; + return 1; + } + } + return 0; +} + +static int +parse_event_modifier(const char **strp, struct perf_counter_attr *attr) +{ + const char *str = *strp; + int eu = 1, ek = 1, eh = 1; + + if (*str++ != ':') + return 0; + while (*str) { + if (*str == 'u') + eu = 0; + else if (*str == 'k') + ek = 0; + else if (*str == 'h') + eh = 0; + else + break; + ++str; + } + if (str >= *strp + 2) { + *strp = str; + attr->exclude_user = eu; + attr->exclude_kernel = ek; + attr->exclude_hv = eh; + return 1; + } return 0; } @@ -254,73 +372,43 @@ static int check_events(const char *str, unsigned int i) * Each event can have multiple symbolic names. * Symbolic names are (almost) exactly matched. 
*/ -static int parse_event_symbols(const char *str, struct perf_counter_attr *attr) +static int parse_event_symbols(const char **str, struct perf_counter_attr *attr) { - u64 config, id; - int type; - unsigned int i; - const char *sep, *pstr; - - if (str[0] == 'r' && hex2u64(str + 1, &config) > 0) { - attr->type = PERF_TYPE_RAW; - attr->config = config; - + if (!(parse_raw_event(str, attr) || + parse_numeric_event(str, attr) || + parse_symbolic_event(str, attr) || + parse_generic_hw_event(str, attr))) return 0; - } - pstr = str; - sep = strchr(pstr, ':'); - if (sep) { - type = atoi(pstr); - pstr = sep + 1; - id = atoi(pstr); - sep = strchr(pstr, ':'); - if (sep) { - pstr = sep + 1; - if (strchr(pstr, 'k')) - attr->exclude_user = 1; - if (strchr(pstr, 'u')) - attr->exclude_kernel = 1; - } - attr->type = type; - attr->config = id; + parse_event_modifier(str, attr); - return 0; - } - - for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { - if (check_events(str, i)) { - attr->type = event_symbols[i].type; - attr->config = event_symbols[i].config; - - return 0; - } - } - - return parse_generic_hw_symbols(str, attr); + return 1; } int parse_events(const struct option *opt, const char *str, int unset) { struct perf_counter_attr attr; - int ret; - memset(&attr, 0, sizeof(attr)); -again: - if (nr_counters == MAX_COUNTERS) - return -1; + for (;;) { + if (nr_counters == MAX_COUNTERS) + return -1; - ret = parse_event_symbols(str, &attr); - if (ret < 0) - return ret; + memset(&attr, 0, sizeof(attr)); + if (!parse_event_symbols(&str, &attr)) + return -1; - attrs[nr_counters] = attr; - nr_counters++; + if (!(*str == 0 || *str == ',' || isspace(*str))) + return -1; - str = strstr(str, ","); - if (str) { - str++; - goto again; + attrs[nr_counters] = attr; + nr_counters++; + + if (*str == 0) + break; + if (*str == ',') + ++str; + while (isspace(*str)) + ++str; } return 0; From 4a2bb6fcc80e6330ca2f2393e98605052cc7780b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 30 Jun 2009 
17:08:09 -0400 Subject: [PATCH 285/741] kprobes: No need to unlock kprobe_insn_mutex Remove needless kprobe_insn_mutex unlocking during safety check in garbage collection, because if someone releases a dirty slot during safety check (which ensures other cpus don't execute all dirty slots), the safety check must fail. So, we need to hold the mutex while checking safety. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli LKML-Reference: <20090630210809.17851.28781.stgit@localhost.localdomain> Signed-off-by: Ingo Molnar --- kernel/kprobes.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index c0fa54b276d9..16b5739c516a 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -237,13 +237,9 @@ static int __kprobes collect_garbage_slots(void) { struct kprobe_insn_page *kip; struct hlist_node *pos, *next; - int safety; /* Ensure no-one is preepmted on the garbages */ - mutex_unlock(&kprobe_insn_mutex); - safety = check_safety(); - mutex_lock(&kprobe_insn_mutex); - if (safety != 0) + if (check_safety()) return -EAGAIN; hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) { From 61fd21670d048017c81e62f60894ef1b04b481db Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Thu, 25 Jun 2009 13:03:04 +0200 Subject: [PATCH 286/741] Trivial typo fixes in Documentation/block/data-integrity.txt. Signed-off-by: Andre Noll Acked-by: Martin K. Petersen Signed-off-by: Jens Axboe --- Documentation/block/data-integrity.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.txt index e8ca040ba2cf..2d735b0ae383 100644 --- a/Documentation/block/data-integrity.txt +++ b/Documentation/block/data-integrity.txt @@ -50,7 +50,7 @@ encouraged them to allow separation of the data and integrity metadata scatter-gather lists.
The controller will interleave the buffers on write and split them on -read. This means that the Linux can DMA the data buffers to and from +read. This means that Linux can DMA the data buffers to and from host memory without changes to the page cache. Also, the 16-bit CRC checksum mandated by both the SCSI and SATA specs @@ -66,7 +66,7 @@ software RAID5). The IP checksum is weaker than the CRC in terms of detecting bit errors. However, the strength is really in the separation of the data -buffers and the integrity metadata. These two distinct buffers much +buffers and the integrity metadata. These two distinct buffers must match up for an I/O to complete. The separation of the data and integrity metadata buffers as well as From d5036d770f871bd34c9cfd955e6dee692e1e8e81 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Jun 2009 10:44:34 +0200 Subject: [PATCH 287/741] cfq-iosched: move cfqq initialization out of cfq_find_alloc_queue() We're going to be needing that init code outside of that function to get rid of the __GFP_NOFAIL in cfqq allocation. 
Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 833ec18eaa63..c760ae7019dd 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1641,6 +1641,26 @@ static void cfq_ioc_set_ioprio(struct io_context *ioc) ioc->ioprio_changed = 0; } +static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, + pid_t pid, int is_sync) +{ + RB_CLEAR_NODE(&cfqq->rb_node); + RB_CLEAR_NODE(&cfqq->p_node); + INIT_LIST_HEAD(&cfqq->fifo); + + atomic_set(&cfqq->ref, 0); + cfqq->cfqd = cfqd; + + cfq_mark_cfqq_prio_changed(cfqq); + + if (is_sync) { + if (!cfq_class_idle(cfqq)) + cfq_mark_cfqq_idle_window(cfqq); + cfq_mark_cfqq_sync(cfqq); + } + cfqq->pid = pid; +} + static struct cfq_queue * cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc, gfp_t gfp_mask) @@ -1678,23 +1698,8 @@ retry: goto out; } - RB_CLEAR_NODE(&cfqq->rb_node); - RB_CLEAR_NODE(&cfqq->p_node); - INIT_LIST_HEAD(&cfqq->fifo); - - atomic_set(&cfqq->ref, 0); - cfqq->cfqd = cfqd; - - cfq_mark_cfqq_prio_changed(cfqq); - + cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); cfq_init_prio_data(cfqq, ioc); - - if (is_sync) { - if (!cfq_class_idle(cfqq)) - cfq_mark_cfqq_idle_window(cfqq); - cfq_mark_cfqq_sync(cfqq); - } - cfqq->pid = current->pid; cfq_log_cfqq(cfqd, cfqq, "alloced"); } From 6118b70b3a0b4c583439bb77600194c82f220ce3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 30 Jun 2009 09:34:12 +0200 Subject: [PATCH 288/741] cfq-iosched: get rid of the need for __GFP_NOFAIL in cfq_find_alloc_queue() Setup an emergency fallback cfqq that we allocate at IO scheduler init time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue() never fails without having to ensure free memory. 
On cfqq lookup, always try to allocate a new cfqq if the given cfq io context has the oom_cfqq assigned. This ensures that we only temporarily punt to this shared queue. Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 137 +++++++++++++++++++++++--------------------- 1 file changed, 73 insertions(+), 64 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index c760ae7019dd..1d9160ffa26d 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -70,6 +70,51 @@ struct cfq_rb_root { }; #define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, } +/* + * Per process-grouping structure + */ +struct cfq_queue { + /* reference count */ + atomic_t ref; + /* various state flags, see below */ + unsigned int flags; + /* parent cfq_data */ + struct cfq_data *cfqd; + /* service_tree member */ + struct rb_node rb_node; + /* service_tree key */ + unsigned long rb_key; + /* prio tree member */ + struct rb_node p_node; + /* prio tree root we belong to, if any */ + struct rb_root *p_root; + /* sorted list of pending requests */ + struct rb_root sort_list; + /* if fifo isn't expired, next request to serve */ + struct request *next_rq; + /* requests queued in sort_list */ + int queued[2]; + /* currently allocated requests */ + int allocated[2]; + /* fifo list of requests in sort_list */ + struct list_head fifo; + + unsigned long slice_end; + long slice_resid; + unsigned int slice_dispatch; + + /* pending metadata requests */ + int meta_pending; + /* number of requests that are on the dispatch list or inside driver */ + int dispatched; + + /* io prio of this group */ + unsigned short ioprio, org_ioprio; + unsigned short ioprio_class, org_ioprio_class; + + pid_t pid; +}; + /* * Per block device queue structure */ @@ -135,51 +180,11 @@ struct cfq_data { unsigned int cfq_slice_idle; struct list_head cic_list; -}; -/* - * Per process-grouping structure - */ -struct cfq_queue { - /* reference count */ - atomic_t ref; - /* various state flags, 
see below */ - unsigned int flags; - /* parent cfq_data */ - struct cfq_data *cfqd; - /* service_tree member */ - struct rb_node rb_node; - /* service_tree key */ - unsigned long rb_key; - /* prio tree member */ - struct rb_node p_node; - /* prio tree root we belong to, if any */ - struct rb_root *p_root; - /* sorted list of pending requests */ - struct rb_root sort_list; - /* if fifo isn't expired, next request to serve */ - struct request *next_rq; - /* requests queued in sort_list */ - int queued[2]; - /* currently allocated requests */ - int allocated[2]; - /* fifo list of requests in sort_list */ - struct list_head fifo; - - unsigned long slice_end; - long slice_resid; - unsigned int slice_dispatch; - - /* pending metadata requests */ - int meta_pending; - /* number of requests that are on the dispatch list or inside driver */ - int dispatched; - - /* io prio of this group */ - unsigned short ioprio, org_ioprio; - unsigned short ioprio_class, org_ioprio_class; - - pid_t pid; + /* + * Fallback dummy cfqq for extreme OOM conditions + */ + struct cfq_queue oom_cfqq; }; enum cfqq_state_flags { @@ -1673,41 +1678,40 @@ retry: /* cic always exists here */ cfqq = cic_to_cfqq(cic, is_sync); - if (!cfqq) { + /* + * Always try a new alloc if we fell back to the OOM cfqq + * originally, since it should just be a temporary situation. + */ + if (!cfqq || cfqq == &cfqd->oom_cfqq) { + cfqq = NULL; if (new_cfqq) { cfqq = new_cfqq; new_cfqq = NULL; } else if (gfp_mask & __GFP_WAIT) { - /* - * Inform the allocator of the fact that we will - * just repeat this allocation if it fails, to allow - * the allocator to do whatever it needs to attempt to - * free memory. 
- */ spin_unlock_irq(cfqd->queue->queue_lock); new_cfqq = kmem_cache_alloc_node(cfq_pool, - gfp_mask | __GFP_NOFAIL | __GFP_ZERO, + gfp_mask | __GFP_ZERO, cfqd->queue->node); spin_lock_irq(cfqd->queue->queue_lock); - goto retry; + if (new_cfqq) + goto retry; } else { cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask | __GFP_ZERO, cfqd->queue->node); - if (!cfqq) - goto out; } - cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); - cfq_init_prio_data(cfqq, ioc); - cfq_log_cfqq(cfqd, cfqq, "alloced"); + if (cfqq) { + cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); + cfq_init_prio_data(cfqq, ioc); + cfq_log_cfqq(cfqd, cfqq, "alloced"); + } else + cfqq = &cfqd->oom_cfqq; } if (new_cfqq) kmem_cache_free(cfq_pool, new_cfqq); -out: - WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq); return cfqq; } @@ -1740,11 +1744,8 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc, cfqq = *async_cfqq; } - if (!cfqq) { + if (!cfqq) cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask); - if (!cfqq) - return NULL; - } /* * pin the queue now that it's allocated, scheduler exit will prune it @@ -2470,6 +2471,14 @@ static void *cfq_init_queue(struct request_queue *q) for (i = 0; i < CFQ_PRIO_LISTS; i++) cfqd->prio_trees[i] = RB_ROOT; + /* + * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues. + * Grab a permanent reference to it, so that the normal code flow + * will not attempt to free it. + */ + cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0); + atomic_inc(&cfqd->oom_cfqq.ref); + INIT_LIST_HEAD(&cfqd->cic_list); cfqd->queue = q; From 7878cba9f0037f5599004b03a1260b32d9050360 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 26 Jun 2009 15:37:49 +0200 Subject: [PATCH 289/741] block: Create bip slabs with embedded integrity vectors This patch restores stacking ability to the block layer integrity infrastructure by creating a set of dedicated bip slabs. Each bip slab has an embedded bio_vec array at the end. 
This cuts down on memory allocations and also simplifies the code compared to the original bvec version. Only the largest bip slab is backed by a mempool. The pool is contained in the bio_set so stacking drivers can ensure forward progress. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- drivers/md/dm.c | 4 +- fs/bio-integrity.c | 190 +++++++++++++++++++++++++++++++------------- fs/bio.c | 11 ++- include/linux/bio.h | 22 +++-- 5 files changed, 162 insertions(+), 67 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index b06cf5c2a829..345d99da8d41 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2365,7 +2365,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, __bio_clone(bio, bio_src); if (bio_integrity(bio_src) && - bio_integrity_clone(bio, bio_src, gfp_mask)) + bio_integrity_clone(bio, bio_src, gfp_mask, bs)) goto free_and_out; if (bio_ctr && bio_ctr(bio, bio_src, data)) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 3c6d4ee8921d..9acd54a5cffb 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1017,7 +1017,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, clone->bi_flags |= 1 << BIO_CLONED; if (bio_integrity(bio)) { - bio_integrity_clone(clone, bio, GFP_NOIO); + bio_integrity_clone(clone, bio, GFP_NOIO, bs); bio_integrity_trim(clone, bio_sector_offset(bio, idx, offset), len); } @@ -1045,7 +1045,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, clone->bi_flags &= ~(1 << BIO_SEG_VALID); if (bio_integrity(bio)) { - bio_integrity_clone(clone, bio, GFP_NOIO); + bio_integrity_clone(clone, bio, GFP_NOIO, bs); if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) bio_integrity_trim(clone, diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 31c46a241bac..49a34e7f7306 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -1,7 +1,7 @@ /* * bio-integrity.c - bio data integrity extensions * - * Copyright (C) 2007, 2008 Oracle Corporation 
+ * Copyright (C) 2007, 2008, 2009 Oracle Corporation * Written by: Martin K. Petersen * * This program is free software; you can redistribute it and/or @@ -25,11 +25,94 @@ #include #include -static struct kmem_cache *bio_integrity_slab __read_mostly; -static mempool_t *bio_integrity_pool; -static struct bio_set *integrity_bio_set; +struct integrity_slab { + struct kmem_cache *slab; + unsigned short nr_vecs; + char name[8]; +}; + +#define IS(x) { .nr_vecs = x, .name = "bip-"__stringify(x) } +struct integrity_slab bip_slab[BIOVEC_NR_POOLS] __read_mostly = { + IS(1), IS(4), IS(16), IS(64), IS(128), IS(BIO_MAX_PAGES), +}; +#undef IS + static struct workqueue_struct *kintegrityd_wq; +static inline unsigned int vecs_to_idx(unsigned int nr) +{ + switch (nr) { + case 1: + return 0; + case 2 ... 4: + return 1; + case 5 ... 16: + return 2; + case 17 ... 64: + return 3; + case 65 ... 128: + return 4; + case 129 ... BIO_MAX_PAGES: + return 5; + default: + BUG(); + } +} + +static inline int use_bip_pool(unsigned int idx) +{ + if (idx == BIOVEC_NR_POOLS) + return 1; + + return 0; +} + +/** + * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio + * @bio: bio to attach integrity metadata to + * @gfp_mask: Memory allocation mask + * @nr_vecs: Number of integrity metadata scatter-gather elements + * @bs: bio_set to allocate from + * + * Description: This function prepares a bio for attaching integrity + * metadata. nr_vecs specifies the maximum number of pages containing + * integrity metadata that can be attached. 
+ */ +struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, + gfp_t gfp_mask, + unsigned int nr_vecs, + struct bio_set *bs) +{ + struct bio_integrity_payload *bip; + unsigned int idx = vecs_to_idx(nr_vecs); + + BUG_ON(bio == NULL); + bip = NULL; + + /* Lower order allocations come straight from slab */ + if (!use_bip_pool(idx)) + bip = kmem_cache_alloc(bip_slab[idx].slab, gfp_mask); + + /* Use mempool if lower order alloc failed or max vecs were requested */ + if (bip == NULL) { + bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); + + if (unlikely(bip == NULL)) { + printk(KERN_ERR "%s: could not alloc bip\n", __func__); + return NULL; + } + } + + memset(bip, 0, sizeof(*bip)); + + bip->bip_slab = idx; + bip->bip_bio = bio; + bio->bi_integrity = bip; + + return bip; +} +EXPORT_SYMBOL(bio_integrity_alloc_bioset); + /** * bio_integrity_alloc - Allocate integrity payload and attach it to bio * @bio: bio to attach integrity metadata to @@ -44,44 +127,19 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp_mask, unsigned int nr_vecs) { - struct bio_integrity_payload *bip; - struct bio_vec *iv; - unsigned long idx; - - BUG_ON(bio == NULL); - - bip = mempool_alloc(bio_integrity_pool, gfp_mask); - if (unlikely(bip == NULL)) { - printk(KERN_ERR "%s: could not alloc bip\n", __func__); - return NULL; - } - - memset(bip, 0, sizeof(*bip)); - - iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, integrity_bio_set); - if (unlikely(iv == NULL)) { - printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__); - mempool_free(bip, bio_integrity_pool); - return NULL; - } - - bip->bip_pool = idx; - bip->bip_vec = iv; - bip->bip_bio = bio; - bio->bi_integrity = bip; - - return bip; + return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set); } EXPORT_SYMBOL(bio_integrity_alloc); /** * bio_integrity_free - Free bio integrity payload * @bio: bio containing bip to be freed + * @bs: bio_set this bio was allocated from * * Description: Used 
to free the integrity portion of a bio. Usually * called from bio_free(). */ -void bio_integrity_free(struct bio *bio) +void bio_integrity_free(struct bio *bio, struct bio_set *bs) { struct bio_integrity_payload *bip = bio->bi_integrity; @@ -92,8 +150,10 @@ void bio_integrity_free(struct bio *bio) && bip->bip_buf != NULL) kfree(bip->bip_buf); - bvec_free_bs(integrity_bio_set, bip->bip_vec, bip->bip_pool); - mempool_free(bip, bio_integrity_pool); + if (use_bip_pool(bip->bip_slab)) + mempool_free(bip, bs->bio_integrity_pool); + else + kmem_cache_free(bip_slab[bip->bip_slab].slab, bip); bio->bi_integrity = NULL; } @@ -114,7 +174,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_vec *iv; - if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_pool)) { + if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) { printk(KERN_ERR "%s: bip_vec full\n", __func__); return 0; } @@ -647,8 +707,8 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors) bp->iv1 = bip->bip_vec[0]; bp->iv2 = bip->bip_vec[0]; - bp->bip1.bip_vec = &bp->iv1; - bp->bip2.bip_vec = &bp->iv2; + bp->bip1.bip_vec[0] = bp->iv1; + bp->bip2.bip_vec[0] = bp->iv2; bp->iv1.bv_len = sectors * bi->tuple_size; bp->iv2.bv_offset += sectors * bi->tuple_size; @@ -667,17 +727,19 @@ EXPORT_SYMBOL(bio_integrity_split); * @bio: New bio * @bio_src: Original bio * @gfp_mask: Memory allocation mask + * @bs: bio_set to allocate bip from * * Description: Called to allocate a bip when cloning a bio */ -int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) +int bio_integrity_clone(struct bio *bio, struct bio *bio_src, + gfp_t gfp_mask, struct bio_set *bs) { struct bio_integrity_payload *bip_src = bio_src->bi_integrity; struct bio_integrity_payload *bip; BUG_ON(bip_src == NULL); - bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt); + bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs); if 
(bip == NULL) return -EIO; @@ -693,25 +755,43 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) } EXPORT_SYMBOL(bio_integrity_clone); -static int __init bio_integrity_init(void) +int bioset_integrity_create(struct bio_set *bs, int pool_size) { - kintegrityd_wq = create_workqueue("kintegrityd"); + unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES); - if (!kintegrityd_wq) - panic("Failed to create kintegrityd\n"); + bs->bio_integrity_pool = + mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab); - bio_integrity_slab = KMEM_CACHE(bio_integrity_payload, - SLAB_HWCACHE_ALIGN|SLAB_PANIC); - - bio_integrity_pool = mempool_create_slab_pool(BIO_POOL_SIZE, - bio_integrity_slab); - if (!bio_integrity_pool) - panic("bio_integrity: can't allocate bip pool\n"); - - integrity_bio_set = bioset_create(BIO_POOL_SIZE, 0); - if (!integrity_bio_set) - panic("bio_integrity: can't allocate bio_set\n"); + if (!bs->bio_integrity_pool) + return -1; return 0; } -subsys_initcall(bio_integrity_init); +EXPORT_SYMBOL(bioset_integrity_create); + +void bioset_integrity_free(struct bio_set *bs) +{ + if (bs->bio_integrity_pool) + mempool_destroy(bs->bio_integrity_pool); +} +EXPORT_SYMBOL(bioset_integrity_free); + +void __init bio_integrity_init(void) +{ + unsigned int i; + + kintegrityd_wq = create_workqueue("kintegrityd"); + if (!kintegrityd_wq) + panic("Failed to create kintegrityd\n"); + + for (i = 0 ; i < BIOVEC_NR_POOLS ; i++) { + unsigned int size; + + size = sizeof(struct bio_integrity_payload) + + bip_slab[i].nr_vecs * sizeof(struct bio_vec); + + bip_slab[i].slab = + kmem_cache_create(bip_slab[i].name, size, 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + } +} diff --git a/fs/bio.c b/fs/bio.c index 24c914043532..1486b19fc431 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -238,7 +238,7 @@ void bio_free(struct bio *bio, struct bio_set *bs) bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); if (bio_integrity(bio)) - bio_integrity_free(bio); + 
bio_integrity_free(bio, bs); /* * If we have front padding, adjust the bio pointer before freeing @@ -341,7 +341,7 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) static void bio_kmalloc_destructor(struct bio *bio) { if (bio_integrity(bio)) - bio_integrity_free(bio); + bio_integrity_free(bio, fs_bio_set); kfree(bio); } @@ -472,7 +472,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) if (bio_integrity(bio)) { int ret; - ret = bio_integrity_clone(b, bio, gfp_mask); + ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set); if (ret < 0) { bio_put(b); @@ -1539,6 +1539,7 @@ void bioset_free(struct bio_set *bs) if (bs->bio_pool) mempool_destroy(bs->bio_pool); + bioset_integrity_free(bs); biovec_free_pools(bs); bio_put_slab(bs); @@ -1579,6 +1580,9 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) if (!bs->bio_pool) goto bad; + if (bioset_integrity_create(bs, pool_size)) + goto bad; + if (!biovec_create_pools(bs, pool_size)) return bs; @@ -1616,6 +1620,7 @@ static int __init init_bio(void) if (!bio_slabs) panic("bio: can't allocate bios\n"); + bio_integrity_init(); biovec_init_slabs(); fs_bio_set = bioset_create(BIO_POOL_SIZE, 0); diff --git a/include/linux/bio.h b/include/linux/bio.h index 2a04eb54c0dd..2892b710771c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -319,7 +319,6 @@ static inline int bio_has_allocated_vec(struct bio *bio) */ struct bio_integrity_payload { struct bio *bip_bio; /* parent bio */ - struct bio_vec *bip_vec; /* integrity data vector */ sector_t bip_sector; /* virtual start sector */ @@ -328,11 +327,12 @@ struct bio_integrity_payload { unsigned int bip_size; - unsigned short bip_pool; /* pool the ivec came from */ + unsigned short bip_slab; /* slab the bip came from */ unsigned short bip_vcnt; /* # of integrity bio_vecs */ unsigned short bip_idx; /* current bip_vec index */ struct work_struct bip_work; /* I/O completion */ + struct bio_vec bip_vec[0]; /* embedded bvec array */ }; #endif 
/* CONFIG_BLK_DEV_INTEGRITY */ @@ -430,6 +430,9 @@ struct bio_set { unsigned int front_pad; mempool_t *bio_pool; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + mempool_t *bio_integrity_pool; +#endif mempool_t *bvec_pool; }; @@ -634,8 +637,9 @@ static inline struct bio *bio_list_get(struct bio_list *bl) #define bio_integrity(bio) (bio->bi_integrity != NULL) +extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); -extern void bio_integrity_free(struct bio *); +extern void bio_integrity_free(struct bio *, struct bio_set *); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); extern int bio_integrity_enabled(struct bio *bio); extern int bio_integrity_set_tag(struct bio *, void *, unsigned int); @@ -645,21 +649,27 @@ extern void bio_integrity_endio(struct bio *, int); extern void bio_integrity_advance(struct bio *, unsigned int); extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); extern void bio_integrity_split(struct bio *, struct bio_pair *, int); -extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t); +extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t, struct bio_set *); +extern int bioset_integrity_create(struct bio_set *, int); +extern void bioset_integrity_free(struct bio_set *); +extern void bio_integrity_init(void); #else /* CONFIG_BLK_DEV_INTEGRITY */ #define bio_integrity(a) (0) +#define bioset_integrity_create(a, b) (0) #define bio_integrity_prep(a) (0) #define bio_integrity_enabled(a) (0) -#define bio_integrity_clone(a, b, c) (0) -#define bio_integrity_free(a) do { } while (0) +#define bio_integrity_clone(a, b, c, d) (0) +#define bioset_integrity_free(a) do { } while (0) +#define bio_integrity_free(a, b) do { } while (0) #define bio_integrity_endio(a, b) do { } while (0) #define bio_integrity_advance(a, b) do { } while (0) 
#define bio_integrity_trim(a, b, c) do { } while (0) #define bio_integrity_split(a, b, c) do { } while (0) #define bio_integrity_set_tag(a, b, c) do { } while (0) #define bio_integrity_get_tag(a, b, c) do { } while (0) +#define bio_integrity_init(a) do { } while (0) #endif /* CONFIG_BLK_DEV_INTEGRITY */ From 018e0446890661504783f92388ecce7138c1566d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Jun 2009 16:27:10 +0200 Subject: [PATCH 290/741] block: get rid of queue-private command filter The initial patches to support this through sysfs export were broken and have been if 0'ed out in any release. So lets just kill the code and reclaim some space in struct request_queue, if anyone would later like to fixup the sysfs bits, the git history can easily restore the removed bits. Signed-off-by: Jens Axboe --- block/Makefile | 2 +- block/blk-core.c | 2 - block/bsg.c | 2 +- block/cmd-filter.c | 233 ----------------------------------------- block/scsi_ioctl.c | 43 +++++++- drivers/scsi/sg.c | 4 +- include/linux/blkdev.h | 15 +-- 7 files changed, 42 insertions(+), 259 deletions(-) delete mode 100644 block/cmd-filter.c diff --git a/block/Makefile b/block/Makefile index e9fa4dd690f2..6c54ed0ff755 100644 --- a/block/Makefile +++ b/block/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ - ioctl.o genhd.o scsi_ioctl.o cmd-filter.o + ioctl.o genhd.o scsi_ioctl.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o diff --git a/block/blk-core.c b/block/blk-core.c index 345d99da8d41..02b87134a167 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -595,8 +595,6 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) q->sg_reserved_size = INT_MAX; - blk_set_cmd_filter_defaults(&q->cmd_filter); - /* * all done */ diff --git a/block/bsg.c b/block/bsg.c index 
e7d475254248..5f184bb3ff9e 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -186,7 +186,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq, return -EFAULT; if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) { - if (blk_verify_command(&q->cmd_filter, rq->cmd, has_write_perm)) + if (blk_verify_command(rq->cmd, has_write_perm)) return -EPERM; } else if (!capable(CAP_SYS_RAWIO)) return -EPERM; diff --git a/block/cmd-filter.c b/block/cmd-filter.c deleted file mode 100644 index 572bbc2f900d..000000000000 --- a/block/cmd-filter.c +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright 2004 Peter M. Jones - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public Licens - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- - * - */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -int blk_verify_command(struct blk_cmd_filter *filter, - unsigned char *cmd, fmode_t has_write_perm) -{ - /* root can do any command. 
*/ - if (capable(CAP_SYS_RAWIO)) - return 0; - - /* if there's no filter set, assume we're filtering everything out */ - if (!filter) - return -EPERM; - - /* Anybody who can open the device can do a read-safe command */ - if (test_bit(cmd[0], filter->read_ok)) - return 0; - - /* Write-safe commands require a writable open */ - if (test_bit(cmd[0], filter->write_ok) && has_write_perm) - return 0; - - return -EPERM; -} -EXPORT_SYMBOL(blk_verify_command); - -#if 0 -/* and now, the sysfs stuff */ -static ssize_t rcf_cmds_show(struct blk_cmd_filter *filter, char *page, - int rw) -{ - char *npage = page; - unsigned long *okbits; - int i; - - if (rw == READ) - okbits = filter->read_ok; - else - okbits = filter->write_ok; - - for (i = 0; i < BLK_SCSI_MAX_CMDS; i++) { - if (test_bit(i, okbits)) { - npage += sprintf(npage, "0x%02x", i); - if (i < BLK_SCSI_MAX_CMDS - 1) - sprintf(npage++, " "); - } - } - - if (npage != page) - npage += sprintf(npage, "\n"); - - return npage - page; -} - -static ssize_t rcf_readcmds_show(struct blk_cmd_filter *filter, char *page) -{ - return rcf_cmds_show(filter, page, READ); -} - -static ssize_t rcf_writecmds_show(struct blk_cmd_filter *filter, - char *page) -{ - return rcf_cmds_show(filter, page, WRITE); -} - -static ssize_t rcf_cmds_store(struct blk_cmd_filter *filter, - const char *page, size_t count, int rw) -{ - unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits; - int cmd, set; - char *p, *status; - - if (rw == READ) { - memcpy(&okbits, filter->read_ok, sizeof(okbits)); - target_okbits = filter->read_ok; - } else { - memcpy(&okbits, filter->write_ok, sizeof(okbits)); - target_okbits = filter->write_ok; - } - - while ((p = strsep((char **)&page, " ")) != NULL) { - set = 1; - - if (p[0] == '+') { - p++; - } else if (p[0] == '-') { - set = 0; - p++; - } - - cmd = simple_strtol(p, &status, 16); - - /* either of these cases means invalid input, so do nothing. 
*/ - if ((status == p) || cmd >= BLK_SCSI_MAX_CMDS) - return -EINVAL; - - if (set) - __set_bit(cmd, okbits); - else - __clear_bit(cmd, okbits); - } - - memcpy(target_okbits, okbits, sizeof(okbits)); - return count; -} - -static ssize_t rcf_readcmds_store(struct blk_cmd_filter *filter, - const char *page, size_t count) -{ - return rcf_cmds_store(filter, page, count, READ); -} - -static ssize_t rcf_writecmds_store(struct blk_cmd_filter *filter, - const char *page, size_t count) -{ - return rcf_cmds_store(filter, page, count, WRITE); -} - -struct rcf_sysfs_entry { - struct attribute attr; - ssize_t (*show)(struct blk_cmd_filter *, char *); - ssize_t (*store)(struct blk_cmd_filter *, const char *, size_t); -}; - -static struct rcf_sysfs_entry rcf_readcmds_entry = { - .attr = { .name = "read_table", .mode = S_IRUGO | S_IWUSR }, - .show = rcf_readcmds_show, - .store = rcf_readcmds_store, -}; - -static struct rcf_sysfs_entry rcf_writecmds_entry = { - .attr = {.name = "write_table", .mode = S_IRUGO | S_IWUSR }, - .show = rcf_writecmds_show, - .store = rcf_writecmds_store, -}; - -static struct attribute *default_attrs[] = { - &rcf_readcmds_entry.attr, - &rcf_writecmds_entry.attr, - NULL, -}; - -#define to_rcf(atr) container_of((atr), struct rcf_sysfs_entry, attr) - -static ssize_t -rcf_attr_show(struct kobject *kobj, struct attribute *attr, char *page) -{ - struct rcf_sysfs_entry *entry = to_rcf(attr); - struct blk_cmd_filter *filter; - - filter = container_of(kobj, struct blk_cmd_filter, kobj); - if (entry->show) - return entry->show(filter, page); - - return 0; -} - -static ssize_t -rcf_attr_store(struct kobject *kobj, struct attribute *attr, - const char *page, size_t length) -{ - struct rcf_sysfs_entry *entry = to_rcf(attr); - struct blk_cmd_filter *filter; - - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - - if (!entry->store) - return -EINVAL; - - filter = container_of(kobj, struct blk_cmd_filter, kobj); - return entry->store(filter, page, length); -} - -static 
struct sysfs_ops rcf_sysfs_ops = { - .show = rcf_attr_show, - .store = rcf_attr_store, -}; - -static struct kobj_type rcf_ktype = { - .sysfs_ops = &rcf_sysfs_ops, - .default_attrs = default_attrs, -}; - -int blk_register_filter(struct gendisk *disk) -{ - int ret; - struct blk_cmd_filter *filter = &disk->queue->cmd_filter; - - ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, - &disk_to_dev(disk)->kobj, - "%s", "cmd_filter"); - if (ret < 0) - return ret; - - return 0; -} -EXPORT_SYMBOL(blk_register_filter); - -void blk_unregister_filter(struct gendisk *disk) -{ - struct blk_cmd_filter *filter = &disk->queue->cmd_filter; - - kobject_put(&filter->kobj); -} -EXPORT_SYMBOL(blk_unregister_filter); -#endif diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 5f8e798ede4e..f0e0ce0a607d 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -32,6 +32,11 @@ #include #include +struct blk_cmd_filter { + unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; + unsigned long write_ok[BLK_SCSI_CMD_PER_LONG]; +} blk_default_cmd_filter; + /* Command group 3 is reserved and should never be used. */ const unsigned char scsi_command_size_tbl[8] = { @@ -105,7 +110,7 @@ static int sg_emulated_host(struct request_queue *q, int __user *p) return put_user(1, p); } -void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter) +static void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter) { /* Basic read-only commands */ __set_bit(TEST_UNIT_READY, filter->read_ok); @@ -187,14 +192,37 @@ void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter) __set_bit(GPCMD_SET_STREAMING, filter->write_ok); __set_bit(GPCMD_SET_READ_AHEAD, filter->write_ok); } -EXPORT_SYMBOL_GPL(blk_set_cmd_filter_defaults); + +int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm) +{ + struct blk_cmd_filter *filter = &blk_default_cmd_filter; + + /* root can do any command. 
*/ + if (capable(CAP_SYS_RAWIO)) + return 0; + + /* if there's no filter set, assume we're filtering everything out */ + if (!filter) + return -EPERM; + + /* Anybody who can open the device can do a read-safe command */ + if (test_bit(cmd[0], filter->read_ok)) + return 0; + + /* Write-safe commands require a writable open */ + if (test_bit(cmd[0], filter->write_ok) && has_write_perm) + return 0; + + return -EPERM; +} +EXPORT_SYMBOL(blk_verify_command); static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq, struct sg_io_hdr *hdr, fmode_t mode) { if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len)) return -EFAULT; - if (blk_verify_command(&q->cmd_filter, rq->cmd, mode & FMODE_WRITE)) + if (blk_verify_command(rq->cmd, mode & FMODE_WRITE)) return -EPERM; /* @@ -427,7 +455,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len)) goto error; - err = blk_verify_command(&q->cmd_filter, rq->cmd, mode & FMODE_WRITE); + err = blk_verify_command(rq->cmd, mode & FMODE_WRITE); if (err) goto error; @@ -645,5 +673,10 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod blk_put_queue(q); return err; } - EXPORT_SYMBOL(scsi_cmd_ioctl); + +int __init blk_scsi_ioctl_init(void) +{ + blk_set_cmd_filter_defaults(&blk_default_cmd_filter); + return 0; +} diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 8201387b4daa..ef142fd47a83 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -210,13 +210,11 @@ static void sg_put_dev(Sg_device *sdp); static int sg_allow_access(struct file *filp, unsigned char *cmd) { struct sg_fd *sfp = (struct sg_fd *)filp->private_data; - struct request_queue *q = sfp->parentdp->device->request_queue; if (sfp->parentdp->device->type == TYPE_SCANNER) return 0; - return blk_verify_command(&q->cmd_filter, - cmd, filp->f_mode & FMODE_WRITE); + return blk_verify_command(cmd, filp->f_mode & FMODE_WRITE); } static int 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8963d9149b5f..49ae07951d55 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -301,12 +301,6 @@ struct blk_queue_tag { #define BLK_SCSI_MAX_CMDS (256) #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) -struct blk_cmd_filter { - unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; - unsigned long write_ok[BLK_SCSI_CMD_PER_LONG]; - struct kobject kobj; -}; - struct queue_limits { unsigned long bounce_pfn; unsigned long seg_boundary_mask; @@ -445,7 +439,6 @@ struct request_queue #if defined(CONFIG_BLK_DEV_BSG) struct bsg_class_device bsg_dev; #endif - struct blk_cmd_filter cmd_filter; }; #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ @@ -998,13 +991,7 @@ static inline int sb_issue_discard(struct super_block *sb, return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL); } -/* -* command filter functions -*/ -extern int blk_verify_command(struct blk_cmd_filter *filter, - unsigned char *cmd, fmode_t has_write_perm); -extern void blk_unregister_filter(struct gendisk *disk); -extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter); +extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); #define MAX_PHYS_SEGMENTS 128 #define MAX_HW_SEGMENTS 128 From db64f680ba4b5c56c4be59f0698000df89ff0281 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 30 Jun 2009 09:35:44 +0200 Subject: [PATCH 291/741] blocK: Restore barrier support for md and probably other virtual devices. The next_ordered flag is only meaningful for devices that use __make_request. So move the test against next_ordered out of generic code and in to __make_request Since this test was added, barriers have not worked on md or any devices that don't use __make_request and so don't bother to set next_ordered. 
(dm explicitly sets something other than QUEUE_ORDERED_NONE since commit 99360b4c18f7675b50d283301d46d755affe75fd but notes in the comments that it is otherwise meaningless). Cc: Ken Milmore Cc: stable@kernel.org Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- block/blk-core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 02b87134a167..4b45435c6eaf 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1170,6 +1170,11 @@ static int __make_request(struct request_queue *q, struct bio *bio) const int unplug = bio_unplug(bio); int rw_flags; + if (bio_barrier(bio) && bio_has_data(bio) && + (q->next_ordered == QUEUE_ORDERED_NONE)) { + bio_endio(bio, -EOPNOTSUPP); + return 0; + } /* * low level driver can indicate that it wants pages above a * certain limit bounced to low memory (ie for highmem, or even @@ -1470,11 +1475,6 @@ static inline void __generic_make_request(struct bio *bio) err = -EOPNOTSUPP; goto end_io; } - if (bio_barrier(bio) && bio_has_data(bio) && - (q->next_ordered == QUEUE_ORDERED_NONE)) { - err = -EOPNOTSUPP; - goto end_io; - } ret = q->make_request_fn(q, bio); } while (ret); From 88a69dfbc6ab1e3b51bba8c9103055e21089ebb9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Jul 2009 11:17:20 +0200 Subject: [PATCH 292/741] perf report: Fix HV bit mismerge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: builtin-report.c: In function ‘hist_entry__add’: builtin-report.c:1015: error: case label not within a switch statement builtin-report.c:1017: error: break statement not within loop or switch Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 
197793051fa5..7d2b49adcdc5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -964,6 +964,9 @@ resolve_callchain(struct thread *thread, struct map *map, } switch (context) { + case PERF_CONTEXT_HV: + dso = hypervisor_dso; + break; case PERF_CONTEXT_KERNEL: dso = kernel_dso; break; @@ -1012,9 +1015,6 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, }; int cmp; - case PERF_CONTEXT_HV: - dso = hypervisor_dso; - break; if ((sort__has_parent || callchain) && chain) syms = resolve_callchain(thread, map, chain, &entry); From a70c352a37671fe1ebcbd317b439aa4760f4ccb7 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 1 Jul 2009 11:51:18 +0300 Subject: [PATCH 293/741] xen: Use kcalloc() in xen_init_IRQ() The init_IRQ() function is now called with slab allocator initialized. Therefore, we must not use the bootmem allocator in xen_init_IRQ(). Fixes the following boot-time warning: ------------[ cut here ]------------ WARNING: at mm/bootmem.c:535 alloc_arch_preferred_bootmem+0x27/0x45() Modules linked in: Pid: 0, comm: swapper Not tainted 2.6.30 #1 Call Trace: [] ? warn_slowpath_common+0x73/0xb0 [] ? pvclock_clocksource_read+0x49/0x90 [] ? alloc_arch_preferred_bootmem+0x27/0x45 [] ? ___alloc_bootmem_nopanic+0x39/0xc9 [] ? ___alloc_bootmem+0x9/0x2f [] ? xen_init_IRQ+0x25/0x61 [] ? 
start_kernel+0x1b5/0x29e ---[ end trace 4eaa2a86a8e2da22 ]--- Acked-by: Jeremy Fitzhardinge Tested-by: Christian Kujau Reported-by: Christian Kujau Signed-off-by: Pekka Enberg Cc: lists@nerdbynature.de Cc: jeremy.fitzhardinge@citrix.com LKML-Reference: <1246438278.22417.28.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- drivers/xen/events.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 891d2e90753a..abad71b1632b 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -927,9 +927,9 @@ static struct irq_chip xen_dynamic_chip __read_mostly = { void __init xen_init_IRQ(void) { int i; - size_t size = nr_cpu_ids * sizeof(struct cpu_evtchn_s); - cpu_evtchn_mask_p = alloc_bootmem(size); + cpu_evtchn_mask_p = kcalloc(nr_cpu_ids, sizeof(struct cpu_evtchn_s), + GFP_KERNEL); BUG_ON(cpu_evtchn_mask_p == NULL); init_evtchn_cpu_bindings(); From b706f64281b24d8b1fdc8ae883700131d365c412 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Wed, 1 Jul 2009 12:41:14 +0200 Subject: [PATCH 294/741] cfq-iosched: remove redundant check for NULL cfqq in cfq_set_request() With the changes for falling back to an oom_cfqq, we never fail to find/allocate a queue in cfq_get_queue(). So remove the check. 
Signed-off-by: Shan Wei Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 1d9160ffa26d..87276eb83f7f 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2313,10 +2313,6 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) cfqq = cic_to_cfqq(cic, is_sync); if (!cfqq) { cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask); - - if (!cfqq) - goto queue_fail; - cic_set_cfqq(cic, cfqq, is_sync); } From f37a291c527c954df4da568de718ebb36b8261c0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Jul 2009 12:37:06 +0200 Subject: [PATCH 295/741] perf_counter tools: Add more warnings and fix/annotate them Enable -Wextra. This found a few real bugs plus a number of signed/unsigned type mismatches/uncleanlinesses. It also required a few annotations All things considered it was still worth it so lets try with this enabled for now. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 2 +- tools/perf/builtin-annotate.c | 12 ++++----- tools/perf/builtin-help.c | 6 +++-- tools/perf/builtin-list.c | 2 +- tools/perf/builtin-record.c | 4 +-- tools/perf/builtin-report.c | 22 +++++++-------- tools/perf/builtin-stat.c | 18 +++++++------ tools/perf/builtin-top.c | 8 +++--- tools/perf/perf.c | 5 +--- tools/perf/perf.h | 2 ++ tools/perf/util/alias.c | 2 +- tools/perf/util/cache.h | 1 + tools/perf/util/callchain.c | 15 ++++++----- tools/perf/util/callchain.h | 10 +++---- tools/perf/util/color.c | 10 ++++--- tools/perf/util/config.c | 18 +++++++------ tools/perf/util/exec_cmd.c | 5 +++- tools/perf/util/help.c | 26 ++++++++++-------- tools/perf/util/help.h | 6 ++--- tools/perf/util/parse-events.c | 2 +- tools/perf/util/parse-options.c | 2 +- tools/perf/util/parse-options.h | 25 +++++++++-------- tools/perf/util/quote.c | 48 
++++++++++++++++++--------------- tools/perf/util/quote.h | 2 +- tools/perf/util/strbuf.c | 13 ++++----- tools/perf/util/strbuf.h | 10 +++---- tools/perf/util/wrapper.c | 5 ++-- 27 files changed, 152 insertions(+), 129 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index f572c90f610e..eddf076b19d7 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -164,7 +164,7 @@ endif # CFLAGS and LDFLAGS are for the users to override from the command line. -CFLAGS = $(M64) -ggdb3 -Wall -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -Werror -O6 +CFLAGS = $(M64) -ggdb3 -Wall -Wextra -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -Werror -O6 LDFLAGS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) ALL_LDFLAGS = $(LDFLAGS) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 722c0f54e549..6cba70daf121 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -160,7 +160,7 @@ static void dsos__fprintf(FILE *fp) static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip) { - return dso__find_symbol(kernel_dso, ip); + return dso__find_symbol(dso, ip); } static int load_kernel(void) @@ -203,7 +203,7 @@ static u64 map__map_ip(struct map *map, u64 ip) return ip - map->start + map->pgoff; } -static u64 vdso__map_ip(struct map *map, u64 ip) +static u64 vdso__map_ip(struct map *map __used, u64 ip) { return ip; } @@ -600,7 +600,7 @@ static LIST_HEAD(hist_entry__sort_list); static int sort_dimension__add(char *tok) { - int i; + unsigned int i; for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { struct sort_dimension *sd = &sort_dimensions[i]; @@ -1069,7 +1069,7 @@ parse_line(FILE *file, struct symbol *sym, u64 start, u64 len) static const char *prev_color; unsigned int offset; size_t line_len; - u64 line_ip; + s64 line_ip; int ret; char *c; @@ -1428,7 +1428,7 @@ more: head += size; - if 
(offset + head < stat.st_size) + if (offset + head < (unsigned long)stat.st_size) goto more; rc = EXIT_SUCCESS; @@ -1492,7 +1492,7 @@ static void setup_sorting(void) free(str); } -int cmd_annotate(int argc, const char **argv, const char *prefix) +int cmd_annotate(int argc, const char **argv, const char *prefix __used) { symbol__init(); diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 0f32dc3f3c4c..2599d86a733b 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -3,6 +3,7 @@ * * Builtin help command */ +#include "perf.h" #include "util/cache.h" #include "builtin.h" #include "util/exec_cmd.h" @@ -277,7 +278,7 @@ static struct cmdnames main_cmds, other_cmds; void list_common_cmds_help(void) { - int i, longest = 0; + unsigned int i, longest = 0; for (i = 0; i < ARRAY_SIZE(common_cmds); i++) { if (longest < strlen(common_cmds[i].name)) @@ -415,9 +416,10 @@ static void show_html_page(const char *perf_cmd) open_html(page_path.buf); } -int cmd_help(int argc, const char **argv, const char *prefix) +int cmd_help(int argc, const char **argv, const char *prefix __used) { const char *alias; + load_command_list("perf-", &main_cmds, &other_cmds); perf_config(perf_help_config, NULL); diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index fe60e37c96ef..f990fa8a35c9 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -13,7 +13,7 @@ #include "util/parse-options.h" #include "util/parse-events.h" -int cmd_list(int argc, const char **argv, const char *prefix) +int cmd_list(int argc __used, const char **argv __used, const char *prefix __used) { print_events(); return 0; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d18546f37d7c..4ef78a5e6f32 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -294,7 +294,7 @@ static void pid_synthesize_mmap_samples(pid_t pid) while (1) { char bf[BUFSIZ], *pbf = bf; struct mmap_event mmap_ev = { - .header.type = 
PERF_EVENT_MMAP, + .header = { .type = PERF_EVENT_MMAP }, }; int n; size_t size; @@ -650,7 +650,7 @@ static const struct option options[] = { OPT_END() }; -int cmd_record(int argc, const char **argv, const char *prefix) +int cmd_record(int argc, const char **argv, const char *prefix __used) { int counter; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 7d2b49adcdc5..007363db3b16 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -177,7 +177,7 @@ static void dsos__fprintf(FILE *fp) static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip) { - return dso__find_symbol(kernel_dso, ip); + return dso__find_symbol(dso, ip); } static int load_kernel(void) @@ -239,7 +239,7 @@ static u64 map__map_ip(struct map *map, u64 ip) return ip - map->start + map->pgoff; } -static u64 vdso__map_ip(struct map *map, u64 ip) +static u64 vdso__map_ip(struct map *map __used, u64 ip) { return ip; } @@ -712,7 +712,7 @@ static LIST_HEAD(hist_entry__sort_list); static int sort_dimension__add(char *tok) { - int i; + unsigned int i; for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { struct sort_dimension *sd = &sort_dimensions[i]; @@ -801,7 +801,7 @@ callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples) ret += fprintf(fp, " %s\n", chain->sym->name); else ret += fprintf(fp, " %p\n", - (void *)chain->ip); + (void *)(long)chain->ip); } return ret; @@ -938,12 +938,12 @@ static int call__match(struct symbol *sym) } static struct symbol ** -resolve_callchain(struct thread *thread, struct map *map, +resolve_callchain(struct thread *thread, struct map *map __used, struct ip_callchain *chain, struct hist_entry *entry) { - int i; - struct symbol **syms; u64 context = PERF_CONTEXT_MAX; + struct symbol **syms; + unsigned int i; if (callchain) { syms = calloc(chain->nr, sizeof(*syms)); @@ -1183,7 +1183,7 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) fprintf(fp, "# ........"); list_for_each_entry(se, 
&hist_entry__sort_list, list) { - int i; + unsigned int i; if (exclude_other && (se == &sort_parent)) continue; @@ -1271,7 +1271,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) (long long)period); if (sample_type & PERF_SAMPLE_CALLCHAIN) { - int i; + unsigned int i; chain = (void *)more_data; @@ -1667,7 +1667,7 @@ more: if (offset + head >= header->data_offset + header->data_size) goto done; - if (offset + head < stat.st_size) + if (offset + head < (unsigned long)stat.st_size) goto more; done: @@ -1756,7 +1756,7 @@ static void setup_list(struct strlist **list, const char *list_str, } } -int cmd_report(int argc, const char **argv, const char *prefix) +int cmd_report(int argc, const char **argv, const char *prefix __used) { symbol__init(); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2e03524a1de0..095a90e012a1 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -64,7 +64,7 @@ static struct perf_counter_attr default_attrs[] = { static int system_wide = 0; static int verbose = 0; -static int nr_cpus = 0; +static unsigned int nr_cpus = 0; static int run_idx = 0; static int run_count = 1; @@ -108,7 +108,8 @@ static void create_perf_stat_counter(int counter, int pid) PERF_FORMAT_TOTAL_TIME_RUNNING; if (system_wide) { - int cpu; + unsigned int cpu; + for (cpu = 0; cpu < nr_cpus; cpu++) { fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); if (fd[cpu][counter] < 0 && verbose) @@ -150,8 +151,8 @@ static inline int nsec_counter(int counter) static void read_counter(int counter) { u64 *count, single_count[3]; - ssize_t res; - int cpu, nv; + unsigned int cpu; + size_t res, nv; int scaled; count = event_res[run_idx][counter]; @@ -165,6 +166,7 @@ static void read_counter(int counter) res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); assert(res == nv * sizeof(u64)); + close(fd[cpu][counter]); fd[cpu][counter] = -1; @@ -200,7 +202,7 @@ static void read_counter(int counter) 
runtime_cycles[run_idx] = count[0]; } -static int run_perf_stat(int argc, const char **argv) +static int run_perf_stat(int argc __used, const char **argv) { unsigned long long t0, t1; int status = 0; @@ -390,7 +392,7 @@ static void calc_avg(void) event_res_avg[j]+1, event_res[i][j]+1); update_avg("counter/2", j, event_res_avg[j]+2, event_res[i][j]+2); - if (event_scaled[i][j] != -1) + if (event_scaled[i][j] != (u64)-1) update_avg("scaled", j, event_scaled_avg + j, event_scaled[i]+j); else @@ -510,7 +512,7 @@ static const struct option options[] = { OPT_END() }; -int cmd_stat(int argc, const char **argv, const char *prefix) +int cmd_stat(int argc, const char **argv, const char *prefix __used) { int status; @@ -528,7 +530,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix) nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); assert(nr_cpus <= MAX_NR_CPUS); - assert(nr_cpus >= 0); + assert((int)nr_cpus >= 0); /* * We dont want to block the signals - that would cause diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0506cd6e04cc..5f5e7df8302d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -269,7 +269,7 @@ static void print_sym_table(void) } } -static void *display_thread(void *arg) +static void *display_thread(void *arg __used) { struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; int delay_msecs = delay_secs * 1000; @@ -287,7 +287,7 @@ static void *display_thread(void *arg) } /* Tag samples to be skipped. 
*/ -char *skip_symbols[] = { +static const char *skip_symbols[] = { "default_idle", "cpu_idle", "enter_idle", @@ -426,7 +426,7 @@ static void process_event(u64 ip, int counter, int user) struct mmap_data { int counter; void *base; - unsigned int mask; + int mask; unsigned int prev; }; @@ -705,7 +705,7 @@ static const struct option options[] = { OPT_END() }; -int cmd_top(int argc, const char **argv, const char *prefix) +int cmd_top(int argc, const char **argv, const char *prefix __used) { int counter; diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 4eb725933703..c5656784c61d 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -229,9 +229,6 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) use_pager = 1; commit_pager_choice(); - if (p->option & NEED_WORK_TREE) - /* setup_work_tree() */; - status = p->fn(argc, argv, prefix); if (status) return status & 0xff; @@ -266,7 +263,7 @@ static void handle_internal_command(int argc, const char **argv) { "annotate", cmd_annotate, 0 }, { "version", cmd_version, 0 }, }; - int i; + unsigned int i; static const char ext[] = STRIP_EXTENSION; if (sizeof(ext) > 1) { diff --git a/tools/perf/perf.h b/tools/perf/perf.h index ce394192c85a..27887c916439 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -52,6 +52,8 @@ static inline unsigned long long rdclock(void) #define __user #define asmlinkage +#define __used __attribute__((__unused__)) + #define unlikely(x) __builtin_expect(!!(x), 0) #define min(x, y) ({ \ typeof(x) _min1 = (x); \ diff --git a/tools/perf/util/alias.c b/tools/perf/util/alias.c index 9b3dd2b428df..b8144e80bb1e 100644 --- a/tools/perf/util/alias.c +++ b/tools/perf/util/alias.c @@ -3,7 +3,7 @@ static const char *alias_key; static char *alias_val; -static int alias_lookup_cb(const char *k, const char *v, void *cb) +static int alias_lookup_cb(const char *k, const char *v, void *cb __used) { if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) { if (!v) diff --git 
a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 393d6146d13b..161d5f413e28 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -3,6 +3,7 @@ #include "util.h" #include "strbuf.h" +#include "../perf.h" #define PERF_DIR_ENVIRONMENT "PERF_DIR" #define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE" diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 440db12c6359..3dceabd9b5ef 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -92,7 +92,7 @@ static void fill_node(struct callchain_node *node, struct ip_callchain *chain, int start, struct symbol **syms) { - int i; + unsigned int i; for (i = start; i < chain->nr; i++) { struct callchain_list *call; @@ -135,7 +135,7 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, { struct callchain_node *new; struct list_head *old_tail; - int idx_total = idx_parents + idx_local; + unsigned int idx_total = idx_parents + idx_local; /* split */ new = create_child(parent, true); @@ -164,17 +164,18 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, static int __append_chain(struct callchain_node *root, struct ip_callchain *chain, - int start, struct symbol **syms); + unsigned int start, struct symbol **syms); static void __append_chain_children(struct callchain_node *root, struct ip_callchain *chain, - struct symbol **syms, int start) + struct symbol **syms, unsigned int start) { struct callchain_node *rnode; /* lookup in childrens */ list_for_each_entry(rnode, &root->children, brothers) { - int ret = __append_chain(rnode, chain, start, syms); + unsigned int ret = __append_chain(rnode, chain, start, syms); + if (!ret) return; } @@ -184,10 +185,10 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain, static int __append_chain(struct callchain_node *root, struct ip_callchain *chain, - int start, struct symbol **syms) + unsigned int start, struct symbol **syms) { struct callchain_list 
*cnode; - int i = start; + unsigned int i = start; bool found = false; /* diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index c942daa712e6..251d99ecd225 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -10,15 +10,15 @@ struct callchain_node { struct callchain_node *parent; struct list_head brothers; - struct list_head children; - struct list_head val; + struct list_head children; + struct list_head val; struct rb_node rb_node; - int val_nr; - int hit; + unsigned int val_nr; + u64 hit; }; struct callchain_list { - unsigned long ip; + u64 ip; struct symbol *sym; struct list_head list; }; diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index 9a8c20ccc53e..26f82318b86b 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -11,7 +11,8 @@ static int parse_color(const char *name, int len) }; char *end; int i; - for (i = 0; i < ARRAY_SIZE(color_names); i++) { + + for (i = 0; i < (int)ARRAY_SIZE(color_names); i++) { const char *str = color_names[i]; if (!strncasecmp(name, str, len) && !str[len]) return i - 1; @@ -28,7 +29,8 @@ static int parse_attr(const char *name, int len) static const char * const attr_names[] = { "bold", "dim", "ul", "blink", "reverse" }; - int i; + unsigned int i; + for (i = 0; i < ARRAY_SIZE(attr_names); i++) { const char *str = attr_names[i]; if (!strncasecmp(name, str, len) && !str[len]) @@ -222,10 +224,12 @@ int color_fwrite_lines(FILE *fp, const char *color, { if (!*color) return fwrite(buf, count, 1, fp) != 1; + while (count) { char *p = memchr(buf, '\n', count); + if (p != buf && (fputs(color, fp) < 0 || - fwrite(buf, p ? p - buf : count, 1, fp) != 1 || + fwrite(buf, p ? 
(size_t)(p - buf) : count, 1, fp) != 1 || fputs(PERF_COLOR_RESET, fp) < 0)) return -1; if (!p) diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 3dd13faa6a27..780df541006d 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -47,10 +47,12 @@ static int get_next_char(void) static char *parse_value(void) { static char value[1024]; - int quote = 0, comment = 0, len = 0, space = 0; + int quote = 0, comment = 0, space = 0; + size_t len = 0; for (;;) { int c = get_next_char(); + if (len >= sizeof(value) - 1) return NULL; if (c == '\n') { @@ -353,13 +355,13 @@ int perf_config_string(const char **dest, const char *var, const char *value) return 0; } -static int perf_default_core_config(const char *var, const char *value) +static int perf_default_core_config(const char *var __used, const char *value __used) { /* Add other config variables here and to Documentation/config.txt. */ return 0; } -int perf_default_config(const char *var, const char *value, void *dummy) +int perf_default_config(const char *var, const char *value, void *dummy __used) { if (!prefixcmp(var, "core.")) return perf_default_core_config(var, value); @@ -471,10 +473,10 @@ static int matches(const char* key, const char* value) !regexec(store.value_regex, value, 0, NULL, 0))); } -static int store_aux(const char* key, const char* value, void *cb) +static int store_aux(const char* key, const char* value, void *cb __used) { + int section_len; const char *ep; - size_t section_len; switch (store.state) { case KEY_SEEN: @@ -551,7 +553,7 @@ static int store_write_section(int fd, const char* key) strbuf_addf(&sb, "[%.*s]\n", store.baselen, key); } - success = write_in_full(fd, sb.buf, sb.len) == sb.len; + success = (write_in_full(fd, sb.buf, sb.len) == (ssize_t)sb.len); strbuf_release(&sb); return success; @@ -599,7 +601,7 @@ static int store_write_pair(int fd, const char* key, const char* value) } strbuf_addf(&sb, "%s\n", quote); - success = write_in_full(fd, sb.buf, sb.len) 
== sb.len; + success = (write_in_full(fd, sb.buf, sb.len) == (ssize_t)sb.len); strbuf_release(&sb); return success; @@ -741,7 +743,7 @@ int perf_config_set_multivar(const char* key, const char* value, } else { struct stat st; char* contents; - size_t contents_sz, copy_begin, copy_end; + ssize_t contents_sz, copy_begin, copy_end; int i, new_line = 0; if (value_regex == NULL) diff --git a/tools/perf/util/exec_cmd.c b/tools/perf/util/exec_cmd.c index d39292263153..34a352867382 100644 --- a/tools/perf/util/exec_cmd.c +++ b/tools/perf/util/exec_cmd.c @@ -1,6 +1,9 @@ #include "cache.h" #include "exec_cmd.h" #include "quote.h" + +#include + #define MAX_ARGS 32 extern char **environ; @@ -51,7 +54,7 @@ const char *perf_extract_argv0_path(const char *argv0) slash--; if (slash >= argv0) { - argv0_path = strndup(argv0, slash - argv0); + argv0_path = xstrndup(argv0, slash - argv0); return slash + 1; } diff --git a/tools/perf/util/help.c b/tools/perf/util/help.c index 17a00e0df2c4..fbb00978b2e2 100644 --- a/tools/perf/util/help.c +++ b/tools/perf/util/help.c @@ -26,7 +26,7 @@ static int term_columns(void) return 80; } -void add_cmdname(struct cmdnames *cmds, const char *name, int len) +void add_cmdname(struct cmdnames *cmds, const char *name, size_t len) { struct cmdname *ent = malloc(sizeof(*ent) + len + 1); @@ -40,7 +40,8 @@ void add_cmdname(struct cmdnames *cmds, const char *name, int len) static void clean_cmdnames(struct cmdnames *cmds) { - int i; + unsigned int i; + for (i = 0; i < cmds->cnt; ++i) free(cmds->names[i]); free(cmds->names); @@ -57,7 +58,7 @@ static int cmdname_compare(const void *a_, const void *b_) static void uniq(struct cmdnames *cmds) { - int i, j; + unsigned int i, j; if (!cmds->cnt) return; @@ -71,7 +72,7 @@ static void uniq(struct cmdnames *cmds) void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) { - int ci, cj, ei; + size_t ci, cj, ei; int cmp; ci = cj = ei = 0; @@ -106,8 +107,9 @@ static void pretty_print_string_list(struct cmdnames 
*cmds, int longest) printf(" "); for (j = 0; j < cols; j++) { - int n = j * rows + i; - int size = space; + unsigned int n = j * rows + i; + unsigned int size = space; + if (n >= cmds->cnt) break; if (j == cols-1 || n + rows >= cmds->cnt) @@ -208,7 +210,7 @@ void load_command_list(const char *prefix, void list_commands(const char *title, struct cmdnames *main_cmds, struct cmdnames *other_cmds) { - int i, longest = 0; + unsigned int i, longest = 0; for (i = 0; i < main_cmds->cnt; i++) if (longest < main_cmds->names[i]->len) @@ -239,7 +241,8 @@ void list_commands(const char *title, struct cmdnames *main_cmds, int is_in_cmdlist(struct cmdnames *c, const char *s) { - int i; + unsigned int i; + for (i = 0; i < c->cnt; i++) if (!strcmp(s, c->names[i]->name)) return 1; @@ -271,7 +274,8 @@ static int levenshtein_compare(const void *p1, const void *p2) static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) { - int i; + unsigned int i; + ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc); for (i = 0; i < old->cnt; i++) @@ -283,7 +287,7 @@ static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) const char *help_unknown_cmd(const char *cmd) { - int i, n = 0, best_similarity = 0; + unsigned int i, n = 0, best_similarity = 0; struct cmdnames main_cmds, other_cmds; memset(&main_cmds, 0, sizeof(main_cmds)); @@ -345,7 +349,7 @@ const char *help_unknown_cmd(const char *cmd) exit(1); } -int cmd_version(int argc, const char **argv, const char *prefix) +int cmd_version(int argc __used, const char **argv __used, const char *prefix __used) { printf("perf version %s\n", perf_version_string); return 0; diff --git a/tools/perf/util/help.h b/tools/perf/util/help.h index 56bc15406ffc..7128783637b4 100644 --- a/tools/perf/util/help.h +++ b/tools/perf/util/help.h @@ -2,8 +2,8 @@ #define HELP_H struct cmdnames { - int alloc; - int cnt; + size_t alloc; + size_t cnt; struct cmdname { size_t len; /* also used for similarity index in help.c */ char 
name[FLEX_ARRAY]; @@ -19,7 +19,7 @@ static inline void mput_char(char c, unsigned int num) void load_command_list(const char *prefix, struct cmdnames *main_cmds, struct cmdnames *other_cmds); -void add_cmdname(struct cmdnames *cmds, const char *name, int len); +void add_cmdname(struct cmdnames *cmds, const char *name, size_t len); /* Here we require that excludes is a sorted list. */ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes); int is_in_cmdlist(struct cmdnames *c, const char *s); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index e6b83a3311a5..aed70901df24 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -385,7 +385,7 @@ static int parse_event_symbols(const char **str, struct perf_counter_attr *attr) return 1; } -int parse_events(const struct option *opt, const char *str, int unset) +int parse_events(const struct option *opt __used, const char *str, int unset __used) { struct perf_counter_attr attr; diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index b3affb1658d2..9a897b7cce79 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -485,7 +485,7 @@ int parse_options_usage(const char * const *usagestr, } -int parse_opt_verbosity_cb(const struct option *opt, const char *arg, +int parse_opt_verbosity_cb(const struct option *opt, const char *arg __used, int unset) { int *target = opt->value; diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index a1039a6ce0eb..15c8aba9c62e 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -90,21 +90,20 @@ struct option { intptr_t defval; }; -#define OPT_END() { OPTION_END } -#define OPT_ARGUMENT(l, h) { OPTION_ARGUMENT, 0, (l), NULL, NULL, (h) } -#define OPT_GROUP(h) { OPTION_GROUP, 0, NULL, NULL, NULL, (h) } -#define OPT_BIT(s, l, v, h, b) { OPTION_BIT, (s), (l), (v), NULL, (h), 0, NULL, (b) } -#define OPT_BOOLEAN(s, 
l, v, h) { OPTION_BOOLEAN, (s), (l), (v), NULL, (h) } -#define OPT_SET_INT(s, l, v, h, i) { OPTION_SET_INT, (s), (l), (v), NULL, (h), 0, NULL, (i) } -#define OPT_SET_PTR(s, l, v, h, p) { OPTION_SET_PTR, (s), (l), (v), NULL, (h), 0, NULL, (p) } -#define OPT_INTEGER(s, l, v, h) { OPTION_INTEGER, (s), (l), (v), NULL, (h) } -#define OPT_LONG(s, l, v, h) { OPTION_LONG, (s), (l), (v), NULL, (h) } -#define OPT_STRING(s, l, v, a, h) { OPTION_STRING, (s), (l), (v), (a), (h) } +#define OPT_END() { .type = OPTION_END } +#define OPT_ARGUMENT(l, h) { .type = OPTION_ARGUMENT, .long_name = (l), .help = (h) } +#define OPT_GROUP(h) { .type = OPTION_GROUP, .help = (h) } +#define OPT_BIT(s, l, v, h, b) { .type = OPTION_BIT, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (b) } +#define OPT_BOOLEAN(s, l, v, h) { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), .value = (v), .help = (h) } +#define OPT_SET_INT(s, l, v, h, i) { .type = OPTION_SET_INT, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (i) } +#define OPT_SET_PTR(s, l, v, h, p) { .type = OPTION_SET_PTR, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (p) } +#define OPT_INTEGER(s, l, v, h) { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = (v), .help = (h) } +#define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = (v), .help = (h) } +#define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h) } #define OPT_DATE(s, l, v, h) \ - { OPTION_CALLBACK, (s), (l), (v), "time",(h), 0, \ - parse_opt_approxidate_cb } + { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb } #define OPT_CALLBACK(s, l, v, a, h, f) \ - { OPTION_CALLBACK, (s), (l), (v), (a), (h), 0, (f) } + { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), 
.value = (v), (a), .help = (h), .callback = (f) } /* parse_options() will filter out the processed options and leave the * non-option argments in argv[]. diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c index f18c5212bc92..c6e5dc0dc82f 100644 --- a/tools/perf/util/quote.c +++ b/tools/perf/util/quote.c @@ -162,12 +162,16 @@ static inline int sq_must_quote(char c) return sq_lookup[(unsigned char)c] + quote_path_fully > 0; } -/* returns the longest prefix not needing a quote up to maxlen if positive. - This stops at the first \0 because it's marked as a character needing an - escape */ -static size_t next_quote_pos(const char *s, ssize_t maxlen) +/* + * Returns the longest prefix not needing a quote up to maxlen if + * positive. + * This stops at the first \0 because it's marked as a character + * needing an escape. + */ +static ssize_t next_quote_pos(const char *s, ssize_t maxlen) { - size_t len; + ssize_t len; + if (maxlen < 0) { for (len = 0; !sq_must_quote(s[len]); len++); } else { @@ -192,22 +196,22 @@ static size_t next_quote_pos(const char *s, ssize_t maxlen) static size_t quote_c_style_counted(const char *name, ssize_t maxlen, struct strbuf *sb, FILE *fp, int no_dq) { -#undef EMIT -#define EMIT(c) \ - do { \ - if (sb) strbuf_addch(sb, (c)); \ - if (fp) fputc((c), fp); \ - count++; \ - } while (0) -#define EMITBUF(s, l) \ - do { \ - int __ret; \ - if (sb) strbuf_add(sb, (s), (l)); \ - if (fp) __ret = fwrite((s), (l), 1, fp); \ - count += (l); \ +#define EMIT(c) \ + do { \ + if (sb) strbuf_addch(sb, (c)); \ + if (fp) fputc((c), fp); \ + count++; \ } while (0) - size_t len, count = 0; +#define EMITBUF(s, l) \ + do { \ + int __ret; \ + if (sb) strbuf_add(sb, (s), (l)); \ + if (fp) __ret = fwrite((s), (l), 1, fp); \ + count += (l); \ + } while (0) + + ssize_t len, count = 0; const char *p = name; for (;;) { @@ -273,8 +277,8 @@ void write_name_quoted(const char *name, FILE *fp, int terminator) fputc(terminator, fp); } -extern void 
write_name_quotedpfx(const char *pfx, size_t pfxlen, - const char *name, FILE *fp, int terminator) +void write_name_quotedpfx(const char *pfx, ssize_t pfxlen, + const char *name, FILE *fp, int terminator) { int needquote = 0; @@ -306,7 +310,7 @@ char *quote_path_relative(const char *in, int len, len = strlen(in); /* "../" prefix itself does not need quoting, but "in" might. */ - needquote = next_quote_pos(in, len) < len; + needquote = (next_quote_pos(in, len) < len); strbuf_setlen(out, 0); strbuf_grow(out, len); diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h index 5dfad89816db..a5454a1d1c13 100644 --- a/tools/perf/util/quote.h +++ b/tools/perf/util/quote.h @@ -53,7 +53,7 @@ extern size_t quote_c_style(const char *name, struct strbuf *, FILE *, int no_dq extern void quote_two_c_style(struct strbuf *, const char *, const char *, int); extern void write_name_quoted(const char *name, FILE *, int terminator); -extern void write_name_quotedpfx(const char *pfx, size_t pfxlen, +extern void write_name_quotedpfx(const char *pfx, ssize_t pfxlen, const char *name, FILE *, int terminator); /* quote path as relative to the given prefix */ diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index 464e7ca898cf..5249d5a1b0c2 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -16,7 +16,7 @@ int prefixcmp(const char *str, const char *prefix) */ char strbuf_slopbuf[1]; -void strbuf_init(struct strbuf *sb, size_t hint) +void strbuf_init(struct strbuf *sb, ssize_t hint) { sb->alloc = sb->len = 0; sb->buf = strbuf_slopbuf; @@ -92,7 +92,8 @@ void strbuf_ltrim(struct strbuf *sb) void strbuf_tolower(struct strbuf *sb) { - int i; + unsigned int i; + for (i = 0; i < sb->len; i++) sb->buf[i] = tolower(sb->buf[i]); } @@ -264,7 +265,7 @@ size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f) return res; } -ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint) +ssize_t strbuf_read(struct strbuf *sb, int fd, ssize_t hint) { size_t 
oldlen = sb->len; size_t oldalloc = sb->alloc; @@ -293,7 +294,7 @@ ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint) #define STRBUF_MAXLINK (2*PATH_MAX) -int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) +int strbuf_readlink(struct strbuf *sb, const char *path, ssize_t hint) { size_t oldalloc = sb->alloc; @@ -301,7 +302,7 @@ int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) hint = 32; while (hint < STRBUF_MAXLINK) { - int len; + ssize_t len; strbuf_grow(sb, hint); len = readlink(path, sb->buf, hint); @@ -343,7 +344,7 @@ int strbuf_getline(struct strbuf *sb, FILE *fp, int term) return 0; } -int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint) +int strbuf_read_file(struct strbuf *sb, const char *path, ssize_t hint) { int fd, len; diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index 9ee908a3ec5d..d2aa86c014c1 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -50,7 +50,7 @@ struct strbuf { #define STRBUF_INIT { 0, 0, strbuf_slopbuf } /*----- strbuf life cycle -----*/ -extern void strbuf_init(struct strbuf *, size_t); +extern void strbuf_init(struct strbuf *buf, ssize_t hint); extern void strbuf_release(struct strbuf *); extern char *strbuf_detach(struct strbuf *, size_t *); extern void strbuf_attach(struct strbuf *, void *, size_t, size_t); @@ -61,7 +61,7 @@ static inline void strbuf_swap(struct strbuf *a, struct strbuf *b) { } /*----- strbuf size related -----*/ -static inline size_t strbuf_avail(const struct strbuf *sb) { +static inline ssize_t strbuf_avail(const struct strbuf *sb) { return sb->alloc ? 
sb->alloc - sb->len - 1 : 0; } @@ -122,9 +122,9 @@ extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...); extern size_t strbuf_fread(struct strbuf *, size_t, FILE *); /* XXX: if read fails, any partial read is undone */ -extern ssize_t strbuf_read(struct strbuf *, int fd, size_t hint); -extern int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint); -extern int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint); +extern ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint); +extern int strbuf_read_file(struct strbuf *sb, const char *path, ssize_t hint); +extern int strbuf_readlink(struct strbuf *sb, const char *path, ssize_t hint); extern int strbuf_getline(struct strbuf *, FILE *, int); diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c index 6350d65f6d9e..4574ac28396f 100644 --- a/tools/perf/util/wrapper.c +++ b/tools/perf/util/wrapper.c @@ -7,7 +7,7 @@ * There's no pack memory to release - but stay close to the Git * version so wrap this away: */ -static inline void release_pack_memory(size_t size, int flag) +static inline void release_pack_memory(size_t size __used, int flag __used) { } @@ -59,7 +59,8 @@ void *xmemdupz(const void *data, size_t len) char *xstrndup(const char *str, size_t len) { char *p = memchr(str, '\0', len); - return xmemdupz(str, p ? p - str : len); + + return xmemdupz(str, p ? (size_t)(p - str) : len); } void *xrealloc(void *ptr, size_t size) From b9ebdcc0ce1c676ebf5dc4f6df6b440d8fcf88ab Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 1 Jul 2009 15:05:09 +0530 Subject: [PATCH 296/741] perf stat: Define MATCH_EVENT for easy attr checking MATCH_EVENT is useful: 1. for multiple attrs checking 2. avoid repetition of PERF_TYPE_ and PERF_COUNT_ and save space 3. 
avoids line breakage Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1246440909.3403.5.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 095a90e012a1..01cc07efb728 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -96,6 +96,10 @@ static u64 walltime_nsecs_noise; static u64 runtime_cycles_avg; static u64 runtime_cycles_noise; +#define MATCH_EVENT(t, c, counter) \ + (attrs[counter].type == PERF_TYPE_##t && \ + attrs[counter].config == PERF_COUNT_##c) + #define ERR_PERF_OPEN \ "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" @@ -133,13 +137,8 @@ static void create_perf_stat_counter(int counter, int pid) */ static inline int nsec_counter(int counter) { - if (attrs[counter].type != PERF_TYPE_SOFTWARE) - return 0; - - if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK) - return 1; - - if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) + if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) || + MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) return 1; return 0; @@ -194,11 +193,9 @@ static void read_counter(int counter) /* * Save the full runtime - to allow normalization during printout: */ - if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) + if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) runtime_nsecs[run_idx] = count[0]; - if (attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) + if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) runtime_cycles[run_idx] = count[0]; } @@ -292,9 +289,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise) fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter)); - if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { 
- + if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { if (walltime_nsecs_avg) fprintf(stderr, " # %10.3f CPUs ", (double)count[0] / (double)walltime_nsecs_avg); @@ -307,9 +302,7 @@ static void abs_printout(int counter, u64 *count, u64 *noise) fprintf(stderr, " %14Ld %-24s", count[0], event_name(counter)); if (runtime_cycles_avg && - attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { - + MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { fprintf(stderr, " # %10.3f IPC ", (double)count[0] / (double)runtime_cycles_avg); } else { From 44973998a111dfda09b952aa0f27cad326a97793 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 1 Jul 2009 17:49:38 +0530 Subject: [PATCH 297/741] x86: Remove double declaration of MSR_P6_EVNTSEL0 and MSR_P6_EVNTSEL1 MSR_P6_EVNTSEL0 and MSR_P6_EVNTSEL1 are already declared in msr-index.h. Signed-off-by: Jaswinder Singh Rajput LKML-Reference: <1246450778.6940.8.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr-index.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1692fb5050e3..6be7fc254b59 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -246,10 +246,6 @@ #define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << 38) #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << 39) -/* Intel Model 6 */ -#define MSR_P6_EVNTSEL0 0x00000186 -#define MSR_P6_EVNTSEL1 0x00000187 - /* P4/Xeon+ specific */ #define MSR_IA32_MCG_EAX 0x00000180 #define MSR_IA32_MCG_EBX 0x00000181 From 73c24cb86c51ff6445b292d9914d31236204393b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 1 Jul 2009 18:36:18 +0530 Subject: [PATCH 298/741] perf list: Add cache events After: $ ./perf list List of pre-defined events (to be used in -e): cpu-cycles OR cycles [Hardware event] instructions [Hardware event] cache-references [Hardware event] cache-misses [Hardware
event] branch-instructions OR branches [Hardware event] branch-misses [Hardware event] bus-cycles [Hardware event] cpu-clock [Software event] task-clock [Software event] page-faults OR faults [Software event] minor-faults [Software event] major-faults [Software event] context-switches OR cs [Software event] cpu-migrations OR migrations [Software event] L1-d$-loads [Hardware cache event] L1-d$-load-misses [Hardware cache event] L1-d$-stores [Hardware cache event] L1-d$-store-misses [Hardware cache event] L1-d$-prefetches [Hardware cache event] L1-d$-prefetch-misses [Hardware cache event] L1-i$-loads [Hardware cache event] L1-i$-load-misses [Hardware cache event] L1-i$-prefetches [Hardware cache event] L1-i$-prefetch-misses [Hardware cache event] LLC-loads [Hardware cache event] LLC-load-misses [Hardware cache event] LLC-stores [Hardware cache event] LLC-store-misses [Hardware cache event] LLC-prefetches [Hardware cache event] LLC-prefetch-misses [Hardware cache event] dTLB-loads [Hardware cache event] dTLB-load-misses [Hardware cache event] dTLB-stores [Hardware cache event] dTLB-store-misses [Hardware cache event] dTLB-prefetches [Hardware cache event] dTLB-prefetch-misses [Hardware cache event] iTLB-loads [Hardware cache event] iTLB-load-misses [Hardware cache event] branch-loads [Hardware cache event] branch-load-misses [Hardware cache event] rNNN [raw hardware event descriptor] Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <1246453578.3072.1.camel@ht.satnam> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index aed70901df24..5184959e0615 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -428,7 +428,7 @@ static const char * const event_type_descriptors[] 
= { void print_events(void) { struct event_symbol *syms = event_symbols; - unsigned int i, type, prev_type = -1; + unsigned int i, type, op, prev_type = -1; char name[40]; fprintf(stderr, "\n"); @@ -452,6 +452,21 @@ void print_events(void) prev_type = type; } + fprintf(stderr, "\n"); + for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { + for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { + /* skip invalid cache type */ + if (!is_cache_op_valid(type, op)) + continue; + + for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { + fprintf(stderr, " %-40s [%s]\n", + event_cache_name(type, op, i), + event_type_descriptors[4]); + } + } + } + fprintf(stderr, "\n"); fprintf(stderr, " %-40s [raw hardware event descriptor]\n", "rNNN"); From b25ae679f613ed04aaf6ccbfdb9122fce668e4bb Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 1 Jul 2009 19:53:14 +0530 Subject: [PATCH 299/741] x86: Mark device_nb as static and fix NULL noise This sparse warning: arch/x86/kernel/amd_iommu.c:1195:23: warning: symbol 'device_nb' was not declared. Should it be static? triggers because device_nb is global but is only used in a single .c file. change device_nb to static to fix that - this also addresses the sparse warning. This sparse warning: arch/x86/kernel/amd_iommu.c:1766:10: warning: Using plain integer as NULL pointer triggers because plain integer 0 is used in place of a NULL pointer. change 0 to NULL to fix that - this also addresses the sparse warning.
Signed-off-by: Jaswinder Singh Rajput Cc: Joerg Roedel LKML-Reference: <1246458194.6940.20.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 9372f0406ad4..6c99f5037801 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1192,7 +1192,7 @@ out: return 0; } -struct notifier_block device_nb = { +static struct notifier_block device_nb = { .notifier_call = device_change_notifier, }; @@ -1763,7 +1763,7 @@ static void *alloc_coherent(struct device *dev, size_t size, flag |= __GFP_ZERO; virt_addr = (void *)__get_free_pages(flag, get_order(size)); if (!virt_addr) - return 0; + return NULL; paddr = virt_to_phys(virt_addr); From 76c06927f2a78143763dcff9b4c362d15eb29cc2 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 1 Jul 2009 19:54:23 +0530 Subject: [PATCH 300/741] x86: Declare check_efer() before it gets used This sparse warning: arch/x86/mm/init.c:83:16: warning: symbol 'check_efer' was not declared. Should it be static? triggers because check_efer() is not declared before using it. asm/proto.h includes the declaration of check_efer(), so including asm/proto.h to fix that - this also addresses the sparse warning.
Signed-off-by: Jaswinder Singh Rajput Cc: Andrew Morton LKML-Reference: <1246458263.6940.22.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 47ce9a2ce5e7..0607119cef94 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -12,6 +12,7 @@ #include #include #include +#include DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); From ff84847171508a3c76eb7e483204d1be7738729b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 1 Jul 2009 18:08:01 +0200 Subject: [PATCH 301/741] ALSA: hda - Add quirk for HP 6930p Added a quirk model=laptop for HP 6930p (103c:30dc) with AD1984A codec. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_analog.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 84cc49ca9148..85e8618e8497 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -3966,6 +3966,7 @@ static struct snd_pci_quirk ad1884a_cfg_tbl[] = { SND_PCI_QUIRK(0x103c, 0x3037, "HP 2230s", AD1884A_LAPTOP), SND_PCI_QUIRK(0x103c, 0x3056, "HP", AD1884A_MOBILE), SND_PCI_QUIRK_MASK(0x103c, 0xfff0, 0x3070, "HP", AD1884A_MOBILE), + SND_PCI_QUIRK_MASK(0x103c, 0xfff0, 0x30d0, "HP laptop", AD1884A_LAPTOP), SND_PCI_QUIRK_MASK(0x103c, 0xfff0, 0x30e0, "HP laptop", AD1884A_LAPTOP), SND_PCI_QUIRK_MASK(0x103c, 0xff00, 0x3600, "HP laptop", AD1884A_LAPTOP), SND_PCI_QUIRK(0x17aa, 0x20ac, "Thinkpad X300", AD1884A_THINKPAD), From 57d81f6f393b245894ca0cd828f80ce7e3294f39 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Jul 2009 09:43:53 +0200 Subject: [PATCH 302/741] kmemleak: Fix scheduling-while-atomic bug One of the kmemleak changes caused the following scheduling-while-holding-the-tasklist-lock regression on x86: BUG: sleeping function called from invalid context at mm/kmemleak.c:795 in_atomic(): 1, irqs_disabled(): 0, pid: 1737, name: kmemleak 2 locks held by kmemleak/1737: #0: 
(scan_mutex){......}, at: [] kmemleak_scan_thread+0x45/0x86 #1: (tasklist_lock){......}, at: [] kmemleak_scan+0x1a9/0x39c Pid: 1737, comm: kmemleak Not tainted 2.6.31-rc1-tip #59266 Call Trace: [] ? __debug_show_held_locks+0x1e/0x20 [] __might_sleep+0x10a/0x111 [] scan_yield+0x17/0x3b [] scan_block+0x39/0xd4 [] kmemleak_scan+0x1bb/0x39c [] ? kmemleak_scan_thread+0x0/0x86 [] kmemleak_scan_thread+0x4a/0x86 [] kthread+0x6e/0x73 [] ? kthread+0x0/0x73 [] kernel_thread_helper+0x7/0x10 kmemleak: 834 new suspected memory leaks (see /sys/kernel/debug/kmemleak) The bit causing it is highly dubious: static void scan_yield(void) { might_sleep(); if (time_is_before_eq_jiffies(next_scan_yield)) { schedule(); next_scan_yield = jiffies + jiffies_scan_yield; } } It is called deep inside the codepath and in a conditional way, and that is what crapped up when one of the new scan_block() uses grew a tasklist_lock dependency. This minimal patch removes that yielding stuff and adds the proper cond_resched(). The background scanning thread could probably also be reniced to +10.
Signed-off-by: Ingo Molnar Acked-by: Pekka Enberg Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index eeece2deace2..e766e1da09d2 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -105,7 +105,6 @@ #define MAX_TRACE 16 /* stack trace length */ #define REPORTS_NR 50 /* maximum number of reported leaks */ #define MSECS_MIN_AGE 5000 /* minimum object age for reporting */ -#define MSECS_SCAN_YIELD 10 /* CPU yielding period */ #define SECS_FIRST_SCAN 60 /* delay before the first scan */ #define SECS_SCAN_WAIT 600 /* subsequent auto scanning delay */ @@ -186,10 +185,7 @@ static atomic_t kmemleak_error = ATOMIC_INIT(0); static unsigned long min_addr = ULONG_MAX; static unsigned long max_addr; -/* used for yielding the CPU to other tasks during scanning */ -static unsigned long next_scan_yield; static struct task_struct *scan_thread; -static unsigned long jiffies_scan_yield; /* used to avoid reporting of recently allocated objects */ static unsigned long jiffies_min_age; static unsigned long jiffies_last_scan; @@ -785,21 +781,6 @@ void kmemleak_no_scan(const void *ptr) } EXPORT_SYMBOL(kmemleak_no_scan); -/* - * Yield the CPU so that other tasks get a chance to run. The yielding is - * rate-limited to avoid excessive number of calls to the schedule() function - * during memory scanning. - */ -static void scan_yield(void) -{ - might_sleep(); - - if (time_is_before_eq_jiffies(next_scan_yield)) { - schedule(); - next_scan_yield = jiffies + jiffies_scan_yield; - } -} - /* * Memory scanning is a long process and it needs to be interruptable. This * function checks whether such interrupt condition occured. 
@@ -840,15 +821,6 @@ static void scan_block(void *_start, void *_end, if (scan_should_stop()) break; - /* - * When scanning a memory block with a corresponding - * kmemleak_object, the CPU yielding is handled in the calling - * code since it holds the object->lock to avoid the block - * freeing. - */ - if (!scanned) - scan_yield(); - object = find_and_get_object(pointer, 1); if (!object) continue; @@ -1014,7 +986,7 @@ static void kmemleak_scan(void) */ object = list_entry(gray_list.next, typeof(*object), gray_list); while (&object->gray_list != &gray_list) { - scan_yield(); + cond_resched(); /* may add new objects to the list */ if (!scan_should_stop()) @@ -1385,7 +1357,6 @@ void __init kmemleak_init(void) int i; unsigned long flags; - jiffies_scan_yield = msecs_to_jiffies(MSECS_SCAN_YIELD); jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE); jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000); From da9ff1f796e81976935407251815838bef9868d4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 1 Jul 2009 18:23:26 +0100 Subject: [PATCH 303/741] ASoC: Only disable pxa2xx-i2s clocks if we enabled them The clock API can't cope with unbalanced enables and disables and we only enable in hw_params() but try to disable in shutdown. 
Signed-off-by: Mark Brown --- sound/soc/pxa/pxa2xx-i2s.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/soc/pxa/pxa2xx-i2s.c b/sound/soc/pxa/pxa2xx-i2s.c index 4743e262895d..6b8f655d1ad8 100644 --- a/sound/soc/pxa/pxa2xx-i2s.c +++ b/sound/soc/pxa/pxa2xx-i2s.c @@ -167,6 +167,7 @@ static int pxa2xx_i2s_hw_params(struct snd_pcm_substream *substream, BUG_ON(IS_ERR(clk_i2s)); clk_enable(clk_i2s); + dai->private_data = dai; pxa_i2s_wait(); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) @@ -255,7 +256,10 @@ static void pxa2xx_i2s_shutdown(struct snd_pcm_substream *substream, if ((SACR1 & (SACR1_DREC | SACR1_DRPL)) == (SACR1_DREC | SACR1_DRPL)) { SACR0 &= ~SACR0_ENB; pxa_i2s_wait(); - clk_disable(clk_i2s); + if (dai->private_data != NULL) { + clk_disable(clk_i2s); + dai->private_data = NULL; + } } } @@ -336,6 +340,7 @@ static int pxa2xx_i2s_probe(struct platform_device *dev) return PTR_ERR(clk_i2s); pxa_i2s_dai.dev = &dev->dev; + pxa_i2s_dai.private_data = NULL; ret = snd_soc_register_dai(&pxa_i2s_dai); if (ret != 0) clk_put(clk_i2s); From 63eeaf38251183ec2b1caee11e4a2c040cb5ce6c Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 18 Jun 2009 16:56:52 -0700 Subject: [PATCH 304/741] drm/i915: enable error detection & state collection This patch enables error detection by enabling several types of error interrupts. When an error interrupt is received, the interrupt handler captures the error state; hopefully resulting in an accurate set of error data (error type, active head pointer, etc.). The new record is then available from sysfs. The current code will also dump the error state to the system log. 
Signed-off-by: Jesse Barnes Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_dma.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 19 ++++ drivers/gpu/drm/i915/i915_gem_debugfs.c | 34 ++++++ drivers/gpu/drm/i915/i915_irq.c | 139 +++++++++++++++++++++++- drivers/gpu/drm/i915/i915_reg.h | 14 +++ 5 files changed, 204 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index f112c769d533..f83364974a8a 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1180,6 +1180,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) pci_enable_msi(dev->pdev); spin_lock_init(&dev_priv->user_irq_lock); + spin_lock_init(&dev_priv->error_lock); dev_priv->user_irq_refcount = 0; ret = drm_vblank_init(dev, I915_NUM_PIPE); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bb4c2d387b6c..596e119d3e0e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -133,6 +133,22 @@ struct sdvo_device_mapping { u8 initialized; }; +struct drm_i915_error_state { + u32 eir; + u32 pgtbl_er; + u32 pipeastat; + u32 pipebstat; + u32 ipeir; + u32 ipehr; + u32 instdone; + u32 acthd; + u32 instpm; + u32 instps; + u32 instdone1; + u32 seqno; + struct timeval time; +}; + typedef struct drm_i915_private { struct drm_device *dev; @@ -209,6 +225,9 @@ typedef struct drm_i915_private { int fence_reg_start; /* 4 if userland hasn't ioctl'd us yet */ int num_fence_regs; /* 8 on pre-965, 16 otherwise */ + spinlock_t error_lock; + struct drm_i915_error_state *first_error; + /* Register state */ u8 saveLBB; u32 saveDSPACNTR; diff --git a/drivers/gpu/drm/i915/i915_gem_debugfs.c b/drivers/gpu/drm/i915/i915_gem_debugfs.c index 28146e405e87..cacae945338b 100644 --- a/drivers/gpu/drm/i915/i915_gem_debugfs.c +++ b/drivers/gpu/drm/i915/i915_gem_debugfs.c @@ -323,6 +323,39 @@ static int i915_ringbuffer_info(struct seq_file *m, void *data) return 0; } +static int 
i915_error_state(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_error_state *error; + unsigned long flags; + + spin_lock_irqsave(&dev_priv->error_lock, flags); + if (!dev_priv->first_error) { + seq_printf(m, "no error state collected\n"); + goto out; + } + + error = dev_priv->first_error; + + seq_printf(m, "EIR: 0x%08x\n", error->eir); + seq_printf(m, " PGTBL_ER: 0x%08x\n", error->pgtbl_er); + seq_printf(m, " INSTPM: 0x%08x\n", error->instpm); + seq_printf(m, " IPEIR: 0x%08x\n", error->ipeir); + seq_printf(m, " IPEHR: 0x%08x\n", error->ipehr); + seq_printf(m, " INSTDONE: 0x%08x\n", error->instdone); + seq_printf(m, " ACTHD: 0x%08x\n", error->acthd); + if (IS_I965G(dev)) { + seq_printf(m, " INSTPS: 0x%08x\n", error->instps); + seq_printf(m, " INSTDONE1: 0x%08x\n", error->instdone1); + } + +out: + spin_unlock_irqrestore(&dev_priv->error_lock, flags); + + return 0; +} static struct drm_info_list i915_gem_debugfs_list[] = { {"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST}, @@ -336,6 +369,7 @@ static struct drm_info_list i915_gem_debugfs_list[] = { {"i915_ringbuffer_data", i915_ringbuffer_data, 0}, {"i915_ringbuffer_info", i915_ringbuffer_info, 0}, {"i915_batchbuffers", i915_batchbuffer_info, 0}, + {"i915_error_state", i915_error_state, 0}, }; #define I915_GEM_DEBUGFS_ENTRIES ARRAY_SIZE(i915_gem_debugfs_list) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 228546f6eaa4..17b308592c4f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -26,6 +26,7 @@ * */ +#include #include "drmP.h" #include "drm.h" #include "i915_drm.h" @@ -41,9 +42,10 @@ * we leave them always unmasked in IMR and then control enabling them through * PIPESTAT alone. 
*/ -#define I915_INTERRUPT_ENABLE_FIX (I915_ASLE_INTERRUPT | \ - I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | \ - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT) +#define I915_INTERRUPT_ENABLE_FIX (I915_ASLE_INTERRUPT | \ + I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | \ + I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | \ + I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) /** Interrupts that we mask and unmask at runtime. */ #define I915_INTERRUPT_ENABLE_VAR (I915_USER_INTERRUPT) @@ -288,6 +290,47 @@ irqreturn_t igdng_irq_handler(struct drm_device *dev) return ret; } +static void i915_capture_error_state(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_error_state *error; + unsigned long flags; + + spin_lock_irqsave(&dev_priv->error_lock, flags); + if (dev_priv->first_error) + goto out; + + error = kmalloc(sizeof(*error), GFP_ATOMIC); + if (!error) { + DRM_DEBUG("out ot memory, not capturing error state\n"); + goto out; + } + + error->eir = I915_READ(EIR); + error->pgtbl_er = I915_READ(PGTBL_ER); + error->pipeastat = I915_READ(PIPEASTAT); + error->pipebstat = I915_READ(PIPEBSTAT); + error->instpm = I915_READ(INSTPM); + if (!IS_I965G(dev)) { + error->ipeir = I915_READ(IPEIR); + error->ipehr = I915_READ(IPEHR); + error->instdone = I915_READ(INSTDONE); + error->acthd = I915_READ(ACTHD); + } else { + error->ipeir = I915_READ(IPEIR_I965); + error->ipehr = I915_READ(IPEHR_I965); + error->instdone = I915_READ(INSTDONE_I965); + error->instps = I915_READ(INSTPS); + error->instdone1 = I915_READ(INSTDONE1); + error->acthd = I915_READ(ACTHD_I965); + } + + dev_priv->first_error = error; + +out: + spin_unlock_irqrestore(&dev_priv->error_lock, flags); +} + irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) { struct drm_device *dev = (struct drm_device *) arg; @@ -362,6 +405,80 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) I915_READ(PORT_HOTPLUG_STAT); } + if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) { + u32 eir = I915_READ(EIR); + + 
i915_capture_error_state(dev); + + printk(KERN_ERR "render error detected, EIR: 0x%08x\n", + eir); + if (eir & I915_ERROR_PAGE_TABLE) { + u32 pgtbl_err = I915_READ(PGTBL_ER); + printk(KERN_ERR "page table error\n"); + printk(KERN_ERR " PGTBL_ER: 0x%08x\n", + pgtbl_err); + I915_WRITE(PGTBL_ER, pgtbl_err); + (void)I915_READ(PGTBL_ER); + } + if (eir & I915_ERROR_MEMORY_REFRESH) { + printk(KERN_ERR "memory refresh error\n"); + printk(KERN_ERR "PIPEASTAT: 0x%08x\n", + pipea_stats); + printk(KERN_ERR "PIPEBSTAT: 0x%08x\n", + pipeb_stats); + /* pipestat has already been acked */ + } + if (eir & I915_ERROR_INSTRUCTION) { + printk(KERN_ERR "instruction error\n"); + printk(KERN_ERR " INSTPM: 0x%08x\n", + I915_READ(INSTPM)); + if (!IS_I965G(dev)) { + u32 ipeir = I915_READ(IPEIR); + + printk(KERN_ERR " IPEIR: 0x%08x\n", + I915_READ(IPEIR)); + printk(KERN_ERR " IPEHR: 0x%08x\n", + I915_READ(IPEHR)); + printk(KERN_ERR " INSTDONE: 0x%08x\n", + I915_READ(INSTDONE)); + printk(KERN_ERR " ACTHD: 0x%08x\n", + I915_READ(ACTHD)); + I915_WRITE(IPEIR, ipeir); + (void)I915_READ(IPEIR); + } else { + u32 ipeir = I915_READ(IPEIR_I965); + + printk(KERN_ERR " IPEIR: 0x%08x\n", + I915_READ(IPEIR_I965)); + printk(KERN_ERR " IPEHR: 0x%08x\n", + I915_READ(IPEHR_I965)); + printk(KERN_ERR " INSTDONE: 0x%08x\n", + I915_READ(INSTDONE_I965)); + printk(KERN_ERR " INSTPS: 0x%08x\n", + I915_READ(INSTPS)); + printk(KERN_ERR " INSTDONE1: 0x%08x\n", + I915_READ(INSTDONE1)); + printk(KERN_ERR " ACTHD: 0x%08x\n", + I915_READ(ACTHD_I965)); + I915_WRITE(IPEIR_I965, ipeir); + (void)I915_READ(IPEIR_I965); + } + } + + I915_WRITE(EIR, eir); + (void)I915_READ(EIR); + eir = I915_READ(EIR); + if (eir) { + /* + * some errors might have become stuck, + * mask them. 
+ */ + DRM_ERROR("EIR stuck: 0x%08x, masking\n", eir); + I915_WRITE(EMR, I915_READ(EMR) | eir); + I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); + } + } + I915_WRITE(IIR, iir); new_iir = I915_READ(IIR); /* Flush posted writes */ @@ -732,6 +849,7 @@ int i915_driver_irq_postinstall(struct drm_device *dev) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; u32 enable_mask = I915_INTERRUPT_ENABLE_FIX | I915_INTERRUPT_ENABLE_VAR; + u32 error_mask; DRM_INIT_WAITQUEUE(&dev_priv->irq_queue); @@ -768,6 +886,21 @@ int i915_driver_irq_postinstall(struct drm_device *dev) i915_enable_irq(dev_priv, I915_DISPLAY_PORT_INTERRUPT); } + /* + * Enable some error detection, note the instruction error mask + * bit is reserved, so we leave it masked. + */ + if (IS_G4X(dev)) { + error_mask = ~(GM45_ERROR_PAGE_TABLE | + GM45_ERROR_MEM_PRIV | + GM45_ERROR_CP_PRIV | + I915_ERROR_MEMORY_REFRESH); + } else { + error_mask = ~(I915_ERROR_PAGE_TABLE | + I915_ERROR_MEMORY_REFRESH); + } + I915_WRITE(EMR, error_mask); + /* Disable pipe interrupt enables, clear pending pipe status */ I915_WRITE(PIPEASTAT, I915_READ(PIPEASTAT) & 0x8000ffff); I915_WRITE(PIPEBSTAT, I915_READ(PIPEBSTAT) & 0x8000ffff); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 88bf7521405f..ad3d1b5db95e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -206,6 +206,7 @@ /* * Instruction and interrupt control regs */ +#define PGTBL_ER 0x02024 #define PRB0_TAIL 0x02030 #define PRB0_HEAD 0x02034 #define PRB0_START 0x02038 @@ -226,11 +227,18 @@ #define PRB1_HEAD 0x02044 /* 915+ only */ #define PRB1_START 0x02048 /* 915+ only */ #define PRB1_CTL 0x0204c /* 915+ only */ +#define IPEIR_I965 0x02064 +#define IPEHR_I965 0x02068 +#define INSTDONE_I965 0x0206c +#define INSTPS 0x02070 /* 965+ only */ +#define INSTDONE1 0x0207c /* 965+ only */ #define ACTHD_I965 0x02074 #define HWS_PGA 0x02080 #define HWS_ADDRESS_MASK 0xfffff000 #define 
HWS_START_ADDRESS_SHIFT 4 #define IPEIR 0x02088 +#define IPEHR 0x0208c +#define INSTDONE 0x02090 #define NOPID 0x02094 #define HWSTAM 0x02098 #define SCPD0 0x0209c /* 915+ only */ @@ -258,6 +266,12 @@ #define EIR 0x020b0 #define EMR 0x020b4 #define ESR 0x020b8 +#define GM45_ERROR_PAGE_TABLE (1<<5) +#define GM45_ERROR_MEM_PRIV (1<<4) +#define I915_ERROR_PAGE_TABLE (1<<4) +#define GM45_ERROR_CP_PRIV (1<<3) +#define I915_ERROR_MEMORY_REFRESH (1<<1) +#define I915_ERROR_INSTRUCTION (1<<0) #define INSTPM 0x020c0 #define ACTHD 0x020c8 #define FW_BLC 0x020d8 From 3238c0c4d68d9a9022b411a11a4b933fbdb53a14 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 1 Jul 2009 18:56:16 +0100 Subject: [PATCH 305/741] intel-iommu: Make iommu=pt work on i386 too Signed-off-by: David Woodhouse --- arch/x86/kernel/pci-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 47630479b067..1a041bcf506b 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -211,11 +211,11 @@ static __init int iommu_setup(char *p) #ifdef CONFIG_SWIOTLB if (!strncmp(p, "soft", 4)) swiotlb = 1; +#endif if (!strncmp(p, "pt", 2)) { iommu_pass_through = 1; return 1; } -#endif gart_parse_options(p); From e2dbe12557d85d81f4527879499f55681c3cca4f Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Wed, 1 Jul 2009 01:06:26 -0400 Subject: [PATCH 306/741] elf: fix one check-after-use Check before using it.
Signed-off-by: WANG Cong Cc: Alexander Viro Cc: David Howells Acked-by: Roland McGrath Acked-by: James Morris Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index f1867900e459..b7c1603cd4bd 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1522,11 +1522,11 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, info->thread = NULL; psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL); - fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); - if (psinfo == NULL) return 0; + fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); + /* * Figure out how many notes we're going to need for each thread. */ From 7662c8bd6545c12ac7b2b39e4554c3ba34789c50 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 26 Jun 2009 11:23:55 +0800 Subject: [PATCH 307/741] drm/i915: add FIFO watermark support This patch from jbarnes and myself adds FIFO watermark control to the driver. This is needed for both power saving features on new platforms with the so-called "big FIFO" and for controlling FIFO allocation between pipes in multi-head configurations. It's also necessary infrastructure to support things like framebuffer compression and configuration supportability checks (i.e. checking a configuration against available bandwidth). 
Signed-off-by: Jesse Barnes Signed-off-by: Shaohua Li Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_dma.c | 40 +++ drivers/gpu/drm/i915/i915_drv.h | 4 + drivers/gpu/drm/i915/i915_irq.c | 4 + drivers/gpu/drm/i915/i915_reg.h | 46 ++- drivers/gpu/drm/i915/intel_display.c | 425 ++++++++++++++++++++++++++- 5 files changed, 513 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index f83364974a8a..6096600aff60 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1082,6 +1082,44 @@ void i915_master_destroy(struct drm_device *dev, struct drm_master *master) master->driver_priv = NULL; } +static void i915_get_mem_freq(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + u32 tmp; + + if (!IS_IGD(dev)) + return; + + tmp = I915_READ(CLKCFG); + + switch (tmp & CLKCFG_FSB_MASK) { + case CLKCFG_FSB_533: + dev_priv->fsb_freq = 533; /* 133*4 */ + break; + case CLKCFG_FSB_800: + dev_priv->fsb_freq = 800; /* 200*4 */ + break; + case CLKCFG_FSB_667: + dev_priv->fsb_freq = 667; /* 167*4 */ + break; + case CLKCFG_FSB_400: + dev_priv->fsb_freq = 400; /* 100*4 */ + break; + } + + switch (tmp & CLKCFG_MEM_MASK) { + case CLKCFG_MEM_533: + dev_priv->mem_freq = 533; + break; + case CLKCFG_MEM_667: + dev_priv->mem_freq = 667; + break; + case CLKCFG_MEM_800: + dev_priv->mem_freq = 800; + break; + } +} + /** * i915_driver_load - setup chip and create an initial config * @dev: DRM device @@ -1165,6 +1203,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) goto out_iomapfree; } + i915_get_mem_freq(dev); + /* On the 945G/GM, the chipset reports the MSI capability on the * integrated graphics even though the support isn't actually there * according to the published specs. 
It doesn't appear to function diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 596e119d3e0e..47ecb617e519 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -225,6 +225,8 @@ typedef struct drm_i915_private { int fence_reg_start; /* 4 if userland hasn't ioctl'd us yet */ int num_fence_regs; /* 8 on pre-965, 16 otherwise */ + unsigned int fsb_freq, mem_freq; + spinlock_t error_lock; struct drm_i915_error_state *first_error; @@ -889,6 +891,8 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller); #define SUPPORTS_INTEGRATED_HDMI(dev) (IS_G4X(dev) || IS_IGDNG(dev)) #define SUPPORTS_INTEGRATED_DP(dev) (IS_G4X(dev) || IS_IGDNG(dev)) #define I915_HAS_HOTPLUG(dev) (IS_I945G(dev) || IS_I945GM(dev) || IS_I965G(dev)) +/* dsparb controlled by hw only */ +#define DSPARB_HWCONTROL(dev) (IS_G4X(dev)) #define PRIMARY_RINGBUFFER_SIZE (128*1024) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 17b308592c4f..7ba23a69a0c0 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -376,11 +376,15 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) * Clear the PIPE(A|B)STAT regs before the IIR */ if (pipea_stats & 0x8000ffff) { + if (pipea_stats & PIPE_FIFO_UNDERRUN_STATUS) + DRM_DEBUG("pipe a underrun\n"); I915_WRITE(PIPEASTAT, pipea_stats); irq_received = 1; } if (pipeb_stats & 0x8000ffff) { + if (pipeb_stats & PIPE_FIFO_UNDERRUN_STATUS) + DRM_DEBUG("pipe b underrun\n"); I915_WRITE(PIPEBSTAT, pipeb_stats); irq_received = 1; } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ad3d1b5db95e..6c0858484094 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -275,7 +275,13 @@ #define INSTPM 0x020c0 #define ACTHD 0x020c8 #define FW_BLC 0x020d8 +#define FW_BLC2 0x020dc #define FW_BLC_SELF 0x020e0 /* 915+ only */ +#define FW_BLC_SELF_EN (1<<15) +#define 
MM_BURST_LENGTH 0x00700000 +#define MM_FIFO_WATERMARK 0x0001F000 +#define LM_BURST_LENGTH 0x00000700 +#define LM_FIFO_WATERMARK 0x0000001F #define MI_ARB_STATE 0x020e4 /* 915+ only */ #define CACHE_MODE_0 0x02120 /* 915+ only */ #define CM0_MASK_SHIFT 16 @@ -585,17 +591,21 @@ /* Clocking configuration register */ #define CLKCFG 0x10c00 -#define CLKCFG_FSB_400 (0 << 0) /* hrawclk 100 */ +#define CLKCFG_FSB_400 (5 << 0) /* hrawclk 100 */ #define CLKCFG_FSB_533 (1 << 0) /* hrawclk 133 */ #define CLKCFG_FSB_667 (3 << 0) /* hrawclk 166 */ #define CLKCFG_FSB_800 (2 << 0) /* hrawclk 200 */ #define CLKCFG_FSB_1067 (6 << 0) /* hrawclk 266 */ #define CLKCFG_FSB_1333 (7 << 0) /* hrawclk 333 */ -/* this is a guess, could be 5 as well */ +/* Note, below two are guess */ #define CLKCFG_FSB_1600 (4 << 0) /* hrawclk 400 */ -#define CLKCFG_FSB_1600_ALT (5 << 0) /* hrawclk 400 */ +#define CLKCFG_FSB_1600_ALT (0 << 0) /* hrawclk 400 */ #define CLKCFG_FSB_MASK (7 << 0) - +#define CLKCFG_MEM_533 (1 << 4) +#define CLKCFG_MEM_667 (2 << 4) +#define CLKCFG_MEM_800 (3 << 4) +#define CLKCFG_MEM_MASK (7 << 4) + /** GM965 GM45 render standby register */ #define MCHBAR_RENDER_STANDBY 0x111B8 @@ -1595,6 +1605,34 @@ #define DSPARB_CSTART_SHIFT 7 #define DSPARB_BSTART_MASK (0x7f) #define DSPARB_BSTART_SHIFT 0 +#define DSPARB_BEND_SHIFT 9 /* on 855 */ +#define DSPARB_AEND_SHIFT 0 + +#define DSPFW1 0x70034 +#define DSPFW2 0x70038 +#define DSPFW3 0x7003c +#define IGD_SELF_REFRESH_EN (1<<30) + +/* FIFO watermark sizes etc */ +#define I915_FIFO_LINE_SIZE 64 +#define I830_FIFO_LINE_SIZE 32 +#define I945_FIFO_SIZE 127 /* 945 & 965 */ +#define I915_FIFO_SIZE 95 +#define I855GM_FIFO_SIZE 255 +#define I830_FIFO_SIZE 95 +#define I915_MAX_WM 0x3f + +#define IGD_DISPLAY_FIFO 512 /* in 64byte unit */ +#define IGD_FIFO_LINE_SIZE 64 +#define IGD_MAX_WM 0x1ff +#define IGD_DFT_WM 0x3f +#define IGD_DFT_HPLLOFF_WM 0 +#define IGD_GUARD_WM 10 +#define IGD_CURSOR_FIFO 64 +#define IGD_CURSOR_MAX_WM 0x3f +#define 
IGD_CURSOR_DFT_WM 0 +#define IGD_CURSOR_GUARD_WM 5 + /* * The two pipe frame counter registers are not synchronized, so * reading a stable value is somewhat tricky. The following code diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 73e7b9cecac8..a84ac05ef048 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -25,6 +25,7 @@ */ #include +#include #include "drmP.h" #include "intel_drv.h" #include "i915_drm.h" @@ -34,6 +35,7 @@ #include "drm_crtc_helper.h" bool intel_pipe_has_type (struct drm_crtc *crtc, int type); +static void intel_update_watermarks(struct drm_device *dev); typedef struct { /* given values */ @@ -1005,7 +1007,7 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; - int plane = intel_crtc->pipe; + int plane = intel_crtc->plane; int pch_dpll_reg = (pipe == 0) ? PCH_DPLL_A : PCH_DPLL_B; int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF; int dspcntr_reg = (plane == 0) ? 
DSPACNTR : DSPBCNTR; @@ -1335,8 +1337,10 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode) /* Give the overlay scaler a chance to enable if it's on this pipe */ //intel_crtc_dpms_video(crtc, true); TODO + intel_update_watermarks(dev); break; case DRM_MODE_DPMS_OFF: + intel_update_watermarks(dev); /* Give the overlay scaler a chance to disable if it's on this pipe */ //intel_crtc_dpms_video(crtc, FALSE); TODO @@ -1515,7 +1519,6 @@ static int intel_get_core_clock_speed(struct drm_device *dev) return 0; /* Silence gcc warning */ } - /** * Return the pipe currently connected to the panel fitter, * or -1 if the panel fitter is not present or not in use @@ -1585,6 +1588,420 @@ igdng_compute_m_n(int bytes_per_pixel, int nlanes, } +struct intel_watermark_params { + unsigned long fifo_size; + unsigned long max_wm; + unsigned long default_wm; + unsigned long guard_size; + unsigned long cacheline_size; +}; + +/* IGD has different values for various configs */ +static struct intel_watermark_params igd_display_wm = { + IGD_DISPLAY_FIFO, + IGD_MAX_WM, + IGD_DFT_WM, + IGD_GUARD_WM, + IGD_FIFO_LINE_SIZE +}; +static struct intel_watermark_params igd_display_hplloff_wm = { + IGD_DISPLAY_FIFO, + IGD_MAX_WM, + IGD_DFT_HPLLOFF_WM, + IGD_GUARD_WM, + IGD_FIFO_LINE_SIZE +}; +static struct intel_watermark_params igd_cursor_wm = { + IGD_CURSOR_FIFO, + IGD_CURSOR_MAX_WM, + IGD_CURSOR_DFT_WM, + IGD_CURSOR_GUARD_WM, + IGD_FIFO_LINE_SIZE, +}; +static struct intel_watermark_params igd_cursor_hplloff_wm = { + IGD_CURSOR_FIFO, + IGD_CURSOR_MAX_WM, + IGD_CURSOR_DFT_WM, + IGD_CURSOR_GUARD_WM, + IGD_FIFO_LINE_SIZE +}; +static struct intel_watermark_params i945_wm_info = { + I915_FIFO_LINE_SIZE, + I915_MAX_WM, + 1, + 0, + IGD_FIFO_LINE_SIZE +}; +static struct intel_watermark_params i915_wm_info = { + I945_FIFO_SIZE, + I915_MAX_WM, + 1, + 0, + I915_FIFO_LINE_SIZE +}; +static struct intel_watermark_params i855_wm_info = { + I855GM_FIFO_SIZE, + I915_MAX_WM, + 1, + 0, + I830_FIFO_LINE_SIZE 
+}; +static struct intel_watermark_params i830_wm_info = { + I830_FIFO_SIZE, + I915_MAX_WM, + 1, + 0, + I830_FIFO_LINE_SIZE +}; + +static unsigned long intel_calculate_wm(unsigned long clock_in_khz, + struct intel_watermark_params *wm, + int pixel_size, + unsigned long latency_ns) +{ + unsigned long bytes_required, wm_size; + + bytes_required = (clock_in_khz * pixel_size * latency_ns) / 1000000; + bytes_required /= wm->cacheline_size; + wm_size = wm->fifo_size - bytes_required - wm->guard_size; + + if (wm_size > wm->max_wm) + wm_size = wm->max_wm; + if (wm_size == 0) + wm_size = wm->default_wm; + return wm_size; +} + +struct cxsr_latency { + int is_desktop; + unsigned long fsb_freq; + unsigned long mem_freq; + unsigned long display_sr; + unsigned long display_hpll_disable; + unsigned long cursor_sr; + unsigned long cursor_hpll_disable; +}; + +static struct cxsr_latency cxsr_latency_table[] = { + {1, 800, 400, 3382, 33382, 3983, 33983}, /* DDR2-400 SC */ + {1, 800, 667, 3354, 33354, 3807, 33807}, /* DDR2-667 SC */ + {1, 800, 800, 3347, 33347, 3763, 33763}, /* DDR2-800 SC */ + + {1, 667, 400, 3400, 33400, 4021, 34021}, /* DDR2-400 SC */ + {1, 667, 667, 3372, 33372, 3845, 33845}, /* DDR2-667 SC */ + {1, 667, 800, 3386, 33386, 3822, 33822}, /* DDR2-800 SC */ + + {1, 400, 400, 3472, 33472, 4173, 34173}, /* DDR2-400 SC */ + {1, 400, 667, 3443, 33443, 3996, 33996}, /* DDR2-667 SC */ + {1, 400, 800, 3430, 33430, 3946, 33946}, /* DDR2-800 SC */ + + {0, 800, 400, 3438, 33438, 4065, 34065}, /* DDR2-400 SC */ + {0, 800, 667, 3410, 33410, 3889, 33889}, /* DDR2-667 SC */ + {0, 800, 800, 3403, 33403, 3845, 33845}, /* DDR2-800 SC */ + + {0, 667, 400, 3456, 33456, 4103, 34106}, /* DDR2-400 SC */ + {0, 667, 667, 3428, 33428, 3927, 33927}, /* DDR2-667 SC */ + {0, 667, 800, 3443, 33443, 3905, 33905}, /* DDR2-800 SC */ + + {0, 400, 400, 3528, 33528, 4255, 34255}, /* DDR2-400 SC */ + {0, 400, 667, 3500, 33500, 4079, 34079}, /* DDR2-667 SC */ + {0, 400, 800, 3487, 33487, 4029, 34029}, /* 
DDR2-800 SC */ +}; + +static struct cxsr_latency *intel_get_cxsr_latency(int is_desktop, int fsb, + int mem) +{ + int i; + struct cxsr_latency *latency; + + if (fsb == 0 || mem == 0) + return NULL; + + for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) { + latency = &cxsr_latency_table[i]; + if (is_desktop == latency->is_desktop && + fsb == latency->fsb_freq && mem == latency->mem_freq) + break; + } + if (i >= ARRAY_SIZE(cxsr_latency_table)) { + DRM_DEBUG("Unknown FSB/MEM found, disable CxSR\n"); + return NULL; + } + return latency; +} + +static void igd_disable_cxsr(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 reg; + + /* deactivate cxsr */ + reg = I915_READ(DSPFW3); + reg &= ~(IGD_SELF_REFRESH_EN); + I915_WRITE(DSPFW3, reg); + DRM_INFO("Big FIFO is disabled\n"); +} + +static void igd_enable_cxsr(struct drm_device *dev, unsigned long clock, + int pixel_size) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 reg; + unsigned long wm; + struct cxsr_latency *latency; + + latency = intel_get_cxsr_latency(IS_IGDG(dev), dev_priv->fsb_freq, + dev_priv->mem_freq); + if (!latency) { + DRM_DEBUG("Unknown FSB/MEM found, disable CxSR\n"); + igd_disable_cxsr(dev); + return; + } + + /* Display SR */ + wm = intel_calculate_wm(clock, &igd_display_wm, pixel_size, + latency->display_sr); + reg = I915_READ(DSPFW1); + reg &= 0x7fffff; + reg |= wm << 23; + I915_WRITE(DSPFW1, reg); + DRM_DEBUG("DSPFW1 register is %x\n", reg); + + /* cursor SR */ + wm = intel_calculate_wm(clock, &igd_cursor_wm, pixel_size, + latency->cursor_sr); + reg = I915_READ(DSPFW3); + reg &= ~(0x3f << 24); + reg |= (wm & 0x3f) << 24; + I915_WRITE(DSPFW3, reg); + + /* Display HPLL off SR */ + wm = intel_calculate_wm(clock, &igd_display_hplloff_wm, + latency->display_hpll_disable, I915_FIFO_LINE_SIZE); + reg = I915_READ(DSPFW3); + reg &= 0xfffffe00; + reg |= wm & 0x1ff; + I915_WRITE(DSPFW3, reg); + + /* cursor HPLL off SR */ + wm = intel_calculate_wm(clock, 
&igd_cursor_hplloff_wm, pixel_size, + latency->cursor_hpll_disable); + reg = I915_READ(DSPFW3); + reg &= ~(0x3f << 16); + reg |= (wm & 0x3f) << 16; + I915_WRITE(DSPFW3, reg); + DRM_DEBUG("DSPFW3 register is %x\n", reg); + + /* activate cxsr */ + reg = I915_READ(DSPFW3); + reg |= IGD_SELF_REFRESH_EN; + I915_WRITE(DSPFW3, reg); + + DRM_INFO("Big FIFO is enabled\n"); + + return; +} + +const static int latency_ns = 5000; /* default for non-igd platforms */ + + +static void i965_update_wm(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + DRM_DEBUG("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR 8\n"); + + /* 965 has limitations... */ + I915_WRITE(DSPFW1, (8 << 16) | (8 << 8) | (8 << 0)); + I915_WRITE(DSPFW2, (8 << 8) | (8 << 0)); +} + +static void i9xx_update_wm(struct drm_device *dev, int planea_clock, + int planeb_clock, int sr_hdisplay, int pixel_size) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t fwater_lo = I915_READ(FW_BLC) & MM_FIFO_WATERMARK; + uint32_t fwater_hi = I915_READ(FW_BLC2) & LM_FIFO_WATERMARK; + int bsize, asize, cwm, bwm = 1, awm = 1, srwm = 1; + uint32_t dsparb = I915_READ(DSPARB); + int planea_entries, planeb_entries; + struct intel_watermark_params *wm_params; + unsigned long line_time_us; + int sr_clock, sr_entries = 0; + + if (IS_I965GM(dev) || IS_I945GM(dev)) + wm_params = &i945_wm_info; + else if (IS_I9XX(dev)) + wm_params = &i915_wm_info; + else + wm_params = &i855_wm_info; + + planea_entries = intel_calculate_wm(planea_clock, wm_params, + pixel_size, latency_ns); + planeb_entries = intel_calculate_wm(planeb_clock, wm_params, + pixel_size, latency_ns); + + DRM_DEBUG("FIFO entries - A: %d, B: %d\n", planea_entries, + planeb_entries); + + if (IS_I9XX(dev)) { + asize = dsparb & 0x7f; + bsize = (dsparb >> DSPARB_CSTART_SHIFT) & 0x7f; + } else { + asize = dsparb & 0x1ff; + bsize = (dsparb >> DSPARB_BEND_SHIFT) & 0x1ff; + } + DRM_DEBUG("FIFO size - A: %d, B: %d\n", asize, bsize); + + /* 
Two extra entries for padding */ + awm = asize - (planea_entries + 2); + bwm = bsize - (planeb_entries + 2); + + /* Sanity check against potentially bad FIFO allocations */ + if (awm <= 0) { + /* pipe is on but has too few FIFO entries */ + if (planea_entries != 0) + DRM_DEBUG("plane A needs more FIFO entries\n"); + awm = 1; + } + if (bwm <= 0) { + if (planeb_entries != 0) + DRM_DEBUG("plane B needs more FIFO entries\n"); + bwm = 1; + } + + /* + * Overlay gets an aggressive default since video jitter is bad. + */ + cwm = 2; + + /* Calc sr entries for one pipe configs */ + if (!planea_clock || !planeb_clock) { + sr_clock = planea_clock ? planea_clock : planeb_clock; + line_time_us = (sr_hdisplay * 1000) / sr_clock; + sr_entries = (((latency_ns / line_time_us) + 1) * pixel_size * + sr_hdisplay) / 1000; + sr_entries = roundup(sr_entries / wm_params->cacheline_size, 1); + if (sr_entries < wm_params->fifo_size) + srwm = wm_params->fifo_size - sr_entries; + } + + DRM_DEBUG("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n", + awm, bwm, cwm, srwm); + + fwater_lo = fwater_lo | ((bwm & 0x3f) << 16) | (awm & 0x3f); + fwater_hi = fwater_hi | (cwm & 0x1f); + + I915_WRITE(FW_BLC, fwater_lo); + I915_WRITE(FW_BLC2, fwater_hi); + if (IS_I9XX(dev)) + I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN | (srwm & 0x3f)); +} + +static void i830_update_wm(struct drm_device *dev, int planea_clock, + int pixel_size) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t dsparb = I915_READ(DSPARB); + uint32_t fwater_lo = I915_READ(FW_BLC) & MM_FIFO_WATERMARK; + unsigned int asize, awm; + int planea_entries; + + planea_entries = intel_calculate_wm(planea_clock, &i830_wm_info, + pixel_size, latency_ns); + + asize = dsparb & 0x7f; + + awm = asize - planea_entries; + + fwater_lo = fwater_lo | awm; + + I915_WRITE(FW_BLC, fwater_lo); +} + +/** + * intel_update_watermarks - update FIFO watermark values based on current modes + * + * Calculate watermark values for the various WM regs 
based on current mode + * and plane configuration. + * + * There are several cases to deal with here: + * - normal (i.e. non-self-refresh) + * - self-refresh (SR) mode + * - lines are large relative to FIFO size (buffer can hold up to 2) + * - lines are small relative to FIFO size (buffer can hold more than 2 + * lines), so need to account for TLB latency + * + * The normal calculation is: + * watermark = dotclock * bytes per pixel * latency + * where latency is platform & configuration dependent (we assume pessimal + * values here). + * + * The SR calculation is: + * watermark = (trunc(latency/line time)+1) * surface width * + * bytes per pixel + * where + * line time = htotal / dotclock + * and latency is assumed to be high, as above. + * + * The final value programmed to the register should always be rounded up, + * and include an extra 2 entries to account for clock crossings. + * + * We don't use the sprite, so we can ignore that. And on Crestline we have + * to set the non-SR watermarks to 8. 
+ */ +static void intel_update_watermarks(struct drm_device *dev) +{ + struct drm_crtc *crtc; + struct intel_crtc *intel_crtc; + int sr_hdisplay = 0; + unsigned long planea_clock = 0, planeb_clock = 0, sr_clock = 0; + int enabled = 0, pixel_size = 0; + + if (DSPARB_HWCONTROL(dev)) + return; + + /* Get the clock config from both planes */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + intel_crtc = to_intel_crtc(crtc); + if (crtc->enabled) { + enabled++; + if (intel_crtc->plane == 0) { + DRM_DEBUG("plane A (pipe %d) clock: %d\n", + intel_crtc->pipe, crtc->mode.clock); + planea_clock = crtc->mode.clock; + } else { + DRM_DEBUG("plane B (pipe %d) clock: %d\n", + intel_crtc->pipe, crtc->mode.clock); + planeb_clock = crtc->mode.clock; + } + sr_hdisplay = crtc->mode.hdisplay; + sr_clock = crtc->mode.clock; + if (crtc->fb) + pixel_size = crtc->fb->bits_per_pixel / 8; + else + pixel_size = 4; /* by default */ + } + } + + if (enabled <= 0) + return; + + /* Single pipe configs can enable self refresh */ + if (enabled == 1 && IS_IGD(dev)) + igd_enable_cxsr(dev, sr_clock, pixel_size); + else if (IS_IGD(dev)) + igd_disable_cxsr(dev); + + if (IS_I965G(dev)) + i965_update_wm(dev); + else if (IS_I9XX(dev) || IS_MOBILE(dev)) + i9xx_update_wm(dev, planea_clock, planeb_clock, sr_hdisplay, + pixel_size); + else + i830_update_wm(dev, planea_clock, pixel_size); +} + static int intel_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode, @@ -1951,6 +2368,9 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, /* Flush the plane changes */ ret = intel_pipe_set_base(crtc, x, y, old_fb); + + intel_update_watermarks(dev); + drm_vblank_post_modeset(dev, pipe); return ret; @@ -2439,6 +2859,7 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) drm_mode_crtc_set_gamma_size(&intel_crtc->base, 256); intel_crtc->pipe = pipe; + intel_crtc->plane = pipe; for (i = 0; i < 256; i++) { intel_crtc->lut_r[i] = i; 
intel_crtc->lut_g[i] = i; From 788d84bba47ea3eb377f7a3ae4fd1ee84b84877b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 1 Jul 2009 18:34:52 +0100 Subject: [PATCH 308/741] Fix pci_unmap_addr() et al on i386. We can run a 32-bit kernel on boxes with an IOMMU, so we need pci_unmap_addr() etc. to work -- without it, drivers will leak mappings. To be honest, this whole thing looks like it's more pain than it's worth; I'm half inclined to remove the no-op #else case altogether. But this is the minimal fix, which just does the right thing if CONFIG_DMAR is set. Signed-off-by: David Woodhouse Cc: stable@kernel.org [ for 2.6.30 ] Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 927958d13c19..1ff685ca221c 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -91,7 +91,7 @@ extern void pci_iommu_alloc(void); #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) -#if defined(CONFIG_X86_64) || defined(CONFIG_DMA_API_DEBUG) +#if defined(CONFIG_X86_64) || defined(CONFIG_DMAR) || defined(CONFIG_DMA_API_DEBUG) #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ dma_addr_t ADDR_NAME; From a15a519ed6e5e644f5a33c213c00b0c1d3cfe683 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 1 Jul 2009 18:49:06 +0100 Subject: [PATCH 309/741] Fix iommu address space allocation This fixes kernel.org bug #13584. The IOVA code attempted to optimise the insertion of new ranges into the rbtree, with the unfortunate result that some ranges just didn't get inserted into the tree at all. Then those ranges would be handed out more than once, and things kind of go downhill from there. Introduced after 2.6.25 by ddf02886cbe665d67ca750750196ea5bf524b10b ("PCI: iova RB tree setup tweak"). 
Signed-off-by: David Woodhouse Cc: mark gross Cc: Andrew Morton Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- drivers/pci/iova.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c index 2287116e9822..46dd440e2315 100644 --- a/drivers/pci/iova.c +++ b/drivers/pci/iova.c @@ -1,9 +1,19 @@ /* - * Copyright (c) 2006, Intel Corporation. + * Copyright © 2006-2009, Intel Corporation. * - * This file is released under the GPLv2. + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. * - * Copyright (C) 2006-2008 Intel Corporation * Author: Anil S Keshavamurthy */ @@ -123,7 +133,15 @@ move_left: /* Insert the new_iova into domain rbtree by holding writer lock */ /* Add new node and rebalance tree. */ { - struct rb_node **entry = &((prev)), *parent = NULL; + struct rb_node **entry, *parent = NULL; + + /* If we have 'prev', it's a valid place to start the + insertion. Otherwise, start from the root. 
*/ + if (prev) + entry = &prev; + else + entry = &iovad->rbroot.rb_node; + /* Figure out where to put new node */ while (*entry) { struct iova *this = container_of(*entry, From 6ff4fd05676bc5b5c930bef25901e489f7843660 Mon Sep 17 00:00:00 2001 From: "ling.ma@intel.com" Date: Thu, 25 Jun 2009 10:59:22 +0800 Subject: [PATCH 310/741] drm/i915: Set SSC frequency for 8xx chips correctly All 8xx class chips have the 66/48 split, not just 855. Signed-off-by: Ma Ling Reviewed-by: Jesse Barnes Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_bios.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 716409a57244..da22863c05c0 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -195,10 +195,12 @@ parse_general_features(struct drm_i915_private *dev_priv, dev_priv->lvds_use_ssc = general->enable_ssc; if (dev_priv->lvds_use_ssc) { - if (IS_I855(dev_priv->dev)) - dev_priv->lvds_ssc_freq = general->ssc_freq ? 66 : 48; - else - dev_priv->lvds_ssc_freq = general->ssc_freq ? 100 : 96; + if (IS_I85X(dev_priv->dev)) + dev_priv->lvds_ssc_freq = + general->ssc_freq ? 66 : 48; + else + dev_priv->lvds_ssc_freq = + general->ssc_freq ? 
100 : 96; } } } From c85994e4771025ef2a66533eb1a4c6c2217b9cda Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 1 Jul 2009 19:21:24 +0100 Subject: [PATCH 311/741] intel-iommu: Ensure that PTE writes are 64-bit atomic, even on i386 Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index ec7e032d5ab5..eea1006c860a 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -222,7 +222,12 @@ static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot) static inline u64 dma_pte_addr(struct dma_pte *pte) { - return (pte->val & VTD_PAGE_MASK); +#ifdef CONFIG_64BIT + return pte->val & VTD_PAGE_MASK; +#else + /* Must have a full atomic 64-bit read */ + return __cmpxchg64(pte, 0ULL, 0ULL) & VTD_PAGE_MASK; +#endif } static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn) @@ -712,6 +717,8 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, break; if (!dma_pte_present(pte)) { + uint64_t pteval; + tmp_page = alloc_pgtable_page(); if (!tmp_page) { @@ -719,15 +726,15 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, flags); return NULL; } - domain_flush_cache(domain, tmp_page, PAGE_SIZE); - dma_set_pte_pfn(pte, virt_to_dma_pfn(tmp_page)); - /* - * high level table always sets r/w, last level page - * table control read/write - */ - dma_set_pte_readable(pte); - dma_set_pte_writable(pte); - domain_flush_cache(domain, pte, sizeof(*pte)); + domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); + pteval = (virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; + if (cmpxchg64(&pte->val, 0ULL, pteval)) { + /* Someone else set it while we were thinking; use theirs. 
*/ + free_pgtable_page(tmp_page); + } else { + dma_pte_addr(pte); + domain_flush_cache(domain, pte, sizeof(*pte)); + } } parent = phys_to_virt(dma_pte_addr(pte)); level--; @@ -1666,6 +1673,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, } while (nr_pages--) { + uint64_t tmp; + if (!sg_res) { sg_res = (sg->offset + sg->length + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT; sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; @@ -1680,17 +1689,17 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, /* We don't need lock here, nobody else * touches the iova range */ - if (unlikely(dma_pte_addr(pte))) { + tmp = cmpxchg64(&pte->val, 0ULL, pteval); + if (tmp) { static int dumps = 5; - printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx)\n", - iov_pfn, pte->val); + printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n", + iov_pfn, tmp, (unsigned long long)pteval); if (dumps) { dumps--; debug_dma_dump_mappings(NULL); } WARN_ON(1); } - pte->val = pteval; pte++; if (!nr_pages || (unsigned long)pte >> VTD_PAGE_SHIFT != From d960eea974f5e500c0dcb95a934239cc1f481cfd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 29 Jun 2009 14:54:11 -0700 Subject: [PATCH 312/741] kernel-doc: move ignoring kmemcheck Somehow I managed to generate a diff that put these 2 lines into the wrong function: should have been in dump_struct() instead of in dump_enum(). 
Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- scripts/kernel-doc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index ed591e9b7d1d..b52d340d759d 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1426,6 +1426,8 @@ sub dump_struct($$) { # strip comments: $members =~ s/\/\*.*?\*\///gos; $nested =~ s/\/\*.*?\*\///gos; + # strip kmemcheck_bitfield_{begin,end}.*; + $members =~ s/kmemcheck_bitfield_.*?;//gos; create_parameterlist($members, ';', $file); check_sections($file, $declaration_name, "struct", $sectcheck, $struct_actual, $nested); @@ -1468,8 +1470,6 @@ sub dump_enum($$) { } } - # strip kmemcheck_bitfield_{begin,end}.*; - $members =~ s/kmemcheck_bitfield_.*?;//gos; output_declaration($declaration_name, 'enum', From 34e19ada994fb9cb3d11873f2b734602e2135f3a Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 1 Jul 2009 18:14:18 +0000 Subject: [PATCH 313/741] sh: Fix compiler error and include the definition of IS_ERR_VALUE When arch/sh/include/asm/syscall_32.h is included from a file that doesn't also include linux/err.h the following error is produced, In file included from /home/matt/src/kernels/sh-2.6/arch/sh/include/asm/syscall.h:5, from kernel/trace/trace_syscalls.c:3: /home/matt/src/kernels/sh-2.6/arch/sh/include/asm/syscall_32.h: In function 'syscall_get_error': /home/matt/src/kernels/sh-2.6/arch/sh/include/asm/syscall_32.h:28: error: implicit declaration of function 'IS_ERR_VALUE' make[2]: *** [kernel/trace/trace_syscalls.o] Error 1 make[1]: *** [kernel/trace] Error 2 make: *** [kernel] Error 2 Signed-off-by: Matt Fleming Signed-off-by: Paul Mundt --- arch/sh/include/asm/syscall_32.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h index 5bc34681d994..6f83f2cc45c1 100644 --- a/arch/sh/include/asm/syscall_32.h +++ b/arch/sh/include/asm/syscall_32.h @@ -3,6 +3,7 @@ #include #include +#include #include 
/* The system call number is given by the user in R3 */ From 1c6a307a54668eda556f499c94e75086aaf8f80f Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 1 Jul 2009 06:50:31 +0000 Subject: [PATCH 314/741] sh: LCDC dcache flush for deferred io Since writenotify on uncached vmas is unsupported in 2.6.31, live with cached framebuffer memory in the deferred io case for now and flush the dcache before forcing refresh. Signed-off-by: Paul Mundt Acked-by: Magnus damm --- drivers/video/sh_mobile_lcdcfb.c | 53 +++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c index f10d2fbeda06..da983b720f08 100644 --- a/drivers/video/sh_mobile_lcdcfb.c +++ b/drivers/video/sh_mobile_lcdcfb.c @@ -17,6 +17,7 @@ #include #include #include +#include #include

Some time ago I reported this: http://bugzilla.kernel.org/show_bug.cgi?id=6648 and now with 2.6.29 / 2.6.29.1 / 2.6.29.3 and 2.6.30 it is back. dmesg output: oprofile: using NMI interrupt. Fix inflate_threshold_root. Now=15 size=11 bits ... Fix inflate_threshold_root. Now=15 size=11 bits cat /proc/net/fib_triestat Basic info: size of leaf: 40 bytes, size of tnode: 56 bytes. Main: Aver depth: 2.28 Max depth: 6 Leaves: 276539 Prefixes: 289922 Internal nodes: 66762 1: 35046 2: 13824 3: 9508 4: 4897 5: 2331 6: 1149 7: 5 9: 1 18: 1 Pointers: 691228 Null ptrs: 347928 Total size: 35709 kB
It seems the current threshold for root resizing is too aggressive: it causes misleading warnings during big updates, and it might also be responsible for memory problems, especially with non-preempt configs, when RCU freeing is delayed long after call_rcu. It should also be mentioned that, because of non-atomic changes during resizing/rebalancing, the current lookup algorithm can miss valid leaves, so this is an additional argument for shortening these activities, even at the cost of minimally longer searches. This patch restores the values used before the patch "[IPV4]: fib_trie root node settings", commit: 965ffea43d4ebe8cd7b9fee78d651268dd7d23c5 from v2.6.22. Pawel's report:
I dont see any big change of (cpu load or faster/slower routing/propagating routes from bgpd or something else) - in avg there is from 2% to 3% more of CPU load i dont know why but it is - i change from "preempt" to "no preempt" 3 times and check this my "mpstat -P ALL 1 30" always avg cpu load was from 2 to 3% more compared to "no preempt" [...] cat /proc/net/fib_triestat Basic info: size of leaf: 20 bytes, size of tnode: 36 bytes. Main: Aver depth: 2.44 Max depth: 6 Leaves: 277814 Prefixes: 291306 Internal nodes: 66420 1: 32737 2: 14850 3: 10332 4: 4871 5: 2313 6: 942 7: 371 8: 3 17: 1 Pointers: 599098 Null ptrs: 254865 Total size: 18067 kB
According to this and other similar reports average depth is slightly increased (~0.2), and root nodes are shorter (log 17 vs. 18), but there is no visible performance decrease. So, until memory handling is improved or added parameters for changing this individually, this patch resets to safer defaults. Reported-by: Pawel Staszewski Reported-by: Jorge Boncompte [DTI2] Signed-off-by: Jarek Poplawski Tested-by: Pawel Staszewski Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 00a54b246dfe..63c2fa7b68c4 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -316,8 +316,8 @@ static inline void check_tnode(const struct tnode *tn) static const int halve_threshold = 25; static const int inflate_threshold = 50; -static const int halve_threshold_root = 8; -static const int inflate_threshold_root = 15; +static const int halve_threshold_root = 15; +static const int inflate_threshold_root = 25; static void __alias_free_mem(struct rcu_head *head) From 35976d4d557c5017c2180a083e8bd970cf73f3d5 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 8 Jul 2009 03:05:14 +0000 Subject: [PATCH 665/741] r6040: restore MIER register correctly when IRQ line is shared When the r6040 device IRQ line is shared we will enter the driver interrupt service routine, mask off the device interrupt enable register (MIER) and return with IRQ_NONE, we would then leave the device with interrupts disabled, this patch fixes that issue. Reported-by: Steve Holland Signed-off-by: Joe Chou Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/r6040.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c index ed63d23a6452..70aac35dc2db 100644 --- a/drivers/net/r6040.c +++ b/drivers/net/r6040.c @@ -704,8 +704,11 @@ static irqreturn_t r6040_interrupt(int irq, void *dev_id) /* Read MISR status and clear */ status = ioread16(ioaddr + MISR); - if (status == 0x0000 || status == 0xffff) + if (status == 0x0000 || status == 0xffff) { + /* Restore RDC MAC interrupt */ + iowrite16(misr, ioaddr + MIER); return IRQ_NONE; + } /* RX interrupt request */ if (status & RX_INTS) { From c3b85423072c0739d76b7c54080d3f3ccc5dad4d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 8 Jul 2009 03:05:48 +0000 Subject: [PATCH 666/741] r6040: bump driver version to 0.24 and date to 08 July 2009 Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/r6040.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c index 70aac35dc2db..961b5397a531 100644 --- a/drivers/net/r6040.c +++ b/drivers/net/r6040.c @@ -49,8 +49,8 @@ #include #define DRV_NAME "r6040" -#define DRV_VERSION "0.23" -#define DRV_RELDATE "05May2009" +#define DRV_VERSION "0.24" +#define DRV_RELDATE "08Jul2009" /* PHY CHIP Address */ #define PHY1_ADDR 1 /* For MAC1 */ From b4b223cdd5981f776491134faa7bc4ac342b44d4 Mon Sep 17 00:00:00 2001 From: Pascal Terjan Date: Thu, 18 Jun 2009 17:54:03 +0200 Subject: [PATCH 667/741] zd1211rw: 07b8:6001 is a ZD1211B On a shuttle machine here we got 07b8:6001 device, handled by zd1211rw, which does not work. Scanning is OK but association does not work, we get "direct probe to AP xxx timed out" It appears that this simple patch makes the device work perfectly. This id was already there in initial import of the driver so I don't know if it has ever been working as ZD1211 (which would mean they changed it and kept the id :( ). 
Signed-off-by: Pascal Terjan Signed-off-by: John W. Linville --- drivers/net/wireless/zd1211rw/zd_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c index 14a19baff214..9b31afc5a0c7 100644 --- a/drivers/net/wireless/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zd1211rw/zd_usb.c @@ -38,7 +38,6 @@ static struct usb_device_id usb_ids[] = { /* ZD1211 */ { USB_DEVICE(0x0ace, 0x1211), .driver_info = DEVICE_ZD1211 }, { USB_DEVICE(0x0ace, 0xa211), .driver_info = DEVICE_ZD1211 }, - { USB_DEVICE(0x07b8, 0x6001), .driver_info = DEVICE_ZD1211 }, { USB_DEVICE(0x126f, 0xa006), .driver_info = DEVICE_ZD1211 }, { USB_DEVICE(0x6891, 0xa727), .driver_info = DEVICE_ZD1211 }, { USB_DEVICE(0x0df6, 0x9071), .driver_info = DEVICE_ZD1211 }, @@ -87,6 +86,7 @@ static struct usb_device_id usb_ids[] = { { USB_DEVICE(0x0471, 0x1237), .driver_info = DEVICE_ZD1211B }, { USB_DEVICE(0x07fa, 0x1196), .driver_info = DEVICE_ZD1211B }, { USB_DEVICE(0x0df6, 0x0036), .driver_info = DEVICE_ZD1211B }, + { USB_DEVICE(0x07b8, 0x6001), .driver_info = DEVICE_ZD1211B }, /* "Driverless" devices that need ejecting */ { USB_DEVICE(0x0ace, 0x2011), .driver_info = DEVICE_INSTALLER }, { USB_DEVICE(0x0ace, 0x20ff), .driver_info = DEVICE_INSTALLER }, From 8b339d05805fb91cc0b5179af5b4d05d9f8b949c Mon Sep 17 00:00:00 2001 From: Hin-Tak Leung Date: Fri, 26 Jun 2009 05:28:15 +0100 Subject: [PATCH 668/741] zd1211rw: adding SONY IFU-WLM2 (054c:0257) as a zd1211b device Yevgen Kotikov reported success on the sourceforge zd1211-devs list with the following details: Brand/retail: SONY IFU-WLM2 USB-IDs: Vendor: 0x054C Device: 0x0257 chip ID: zd1211b chip 054c:0257 v4802 high 00-0b-6b AL2230_RF pa0 ----- FCC ID: unknown Signed-off-by: Hin-Tak Leung Tested-by: Yevgen Kotikov Signed-off-by: John W. 
Linville --- drivers/net/wireless/zd1211rw/zd_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c index 9b31afc5a0c7..0e6e44689cc6 100644 --- a/drivers/net/wireless/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zd1211rw/zd_usb.c @@ -60,6 +60,7 @@ static struct usb_device_id usb_ids[] = { { USB_DEVICE(0x157e, 0x300a), .driver_info = DEVICE_ZD1211 }, { USB_DEVICE(0x0105, 0x145f), .driver_info = DEVICE_ZD1211 }, /* ZD1211B */ + { USB_DEVICE(0x054c, 0x0257), .driver_info = DEVICE_ZD1211B }, { USB_DEVICE(0x0ace, 0x1215), .driver_info = DEVICE_ZD1211B }, { USB_DEVICE(0x0ace, 0xb215), .driver_info = DEVICE_ZD1211B }, { USB_DEVICE(0x157e, 0x300d), .driver_info = DEVICE_ZD1211B }, From cff782cd94df7adea84af6aa9516c8088f7ea950 Mon Sep 17 00:00:00 2001 From: Clyde McPherson Date: Tue, 30 Jun 2009 22:39:28 -0500 Subject: [PATCH 669/741] b43: Add support for 4318E Added support for the Broadcom 4318E chipset on PCMCIA/CF cards. The 4318E can do 802.11A/B/G, only B and G mode are supported in b43. Signed-off-by: Clyde McPherson Signed-off-by: Larry Finger Signed-off-by: John W. Linville --- drivers/net/wireless/b43/pcmcia.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/b43/pcmcia.c b/drivers/net/wireless/b43/pcmcia.c index 3cfc30307a27..6c3a74964ab8 100644 --- a/drivers/net/wireless/b43/pcmcia.c +++ b/drivers/net/wireless/b43/pcmcia.c @@ -35,6 +35,7 @@ static /*const */ struct pcmcia_device_id b43_pcmcia_tbl[] = { PCMCIA_DEVICE_MANF_CARD(0x2D0, 0x448), + PCMCIA_DEVICE_MANF_CARD(0x2D0, 0x476), PCMCIA_DEVICE_NULL, }; From 2fbddeb5c409c90be4706ea2beb7f1fc02100c72 Mon Sep 17 00:00:00 2001 From: Clyde McPherson Date: Tue, 30 Jun 2009 22:39:43 -0500 Subject: [PATCH 670/741] ssb: Add support for 4318E Added support for the Broadcom 4318E chipset on PCMCIA/CF cards. The 4318E can do 802.11A/B/G, only B and G mode are supported in b43. 
Signed-off-by: Clyde McPherson Signed-off-by: Larry Finger Signed-off-by: John W. Linville --- drivers/ssb/pcmcia.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/ssb/pcmcia.c b/drivers/ssb/pcmcia.c index fbfadbac67e8..d288608d2206 100644 --- a/drivers/ssb/pcmcia.c +++ b/drivers/ssb/pcmcia.c @@ -678,7 +678,8 @@ int ssb_pcmcia_get_invariants(struct ssb_bus *bus, sprom->board_rev = tuple.TupleData[1]; break; case SSB_PCMCIA_CIS_PA: - GOTO_ERROR_ON(tuple.TupleDataLen != 9, + GOTO_ERROR_ON((tuple.TupleDataLen != 9) && + (tuple.TupleDataLen != 10), "pa tpl size"); sprom->pa0b0 = tuple.TupleData[1] | ((u16)tuple.TupleData[2] << 8); @@ -718,7 +719,8 @@ int ssb_pcmcia_get_invariants(struct ssb_bus *bus, sprom->antenna_gain.ghz5.a3 = tuple.TupleData[1]; break; case SSB_PCMCIA_CIS_BFLAGS: - GOTO_ERROR_ON(tuple.TupleDataLen != 3, + GOTO_ERROR_ON((tuple.TupleDataLen != 3) && + (tuple.TupleDataLen != 5), "bfl tpl size"); sprom->boardflags_lo = tuple.TupleData[1] | ((u16)tuple.TupleData[2] << 8); From 4ff176674e75bdee9022dded415fb805f15700ad Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 7 Jul 2009 03:43:02 +0200 Subject: [PATCH 671/741] mac80211_hwsim: avoid NULL access There's a race condition -- started can be set to true before channel is set due to the way mac80211 callbacks currently work (->start should probably pass the channel we would like to have initially). For now simply add a check to hwsim to avoid dereferencing the NULL channel pointer. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- drivers/net/wireless/mac80211_hwsim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index e789c6e9938c..a111bda392e2 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -418,6 +418,7 @@ static bool mac80211_hwsim_tx_frame(struct ieee80211_hw *hw, continue; if (!data2->started || !hwsim_ps_rx_ok(data2, skb) || + !data->channel || !data2->channel || data->channel->center_freq != data2->channel->center_freq || !(data->group & data2->group)) continue; From b9744d19e35d74f965fb94bd55f9313d3a7d9e54 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 7 Jul 2009 11:10:12 +0200 Subject: [PATCH 672/741] mac80211: fix docbook These two functions no longer exist in mac80211, so trying to insert them generates warnings in the document. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- Documentation/DocBook/mac80211.tmpl | 2 -- 1 file changed, 2 deletions(-) diff --git a/Documentation/DocBook/mac80211.tmpl b/Documentation/DocBook/mac80211.tmpl index e36986663570..f3f37f141dbd 100644 --- a/Documentation/DocBook/mac80211.tmpl +++ b/Documentation/DocBook/mac80211.tmpl @@ -184,8 +184,6 @@ usage should require reading the full document. 
!Finclude/net/mac80211.h ieee80211_ctstoself_get !Finclude/net/mac80211.h ieee80211_ctstoself_duration !Finclude/net/mac80211.h ieee80211_generic_frame_duration -!Finclude/net/mac80211.h ieee80211_get_hdrlen_from_skb -!Finclude/net/mac80211.h ieee80211_hdrlen !Finclude/net/mac80211.h ieee80211_wake_queue !Finclude/net/mac80211.h ieee80211_stop_queue !Finclude/net/mac80211.h ieee80211_wake_queues From 804ef71ee183121de5e9bca1d70d114c97300e17 Mon Sep 17 00:00:00 2001 From: Jay Sternberg Date: Tue, 7 Jul 2009 11:18:46 -0700 Subject: [PATCH 673/741] Atheros Kconfig needs to be dependent on WLAN_80211 Atheros top level menu needs a "depends WLAN_80211" to properly indent within menuconfig and xconfig interfaces. This is purely a visual issue but it affects all subsequent drivers. The issue is the top level menu does not include a dependency on WLAN_80211 so within the tree structure, Atheros is at the same level as WLAN_80211 but when WLAN_80211 is collapsed, the menu disappears along with all subsequent drivers, so it is really a subordinate. Signed-off-by: Jay Sternberg Signed-off-by: John W. Linville --- drivers/net/wireless/ath/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/Kconfig b/drivers/net/wireless/ath/Kconfig index d26e7b485315..eb0337c49546 100644 --- a/drivers/net/wireless/ath/Kconfig +++ b/drivers/net/wireless/ath/Kconfig @@ -1,5 +1,6 @@ config ATH_COMMON tristate "Atheros Wireless Cards" + depends on WLAN_80211 depends on ATH5K || ATH9K || AR9170_USB source "drivers/net/wireless/ath/ath5k/Kconfig" From 47ab3840a389ff1b9959734995123e5bc94c3443 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 8 Jul 2009 08:33:02 -0500 Subject: [PATCH 674/741] p54: tx refused but queue active In the mainline kernel, p54usb will fail because the TX queue length can become < 0. This problem has been reported as Bugzilla #13725. 
The failure is expressed by the following message in the logs: WARNING: at net/mac80211/tx.c:1325 ieee80211_tx+0x23c/0x298 [mac80211]() Hardware name: HP Pavilion dv2700 Notebook PC tx refused but queue active This problem has been recently observed in the wireless-testing tree, where a full solution is being tested. That fix is too invasive for 2.6.31-rcX, but the simple change supplied here will prevent the failure. Signed-off-by: Larry Finger Signed-off-by: John W. Linville --- drivers/net/wireless/p54/p54common.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c index 48d81d98e12d..22ca122bd798 100644 --- a/drivers/net/wireless/p54/p54common.c +++ b/drivers/net/wireless/p54/p54common.c @@ -912,13 +912,14 @@ static void p54_rx_frame_sent(struct ieee80211_hw *dev, struct sk_buff *skb) } __skb_unlink(entry, &priv->tx_queue); - spin_unlock_irqrestore(&priv->tx_queue.lock, flags); frame_len = entry->len; entry_hdr = (struct p54_hdr *) entry->data; entry_data = (struct p54_tx_data *) entry_hdr->data; - priv->tx_stats[entry_data->hw_queue].len--; + if (priv->tx_stats[entry_data->hw_queue].len) + priv->tx_stats[entry_data->hw_queue].len--; priv->stats.dot11ACKFailureCount += payload->tries - 1; + spin_unlock_irqrestore(&priv->tx_queue.lock, flags); /* * Frames in P54_QUEUE_FWSCAN and P54_QUEUE_BEACON are From 1ce822fa04fd6878f079461a4b8affe4bb5ec27b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 8 Jul 2009 21:25:54 +0530 Subject: [PATCH 675/741] includecheck fix: include/linux, rfkill.h fix the following 'make includecheck' warning: include/linux/rfkill.h: linux/types.h is included more than once. Signed-off-by: Jaswinder Singh Rajput Signed-off-by: John W. 
Linville --- include/linux/rfkill.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index e73e2429a1b1..2ce29831feb6 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -99,7 +99,6 @@ enum rfkill_user_states { #undef RFKILL_STATE_UNBLOCKED #undef RFKILL_STATE_HARD_BLOCKED -#include #include #include #include From b5daa70a4a55a807e893fe7f94289c61c50a6e5f Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Wed, 8 Jul 2009 21:59:17 +0200 Subject: [PATCH 676/741] fsl-diu-fb: fix regression with uninitalized fb_info->mm_lock mutex Remove call to the fsl_diu_set_par before the register_framebuffer(). This fixes a problem with uninitialized the fb_info->mm_lock mutex introduced by the commit 537a1bf059f " fbdev: add mutex for fb_mmap locking" Signed-off-by: Krzysztof Helt Tested-by: "Kai Jiang" Signed-off-by: Linus Torvalds --- drivers/video/fsl-diu-fb.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/video/fsl-diu-fb.c b/drivers/video/fsl-diu-fb.c index 0bf2190928d0..72d68b3dc478 100644 --- a/drivers/video/fsl-diu-fb.c +++ b/drivers/video/fsl-diu-fb.c @@ -1223,12 +1223,6 @@ static int __devinit install_fb(struct fb_info *info) return -EINVAL; } - if (fsl_diu_set_par(info)) { - printk(KERN_ERR "fb_set_par failed"); - fb_dealloc_cmap(&info->cmap); - return -EINVAL; - } - if (register_framebuffer(info) < 0) { printk(KERN_ERR "register_framebuffer failed"); unmap_video_memory(info); From 1d01e83557105e7b3bf1623ad2b814d55e1c2990 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Wed, 8 Jul 2009 22:26:16 +0200 Subject: [PATCH 677/741] atmel_lcdfb: fix regression with uninitalized fb_info->mm_lock mutex Remove not needed locking of the fb_info->mm_lock mutex before a frambuffer is registered. 
This fixes a problem with uninitialized the fb_info->mm_lock mutex introduced by the commit 537a1bf059f " fbdev: add mutex for fb_mmap locking" Signed-off-by: Krzysztof Helt Signed-off-by: Linus Torvalds --- drivers/video/atmel_lcdfb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/video/atmel_lcdfb.c b/drivers/video/atmel_lcdfb.c index cb88394ba995..da05f0801bb7 100644 --- a/drivers/video/atmel_lcdfb.c +++ b/drivers/video/atmel_lcdfb.c @@ -261,6 +261,9 @@ static inline void atmel_lcdfb_free_video_memory(struct atmel_lcdfb_info *sinfo) /** * atmel_lcdfb_alloc_video_memory - Allocate framebuffer memory * @sinfo: the frame buffer to allocate memory for + * + * This function is called only from the atmel_lcdfb_probe() + * so no locking by fb_info->mm_lock around smem_len setting is needed. */ static int atmel_lcdfb_alloc_video_memory(struct atmel_lcdfb_info *sinfo) { @@ -270,9 +273,7 @@ static int atmel_lcdfb_alloc_video_memory(struct atmel_lcdfb_info *sinfo) smem_len = (var->xres_virtual * var->yres_virtual * ((var->bits_per_pixel + 7) / 8)); - mutex_lock(&info->mm_lock); info->fix.smem_len = max(smem_len, sinfo->smem_len); - mutex_unlock(&info->mm_lock); info->screen_base = dma_alloc_writecombine(info->device, info->fix.smem_len, (dma_addr_t *)&info->fix.smem_start, GFP_KERNEL); From 5ddf1e0ff00fd808c048d0b920784828276cc516 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sun, 5 Jul 2009 11:01:02 -0400 Subject: [PATCH 678/741] cifs: fix regression with O_EXCL creates and optimize away lookup cifs: fix regression with O_EXCL creates and optimize away lookup Signed-off-by: Jeff Layton Tested-by: Shirish Pargaonkar CC: Stable Kernel Signed-off-by: Steve French --- fs/cifs/dir.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index a40054faed7f..ff55fc6932cb 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -643,6 +643,15 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, } } + 
/* + * O_EXCL: optimize away the lookup, but don't hash the dentry. Let + * the VFS handle the create. + */ + if (nd->flags & LOOKUP_EXCL) { + d_instantiate(direntry, NULL); + return 0; + } + /* can not grab the rename sem here since it would deadlock in the cases (beginning of sys_rename itself) in which we already have the sb rename sem */ From 5e1596f75395e7a402e1059c518e633d2732dcf8 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 8 Jul 2009 16:14:23 -0400 Subject: [PATCH 679/741] [CPUFREQ] Fix compile failure in cpufreq.c managed_policy is out of scope for the non-smp case. Declare it locally where used (twice) Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c668ac855f71..b90eda8b3440 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -776,9 +776,6 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) struct sys_device *cpu_sys_dev; unsigned long flags; unsigned int j; -#ifdef CONFIG_SMP - struct cpufreq_policy *managed_policy; -#endif if (cpu_is_offline(cpu)) return 0; @@ -854,6 +851,8 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) #endif for_each_cpu(j, policy->cpus) { + struct cpufreq_policy *managed_policy; + if (cpu == j) continue; @@ -932,6 +931,8 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) /* symlink affected CPUs */ for_each_cpu(j, policy->cpus) { + struct cpufreq_policy *managed_policy; + if (j == cpu) continue; if (!cpu_online(j)) From 2e3167308048ca6c810733384d8289082f7e4ec1 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 8 Jul 2009 17:05:32 -0700 Subject: [PATCH 680/741] fealnx: Fix build breakage -- PR_CONT should be KERN_CONT Commit ad361c98 ("Remove multiple KERN_ prefixes from printk formats") broke the build for fealnx because it added some "printk(PR_CONT ..." calls, when PR_CONT doesn't exist; it should be "printk(KERN_CONT ..." 
Signed-off-by: Roland Dreier Cc: Joe Perches Signed-off-by: Linus Torvalds --- drivers/net/fealnx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/fealnx.c b/drivers/net/fealnx.c index 053fb49820b9..48385c42ab57 100644 --- a/drivers/net/fealnx.c +++ b/drivers/net/fealnx.c @@ -1216,13 +1216,13 @@ static void fealnx_tx_timeout(struct net_device *dev) { printk(KERN_DEBUG " Rx ring %p: ", np->rx_ring); for (i = 0; i < RX_RING_SIZE; i++) - printk(PR_CONT " %8.8x", + printk(KERN_CONT " %8.8x", (unsigned int) np->rx_ring[i].status); printk(KERN_CONT "\n"); printk(KERN_DEBUG " Tx ring %p: ", np->tx_ring); for (i = 0; i < TX_RING_SIZE; i++) - printk(PR_CONT " %4.4x", np->tx_ring[i].status); - printk(PR_CONT "\n"); + printk(KERN_CONT " %4.4x", np->tx_ring[i].status); + printk(KERN_CONT "\n"); } spin_lock_irqsave(&np->lock, flags); From 1b614fb9a00e97b1eab54d4e442d405229c059dd Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 8 Jul 2009 20:09:44 -0700 Subject: [PATCH 681/741] netpoll: Fix carrier detection for drivers that are using phylib Using early netconsole and gianfar driver this error pops up: netconsole: timeout waiting for carrier It appears that net/core/netpoll.c:netpoll_setup() is using cond_resched() in a loop waiting for a carrier. The thing is that cond_resched() is a no-op when system_state != SYSTEM_RUNNING, and so drivers/net/phy/phy.c's state_queue is never scheduled, therefore link detection doesn't work. I believe that the main problem is in cond_resched()[1], but despite how the cond_resched() story ends, it might be a good idea to call msleep(1) instead of cond_resched(), as suggested by Andrew Morton. [1] http://lkml.org/lkml/2009/7/7/463 Signed-off-by: Anton Vorontsov Signed-off-by: David S. 
Miller --- net/core/netpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 9675f312830d..df30feb2fc72 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -740,7 +740,7 @@ int netpoll_setup(struct netpoll *np) np->name); break; } - cond_resched(); + msleep(1); } /* If carrier appears to come up instantly, we don't From 086b3640c10ab448a6993c4bae1508f496f530c4 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 29 Jun 2009 16:25:33 +0300 Subject: [PATCH 682/741] UBIFS: dump a little more in case of corruptions In case of corruptions, dump 8192 bytes instead of 4096. The largest node is 4096+ bytes, so it is better to see a node boundary, which is not always possible when only 4096 bytes are printed. Signed-off-by: Artem Bityutskiy Reviewed-by: Adrian Hunter --- fs/ubifs/scan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index 0ed82479b44b..165c14ba1a46 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -238,12 +238,12 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, { int len; - ubifs_err("corrupted data at LEB %d:%d", lnum, offs); + ubifs_err("corruption at LEB %d:%d", lnum, offs); if (dbg_failure_mode) return; len = c->leb_size - offs; - if (len > 4096) - len = 4096; + if (len > 8192) + len = 8192; dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1); } From ed43f2f06cc1cec7ec2dc235c908530bc8c796eb Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 29 Jun 2009 17:59:23 +0300 Subject: [PATCH 683/741] UBIFS: small amendments in the LEB scanning code This patch fixes few minor things I've spotted while going through code: 1. Better document return codes 2. If 'ubifs_scan_a_node()' returns some thing we do not expect, treat this as an error. 3. 
Try to do recovery only when 'ubifs_scan()' returns %-EUCLEAN, not on any error. 4. If empty space starts at a non-aligned address, print a message. Signed-off-by: Artem Bityutskiy Reviewed-by: Adrian Hunter --- fs/ubifs/recovery.c | 7 ++++--- fs/ubifs/replay.c | 7 ++++--- fs/ubifs/scan.c | 14 +++++++++----- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 805605250f12..093a1ecb700f 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -543,8 +543,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) * * This function does a scan of a LEB, but caters for errors that might have * been caused by the unclean unmount from which we are attempting to recover. - * - * This function returns %0 on success and a negative error code on failure. + * Returns %0 in case of success, %-EUCLEAN if an unrecoverable corruption is + * found, and a negative error code in case of failure. */ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf, int grouped) @@ -643,7 +643,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, goto corrupted; default: dbg_err("unknown"); - goto corrupted; + err = -EINVAL; + goto error; } } diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 769be42f39d6..2970500f32df 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -837,9 +837,10 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) dbg_mnt("replay log LEB %d:%d", lnum, offs); sleb = ubifs_scan(c, lnum, offs, sbuf); - if (IS_ERR(sleb)) { - if (c->need_recovery) - sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); + if (IS_ERR(sleb) ) { + if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) + return PTR_ERR(sleb); + sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); if (IS_ERR(sleb)) return PTR_ERR(sleb); } diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index 165c14ba1a46..892ebfee4fe5 100644 --- 
a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -256,7 +256,9 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, * @sbuf: scan buffer (must be c->leb_size) * * This function scans LEB number @lnum and returns complete information about - * its contents. Returns an error code in case of failure. + * its contents. Returns the scaned information in case of success and, + * %-EUCLEAN if the LEB neads recovery, and other negative error codes in case + * of failure. */ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, int offs, void *sbuf) @@ -279,7 +281,6 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, cond_resched(); ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); - if (ret > 0) { /* Padding bytes or a valid padding node */ offs += ret; @@ -304,7 +305,8 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, goto corrupted; default: dbg_err("unknown"); - goto corrupted; + err = -EINVAL; + goto error; } err = ubifs_add_snod(c, sleb, buf, offs); @@ -317,8 +319,10 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, len -= node_len; } - if (offs % c->min_io_size) - goto corrupted; + if (offs % c->min_io_size) { + ubifs_err("empty space starts at non-aligned offset %d", offs); + goto corrupted;; + } ubifs_end_scan(c, sleb, lnum, offs); From 431102fed3effe4e4e19678830ddab7f05c34bf9 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 29 Jun 2009 18:58:34 +0300 Subject: [PATCH 684/741] UBIFS: clean up free space checking recovery.c has 'is_empty()' helper and it is better to use this helper instead of re-implementing it in several places. This patch does this and removes some amount of unneeded code. 
Signed-off-by: Artem Bityutskiy Reviewed-by: Adrian Hunter --- fs/ubifs/recovery.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 093a1ecb700f..fe7af9f676b0 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -357,11 +357,7 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs) empty_offs = ALIGN(offs + 1, c->min_io_size); check_len = c->leb_size - empty_offs; p = buf + empty_offs - offs; - - for (; check_len > 0; check_len--) - if (*p++ != 0xff) - return 0; - return 1; + return is_empty(p, check_len); } /** @@ -814,7 +810,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, static int recover_head(const struct ubifs_info *c, int lnum, int offs, void *sbuf) { - int len, err, need_clean = 0; + int len, err; if (c->min_io_size > 1) len = c->min_io_size; @@ -828,19 +824,7 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs, /* Read at the head location and check it is empty flash */ err = ubi_read(c->ubi, lnum, sbuf, offs, len); - if (err) - need_clean = 1; - else { - uint8_t *p = sbuf; - - while (len--) - if (*p++ != 0xff) { - need_clean = 1; - break; - } - } - - if (need_clean) { + if (err || !is_empty(sbuf, len)) { dbg_rcvry("cleaning head at %d:%d", lnum, offs); if (offs == 0) return ubifs_leb_unmap(c, lnum); From 061125476039a9a998878468a6abe235b1cee347 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 29 Jun 2009 19:27:14 +0300 Subject: [PATCH 685/741] UBIFS: fix corruption dump In the 'ubifs_recover_leb()' function, when we find corrupted empty space, we dump 8K starting from the offset where the last node ends. This is OK if the corrupted empty space is somewhere near that offset. But if the corruption is far at the end of the LEB, we will dump all 0xFF bytes and completely ignore the interesting data. This is observed on a PPC ("kilauea") with NOR flash. 
This patch changes the behavior and teaches UBIFS to print only interesting data. I.e., now we find where corruption starts and start dumping from that offset. Signed-off-by: Artem Bityutskiy Reviewed-by: Adrian Hunter --- fs/ubifs/recovery.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index fe7af9f676b0..e5f6cf8a1155 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -52,6 +52,25 @@ static int is_empty(void *buf, int len) return 1; } +/** + * first_non_ff - find offset of the first non-0xff byte. + * @buf: buffer to search in + * @len: length of buffer + * + * This function returns offset of the first non-0xff byte in @buf or %-1 if + * the buffer contains only 0xff bytes. + */ +static int first_non_ff(void *buf, int len) +{ + uint8_t *p = buf; + int i; + + for (i = 0; i < len; i++) + if (*p++ != 0xff) + return i; + return -1; +} + /** * get_master_node - get the last valid master node allowing for corruption. * @c: UBIFS file-system description object @@ -649,8 +668,13 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, clean_buf(c, &buf, lnum, &offs, &len); need_clean = 1; } else { - ubifs_err("corrupt empty space at LEB %d:%d", - lnum, offs); + int corruption = first_non_ff(buf, len); + + ubifs_err("corrupt empty space LEB %d:%d, corruption " + "starts at %d", lnum, offs, corruption); + /* Make sure we dump interesting non-0xFF data */ + offs = corruption; + buf += corruption; goto corrupted; } } From 369693dc93533097c0ca7243affb4f3244c336e8 Mon Sep 17 00:00:00 2001 From: Paul Vojta Date: Wed, 8 Jul 2009 23:57:46 -0700 Subject: [PATCH 686/741] ALSA: hda - fix beep tone calculation for IDT/STAC codecs In the beep tone calculation for IDT/STAC codecs, lower numbers correspond to higher frequencies and vice versa. 
The current code has this backwards, resulting in beep frequencies which are way too high (and sound bad on tinny laptop speakers, resulting in complaints). [Also added hz <= 0 check by tiwai] Signed-off-by: Paul Vojta Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_beep.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/hda_beep.c b/sound/pci/hda/hda_beep.c index 29272f2e95a0..b0275a050870 100644 --- a/sound/pci/hda/hda_beep.c +++ b/sound/pci/hda/hda_beep.c @@ -50,19 +50,22 @@ static void snd_hda_generate_beep(struct work_struct *work) * The tone frequency of beep generator on IDT/STAC codecs is * defined from the 8bit tone parameter, in Hz, * freq = 48000 * (257 - tone) / 1024 - * that is from 12kHz to 93.75kHz in step of 46.875 hz + * that is from 12kHz to 93.75Hz in steps of 46.875 Hz */ static int beep_linear_tone(struct hda_beep *beep, int hz) { + if (hz <= 0) + return 0; hz *= 1000; /* fixed point */ - hz = hz - DIGBEEP_HZ_MIN; + hz = hz - DIGBEEP_HZ_MIN + + DIGBEEP_HZ_STEP / 2; /* round to nearest step */ if (hz < 0) hz = 0; /* turn off PC beep*/ else if (hz >= (DIGBEEP_HZ_MAX - DIGBEEP_HZ_MIN)) - hz = 0xff; + hz = 1; /* max frequency */ else { hz /= DIGBEEP_HZ_STEP; - hz++; + hz = 255 - hz; } return hz; } From 264ef8a904943ed7d0b04fa958894d7a5c2b2c61 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 7 Jul 2009 10:33:01 +0100 Subject: [PATCH 687/741] kmemleak: Remove alloc_bootmem annotations introduced in the past kmemleak_alloc() calls were added in some places where alloc_bootmem was called. Since now kmemleak tracks bootmem allocations, these explicit calls should be removed. 
Signed-off-by: Catalin Marinas Cc: Ingo Molnar Acked-by: Pekka Enberg --- kernel/pid.c | 7 ------- mm/page_alloc.c | 14 +++----------- 2 files changed, 3 insertions(+), 18 deletions(-) diff --git a/kernel/pid.c b/kernel/pid.c index 5fa1db48d8b7..31310b5d3f50 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -36,7 +36,6 @@ #include #include #include -#include #define pid_hashfn(nr, ns) \ hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) @@ -513,12 +512,6 @@ void __init pidhash_init(void) pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash))); if (!pid_hash) panic("Could not alloc pidhash!\n"); - /* - * pid_hash contains references to allocated struct pid objects and it - * must be scanned by kmemleak to avoid false positives. - */ - kmemleak_alloc(pid_hash, pidhash_size * sizeof(*(pid_hash)), 0, - GFP_KERNEL); for (i = 0; i < pidhash_size; i++) INIT_HLIST_HEAD(&pid_hash[i]); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ad7cd1c56b07..3ef628845f07 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4745,8 +4745,10 @@ void *__init alloc_large_system_hash(const char *tablename, * some pages at the end of hash table which * alloc_pages_exact() automatically does */ - if (get_order(size) < MAX_ORDER) + if (get_order(size) < MAX_ORDER) { table = alloc_pages_exact(size, GFP_ATOMIC); + kmemleak_alloc(table, size, 1, GFP_ATOMIC); + } } } while (!table && size > PAGE_SIZE && --log2qty); @@ -4764,16 +4766,6 @@ void *__init alloc_large_system_hash(const char *tablename, if (_hash_mask) *_hash_mask = (1 << log2qty) - 1; - /* - * If hashdist is set, the table allocation is done with __vmalloc() - * which invokes the kmemleak_alloc() callback. This function may also - * be called before the slab and kmemleak are initialised when - * kmemleak simply buffers the request to be executed later - * (GFP_ATOMIC flag ignored in this case). 
- */ - if (!hashdist) - kmemleak_alloc(table, size, 1, GFP_ATOMIC); - return table; } From 005b10769c05fb16db70f7689ffb5ba17e3fc324 Mon Sep 17 00:00:00 2001 From: David Heidelberger Date: Thu, 9 Jul 2009 18:45:46 +0200 Subject: [PATCH 688/741] ALSA: hda - targa and targa-2ch fix Simplify ALC882_TARGA and return gpio3 to ALC883_TARGA_DIG and ALC883_TARGA_2ch_DIG, which I accidentally removed in commit id 64a8be74357477558183b43156c5536b642de134 Signed-off-by: David Heidelberger Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c6c3d4a4d648..bbb9b42e2604 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6919,9 +6919,6 @@ static struct hda_verb alc882_targa_verbs[] = { {0x1b, AC_VERB_SET_CONNECT_SEL, 0x00}, /* HP */ {0x14, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN}, - {0x01, AC_VERB_SET_GPIO_MASK, 0x03}, - {0x01, AC_VERB_SET_GPIO_DIRECTION, 0x03}, - {0x01, AC_VERB_SET_GPIO_DATA, 0x03}, { } /* end */ }; @@ -7241,7 +7238,8 @@ static struct alc_config_preset alc882_presets[] = { }, [ALC882_TARGA] = { .mixers = { alc882_targa_mixer, alc882_chmode_mixer }, - .init_verbs = { alc882_init_verbs, alc882_targa_verbs}, + .init_verbs = { alc882_init_verbs, alc880_gpio3_init_verbs, + alc882_targa_verbs}, .num_dacs = ARRAY_SIZE(alc882_dac_nids), .dac_nids = alc882_dac_nids, .dig_out_nid = ALC882_DIGOUT_NID, @@ -9238,7 +9236,8 @@ static struct alc_config_preset alc883_presets[] = { }, [ALC883_TARGA_DIG] = { .mixers = { alc883_targa_mixer, alc883_chmode_mixer }, - .init_verbs = { alc883_init_verbs, alc883_targa_verbs}, + .init_verbs = { alc883_init_verbs, alc880_gpio3_init_verbs, + alc883_targa_verbs}, .num_dacs = ARRAY_SIZE(alc883_dac_nids), .dac_nids = alc883_dac_nids, .dig_out_nid = ALC883_DIGOUT_NID, @@ -9251,7 +9250,8 @@ static struct alc_config_preset alc883_presets[] = 
{ }, [ALC883_TARGA_2ch_DIG] = { .mixers = { alc883_targa_2ch_mixer}, - .init_verbs = { alc883_init_verbs, alc883_targa_verbs}, + .init_verbs = { alc883_init_verbs, alc880_gpio3_init_verbs, + alc883_targa_verbs}, .num_dacs = ARRAY_SIZE(alc883_dac_nids), .dac_nids = alc883_dac_nids, .adc_nids = alc883_adc_nids_alt, From b86a6c6c7b0bfc26b3e8d4f48e16ee0b13716385 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 28 Jun 2009 09:26:57 -0700 Subject: [PATCH 689/741] [WATCHDOG] drivers/watchdog/bcm47xx_wdt.c: Remove unnecessary semicolons Signed-off-by: Joe Perches Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/bcm47xx_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/bcm47xx_wdt.c b/drivers/watchdog/bcm47xx_wdt.c index 5c7011cda6a6..751c003864ad 100644 --- a/drivers/watchdog/bcm47xx_wdt.c +++ b/drivers/watchdog/bcm47xx_wdt.c @@ -161,7 +161,7 @@ static long bcm47xx_wdt_ioctl(struct file *file, { void __user *argp = (void __user *)arg; int __user *p = argp; - int new_value, retval = -EINVAL;; + int new_value, retval = -EINVAL; switch (cmd) { case WDIOC_GETSUPPORT: From db5d2d8a5dfe0ae3e83ac618fd953ecc621adcdf Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 29 Jun 2009 18:00:39 +0200 Subject: [PATCH 690/741] [WATCHDOG] w83697ug, fix lock imbalance Don't forget to unlock io_lock when w83697ug_select_wd_register fails in wdt_ctrl. 
Signed-off-by: Jiri Slaby Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/w83697ug_wdt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/w83697ug_wdt.c b/drivers/watchdog/w83697ug_wdt.c index 883b5f79673a..a6c12dec91a1 100644 --- a/drivers/watchdog/w83697ug_wdt.c +++ b/drivers/watchdog/w83697ug_wdt.c @@ -149,8 +149,10 @@ static void wdt_ctrl(int timeout) { spin_lock(&io_lock); - if (w83697ug_select_wd_register() < 0) + if (w83697ug_select_wd_register() < 0) { + spin_unlock(&io_lock); return; + } outb_p(0xF4, WDT_EFER); /* Select CRF4 */ outb_p(timeout, WDT_EFDR); /* Write Timeout counter to CRF4 */ From a6f052e39c3832b5842c4f44d9b3a4295dacfc4a Mon Sep 17 00:00:00 2001 From: Raphael Assenat Date: Mon, 29 Jun 2009 13:56:52 -0400 Subject: [PATCH 691/741] [WATCHDOG] SA1100 watchdog maximum timeout This patch replaces the hardcoded 255 seconds limit with a real limit based on oscr_freq. Also, the 'firmware_version' field is changed to '1' to allow the user space application to easily detect that this driver supports a higher maximum timeout.
Signed-off-by: Raphael Assenat Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sa1100_wdt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/sa1100_wdt.c b/drivers/watchdog/sa1100_wdt.c index ee1caae4d33b..016245419fad 100644 --- a/drivers/watchdog/sa1100_wdt.c +++ b/drivers/watchdog/sa1100_wdt.c @@ -38,7 +38,7 @@ static unsigned long oscr_freq; static unsigned long sa1100wdt_users; -static int pre_margin; +static unsigned int pre_margin; static int boot_status; /* @@ -84,6 +84,7 @@ static const struct watchdog_info ident = { .options = WDIOF_CARDRESET | WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING, .identity = "SA1100/PXA255 Watchdog", + .firmware_version = 1, }; static long sa1100dog_ioctl(struct file *file, unsigned int cmd, @@ -118,7 +119,7 @@ static long sa1100dog_ioctl(struct file *file, unsigned int cmd, if (ret) break; - if (time <= 0 || time > 255) { + if (time <= 0 || (oscr_freq * (long long)time >= 0xffffffff)) { ret = -EINVAL; break; } From cf1eaab2525e8ae1d53eaf923981c96cb31e57c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Slobodan=20Tomi=C4=87?= Date: Sun, 28 Jun 2009 21:20:36 +0200 Subject: [PATCH 692/741] [WATCHDOG] w83627hf_wdt.c: add support for the W83627EHF support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for the W83627EHF/EF and W83627EHG/EG chipsets. 
Signed-off-by: Slobodan Tomić Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/w83627hf_wdt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/watchdog/w83627hf_wdt.c b/drivers/watchdog/w83627hf_wdt.c index 916890abffdd..f201accc4e3d 100644 --- a/drivers/watchdog/w83627hf_wdt.c +++ b/drivers/watchdog/w83627hf_wdt.c @@ -89,6 +89,11 @@ static void w83627hf_select_wd_register(void) c = ((inb_p(WDT_EFDR) & 0xf7) | 0x04); /* select WDT0 */ outb_p(0x2b, WDT_EFER); outb_p(c, WDT_EFDR); /* set GPIO3 to WDT0 */ + } else if (c == 0x88) { /* W83627EHF */ + outb_p(0x2d, WDT_EFER); /* select GPIO5 */ + c = inb_p(WDT_EFDR) & ~0x01; /* PIN77 -> WDT0# */ + outb_p(0x2d, WDT_EFER); + outb_p(c, WDT_EFDR); /* set GPIO5 to WDT0 */ } outb_p(0x07, WDT_EFER); /* point to logical device number reg */ From c4c1bff64dfff4e6dd0936a0340f56b9284512c8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 9 Jul 2009 20:02:48 -0400 Subject: [PATCH 693/741] cifs: add pid of initiating process to spnego upcall info cifs: add pid of initiating process to spnego upcall info This will allow the upcall to poke in /proc/<pid>/environ and get the value of the $KRB5CCNAME env var for the process.
Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifs_spnego.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 4a4581cb2b5e..051caecf7d67 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -86,6 +86,9 @@ struct key_type cifs_spnego_key_type = { /* strlen of ";user=" */ #define USER_KEY_LEN 6 +/* strlen of ";pid=0x" */ +#define PID_KEY_LEN 7 + /* get a key struct with a SPNEGO security blob, suitable for session setup */ struct key * cifs_get_spnego_key(struct cifsSesInfo *sesInfo) @@ -103,7 +106,8 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) IP_KEY_LEN + INET6_ADDRSTRLEN + MAX_MECH_STR_LEN + UID_KEY_LEN + (sizeof(uid_t) * 2) + - USER_KEY_LEN + strlen(sesInfo->userName) + 1; + USER_KEY_LEN + strlen(sesInfo->userName) + + PID_KEY_LEN + (sizeof(pid_t) * 2) + 1; spnego_key = ERR_PTR(-ENOMEM); description = kzalloc(desc_len, GFP_KERNEL); @@ -141,6 +145,9 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) dp = description + strlen(description); sprintf(dp, ";user=%s", sesInfo->userName); + dp = description + strlen(description); + sprintf(dp, ";pid=0x%x", current->pid); + cFYI(1, ("key description = %s", description)); spnego_key = request_key(&cifs_spnego_key_type, description, ""); From 01ea95e3b6b16573a491ef98ad63f7a1bdcb504f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 9 Jul 2009 20:02:49 -0400 Subject: [PATCH 694/741] cifs: rename CIFSSMBUnixSetInfo to CIFSSMBUnixSetPathInfo cifs: rename CIFSSMBUnixSetInfo to CIFSSMBUnixSetPathInfo ...in preparation of adding a SET_FILE_INFO variant. 
Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 2 +- fs/cifs/cifssmb.c | 6 +++--- fs/cifs/dir.c | 15 ++++++++------- fs/cifs/file.c | 6 +++--- fs/cifs/inode.c | 16 ++++++++-------- 5 files changed, 23 insertions(+), 22 deletions(-) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index b2bd83fd2aa4..d95fd427de57 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -220,7 +220,7 @@ struct cifs_unix_set_info_args { dev_t device; }; -extern int CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *pTcon, +extern int CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *pTcon, char *fileName, const struct cifs_unix_set_info_args *args, const struct nls_table *nls_codepage, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 61007c627497..1cd01ba03656 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -5075,9 +5075,9 @@ SetAttrLgcyRetry: #endif /* temporarily unneeded SetAttr legacy function */ int -CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName, - const struct cifs_unix_set_info_args *args, - const struct nls_table *nls_codepage, int remap) +CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *tcon, char *fileName, + const struct cifs_unix_set_info_args *args, + const struct nls_table *nls_codepage, int remap) { TRANSACTION2_SPI_REQ *pSMB = NULL; TRANSACTION2_SPI_RSP *pSMBr = NULL; diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index ff55fc6932cb..4326ffd90fa9 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -425,9 +425,10 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, args.uid = NO_CHANGE_64; args.gid = NO_CHANGE_64; } - CIFSSMBUnixSetInfo(xid, tcon, full_path, &args, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); } else { /* BB implement mode setting via Windows security descriptors 
e.g. */ @@ -515,10 +516,10 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, args.uid = NO_CHANGE_64; args.gid = NO_CHANGE_64; } - rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, - &args, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, &args, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); if (!rc) { rc = cifs_get_inode_info_unix(&newinode, full_path, diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 97ce4bf89d15..c34b7f8a217b 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -448,9 +448,9 @@ int cifs_open(struct inode *inode, struct file *file) .mtime = NO_CHANGE_64, .device = 0, }; - CIFSSMBUnixSetInfo(xid, tcon, full_path, &args, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index b22379610d71..ad19007ea05f 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1241,10 +1241,10 @@ mkdir_get_info: args.uid = NO_CHANGE_64; args.gid = NO_CHANGE_64; } - CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, &args, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); } else { if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && (mode & S_IWUGO) == 0) { @@ -1876,10 +1876,10 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) args->ctime = NO_CHANGE_64; args->device = 0; - rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, args, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); if (!rc) rc = inode_setattr(inode, 
attrs); From 654cf14ac0a71c56c1f0032140c3403382ca076b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 9 Jul 2009 20:02:49 -0400 Subject: [PATCH 695/741] cifs: make a separate function for filling out FILE_UNIX_BASIC_INFO cifs: make a separate function for filling out FILE_UNIX_BASIC_INFO The SET_FILE_INFO variant will need to do the same thing here. Break this code out into a separate function that both variants can call. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 74 +++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 32 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 1cd01ba03656..1f3c8a463fcd 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -5074,6 +5074,47 @@ SetAttrLgcyRetry: } #endif /* temporarily unneeded SetAttr legacy function */ +static void +cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset, + const struct cifs_unix_set_info_args *args) +{ + u64 mode = args->mode; + + /* + * Samba server ignores set of file size to zero due to bugs in some + * older clients, but we should be precise - we use SetFileSize to + * set file size and do not want to truncate file size to zero + * accidently as happened on one Samba server beta by putting + * zero instead of -1 here + */ + data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64); + data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64); + data_offset->LastStatusChange = cpu_to_le64(args->ctime); + data_offset->LastAccessTime = cpu_to_le64(args->atime); + data_offset->LastModificationTime = cpu_to_le64(args->mtime); + data_offset->Uid = cpu_to_le64(args->uid); + data_offset->Gid = cpu_to_le64(args->gid); + /* better to leave device as zero when it is */ + data_offset->DevMajor = cpu_to_le64(MAJOR(args->device)); + data_offset->DevMinor = cpu_to_le64(MINOR(args->device)); + data_offset->Permissions = cpu_to_le64(mode); + + if (S_ISREG(mode)) + data_offset->Type = cpu_to_le32(UNIX_FILE); + else if (S_ISDIR(mode)) + 
data_offset->Type = cpu_to_le32(UNIX_DIR); + else if (S_ISLNK(mode)) + data_offset->Type = cpu_to_le32(UNIX_SYMLINK); + else if (S_ISCHR(mode)) + data_offset->Type = cpu_to_le32(UNIX_CHARDEV); + else if (S_ISBLK(mode)) + data_offset->Type = cpu_to_le32(UNIX_BLOCKDEV); + else if (S_ISFIFO(mode)) + data_offset->Type = cpu_to_le32(UNIX_FIFO); + else if (S_ISSOCK(mode)) + data_offset->Type = cpu_to_le32(UNIX_SOCKET); +} + int CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *tcon, char *fileName, const struct cifs_unix_set_info_args *args, @@ -5086,7 +5127,6 @@ CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *tcon, char *fileName, int bytes_returned = 0; FILE_UNIX_BASIC_INFO *data_offset; __u16 params, param_offset, offset, count, byte_count; - __u64 mode = args->mode; cFYI(1, ("In SetUID/GID/Mode")); setPermsRetry: @@ -5137,38 +5177,8 @@ setPermsRetry: pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC); pSMB->Reserved4 = 0; pSMB->hdr.smb_buf_length += byte_count; - /* Samba server ignores set of file size to zero due to bugs in some - older clients, but we should be precise - we use SetFileSize to - set file size and do not want to truncate file size to zero - accidently as happened on one Samba server beta by putting - zero instead of -1 here */ - data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64); - data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64); - data_offset->LastStatusChange = cpu_to_le64(args->ctime); - data_offset->LastAccessTime = cpu_to_le64(args->atime); - data_offset->LastModificationTime = cpu_to_le64(args->mtime); - data_offset->Uid = cpu_to_le64(args->uid); - data_offset->Gid = cpu_to_le64(args->gid); - /* better to leave device as zero when it is */ - data_offset->DevMajor = cpu_to_le64(MAJOR(args->device)); - data_offset->DevMinor = cpu_to_le64(MINOR(args->device)); - data_offset->Permissions = cpu_to_le64(mode); - - if (S_ISREG(mode)) - data_offset->Type = cpu_to_le32(UNIX_FILE); - else if (S_ISDIR(mode)) - 
data_offset->Type = cpu_to_le32(UNIX_DIR); - else if (S_ISLNK(mode)) - data_offset->Type = cpu_to_le32(UNIX_SYMLINK); - else if (S_ISCHR(mode)) - data_offset->Type = cpu_to_le32(UNIX_CHARDEV); - else if (S_ISBLK(mode)) - data_offset->Type = cpu_to_le32(UNIX_BLOCKDEV); - else if (S_ISFIFO(mode)) - data_offset->Type = cpu_to_le32(UNIX_FIFO); - else if (S_ISSOCK(mode)) - data_offset->Type = cpu_to_le32(UNIX_SOCKET); + cifs_fill_unix_set_info(data_offset, args); pSMB->ByteCount = cpu_to_le16(byte_count); rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, From 3bbeeb3c93a961bd01b969dd4395ecac0c09db8d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 9 Jul 2009 20:02:50 -0400 Subject: [PATCH 696/741] cifs: add and use CIFSSMBUnixSetFileInfo for setattr calls cifs: add and use CIFSSMBUnixSetFileInfo for setattr calls When there's an open filehandle, SET_FILE_INFO is apparently preferred over SET_PATH_INFO. Add a new variant that sets a FILE_UNIX_INFO_BASIC infolevel via SET_FILE_INFO and switch cifs_setattr_unix to use the new call when there's an open filehandle available. 
Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 4 +++ fs/cifs/cifssmb.c | 63 +++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/inode.c | 11 +++++++- 3 files changed, 77 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index d95fd427de57..37c11c08c529 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -220,6 +220,10 @@ struct cifs_unix_set_info_args { dev_t device; }; +extern int CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon, + const struct cifs_unix_set_info_args *args, + u16 fid, u32 pid_of_opener); + extern int CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *pTcon, char *fileName, const struct cifs_unix_set_info_args *args, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 1f3c8a463fcd..922f5fe2084c 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -5115,6 +5115,69 @@ cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset, data_offset->Type = cpu_to_le32(UNIX_SOCKET); } +int +CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon, + const struct cifs_unix_set_info_args *args, + u16 fid, u32 pid_of_opener) +{ + struct smb_com_transaction2_sfi_req *pSMB = NULL; + FILE_UNIX_BASIC_INFO *data_offset; + int rc = 0; + u16 params, param_offset, offset, byte_count, count; + + cFYI(1, ("Set Unix Info (via SetFileInfo)")); + rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); + + if (rc) + return rc; + + pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener); + pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16)); + + params = 6; + pSMB->MaxSetupCount = 0; + pSMB->Reserved = 0; + pSMB->Flags = 0; + pSMB->Timeout = 0; + pSMB->Reserved2 = 0; + param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; + offset = param_offset + params; + + data_offset = (FILE_UNIX_BASIC_INFO *) + ((char *)(&pSMB->hdr.Protocol) + offset); + count = sizeof(FILE_UNIX_BASIC_INFO); + + pSMB->MaxParameterCount = cpu_to_le16(2); + 
/* BB find max SMB PDU from sess */ + pSMB->MaxDataCount = cpu_to_le16(1000); + pSMB->SetupCount = 1; + pSMB->Reserved3 = 0; + pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION); + byte_count = 3 /* pad */ + params + count; + pSMB->DataCount = cpu_to_le16(count); + pSMB->ParameterCount = cpu_to_le16(params); + pSMB->TotalDataCount = pSMB->DataCount; + pSMB->TotalParameterCount = pSMB->ParameterCount; + pSMB->ParameterOffset = cpu_to_le16(param_offset); + pSMB->DataOffset = cpu_to_le16(offset); + pSMB->Fid = fid; + pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC); + pSMB->Reserved4 = 0; + pSMB->hdr.smb_buf_length += byte_count; + pSMB->ByteCount = cpu_to_le16(byte_count); + + cifs_fill_unix_set_info(data_offset, args); + + rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); + if (rc) + cFYI(1, ("Send error in Set Time (SetFileInfo) = %d", rc)); + + /* Note: On -EAGAIN error only caller can retry on handle based calls + since file handle passed in no longer valid */ + + return rc; +} + int CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *tcon, char *fileName, const struct cifs_unix_set_info_args *args, diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index ad19007ea05f..55b616bb381e 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1790,6 +1790,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsTconInfo *pTcon = cifs_sb->tcon; struct cifs_unix_set_info_args *args = NULL; + struct cifsFileInfo *open_file; cFYI(1, ("setattr_unix on file %s attrs->ia_valid=0x%x", direntry->d_name.name, attrs->ia_valid)); @@ -1876,10 +1877,18 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) args->ctime = NO_CHANGE_64; args->device = 0; - rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args, + open_file = find_writable_file(cifsInode); + if (open_file) { + u16 nfid = open_file->netfid; + u32 npid = open_file->pid; + rc = 
CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid); + atomic_dec(&open_file->wrtPending); + } else { + rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + } if (!rc) rc = inode_setattr(inode, attrs); From b77863bfa153e886f9f8faf1a791ba57a36efed0 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 9 Jul 2009 22:51:38 +0000 Subject: [PATCH 697/741] [CIFS] update cifs version number Signed-off-by: Steve French --- fs/cifs/CHANGES | 6 +++++- fs/cifs/cifsfs.h | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 3a9b7a58a51d..92888aa90749 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -5,7 +5,11 @@ client generated ones by default (mount option "serverino" turned on by default if server supports it). Add forceuid and forcegid mount options (so that when negotiating unix extensions specifying which uid mounted does not immediately force the server's reported -uids to be overridden). Add support for scope moutn parm. +uids to be overridden). Add support for scope mount parm. Improve +hard link detection to use same inode for both. Do not set +read-only dos attribute on directories (for chmod) since Windows +explorer special cases this attribute bit for directories for +a different purpose. 
Version 1.58 ------------ diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 586df24c9abb..6c170948300d 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -113,5 +113,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* EXPERIMENTAL */ -#define CIFS_VERSION "1.59" +#define CIFS_VERSION "1.60" #endif /* _CIFSFS_H */ From e99da35f060f9a3407f7def474a1df31f3b8643a Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 26 Jun 2009 09:46:18 +0800 Subject: [PATCH 698/741] drm/i915: Check the LID device to decide whether the LVDS should be initialized On some boxes the mobile chipset is used and there is no LVDS device. In such case we had better not initialize the LVDS output device so that one pipe can be used for other output device. For example: E-TOP. But unfortunately the LVDS device is still initialized on the boxes based on mobile chipset in KMS mode. It brings that this pipe occupied by LVDS can't be used for other output device. After checking the acpidump we find that there is no LID device on such boxes. In such case we can use the LID device to decide whether the LVDS device should be initialized. If there is no LID device, we can think that there is no LVDS device. It is unnecessary to initialize the LVDS output device. If there exists the LID device, it will continue the current flowchart. Maybe on some boxes there is no LVDS device but the LID device is found. In such case it should be added to the quirk list. 
http://bugs.freedesktop.org/show_bug.cgi?id=21496 http://bugs.freedesktop.org/show_bug.cgi?id=21856 http://bugs.freedesktop.org/show_bug.cgi?id=21127 Signed-off-by: Zhao Yakui Reviewed-by: Jesse Barnes [anholt: squashed in style fixups] Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_lvds.c | 70 +++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index f65044b1647b..9ab38efffecf 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -36,6 +36,7 @@ #include "intel_drv.h" #include "i915_drm.h" #include "i915_drv.h" +#include #define I915_LVDS "i915_lvds" @@ -788,6 +789,65 @@ static const struct dmi_system_id intel_no_lvds[] = { { } /* terminating entry */ }; +#ifdef CONFIG_ACPI +/* + * check_lid_device -- check whether @handle is an ACPI LID device. + * @handle: ACPI device handle + * @level : depth in the ACPI namespace tree + * @context: the number of LID device when we find the device + * @rv: a return value to fill if desired (Not use) + */ +static acpi_status +check_lid_device(acpi_handle handle, u32 level, void *context, + void **return_value) +{ + struct acpi_device *acpi_dev; + int *lid_present = context; + + acpi_dev = NULL; + /* Get the acpi device for device handle */ + if (acpi_bus_get_device(handle, &acpi_dev) || !acpi_dev) { + /* If there is no ACPI device for handle, return */ + return AE_OK; + } + + if (!strncmp(acpi_device_hid(acpi_dev), "PNP0C0D", 7)) + *lid_present = 1; + + return AE_OK; +} + +/** + * check whether there exists the ACPI LID device by enumerating the ACPI + * device tree. + */ +static int intel_lid_present(void) +{ + int lid_present = 0; + + if (acpi_disabled) { + /* If ACPI is disabled, there is no ACPI device tree to + * check, so assume the LID device would have been present. 
+ */ + return 1; + } + + acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, + ACPI_UINT32_MAX, + check_lid_device, &lid_present, NULL); + + return lid_present; +} +#else +static int intel_lid_present(void) +{ + /* In the absence of ACPI built in, assume that the LID device would + * have been present. + */ + return 1; +} +#endif + /** * intel_lvds_init - setup LVDS connectors on this device * @dev: drm device @@ -811,6 +871,16 @@ void intel_lvds_init(struct drm_device *dev) if (dmi_check_system(intel_no_lvds)) return; + /* Assume that any device without an ACPI LID device also doesn't + * have an integrated LVDS. We would be better off parsing the BIOS + * to get a reliable indicator, but that code isn't written yet. + * + * In the case of all-in-one desktops using LVDS that we've seen, + * they're using SDVO LVDS. + */ + if (!intel_lid_present()) + return; + if (IS_IGDNG(dev)) { if ((I915_READ(PCH_LVDS) & LVDS_DETECTED) == 0) return; From 0b8f18e358384a52c1ed7fa7129c08e7eaf86bb6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 9 Jul 2009 01:46:37 -0400 Subject: [PATCH 699/741] cifs: convert cifs_get_inode_info and non-posix readdir to use cifs_iget cifs: convert cifs_get_inode_info and non-posix readdir to use cifs_iget Rather than allocating an inode and filling it out, have cifs_get_inode_info fill out a cifs_fattr and call cifs_iget. This means a pretty hefty reorganization of cifs_get_inode_info. For the readdir codepath, add a couple of new functions for filling out cifs_fattr's from different FindFile response infolevels. Finally, remove cifs_new_inode since there are no more callers. 
Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: Steve French --- fs/cifs/cifsacl.c | 26 +-- fs/cifs/cifsglob.h | 2 + fs/cifs/cifsproto.h | 6 +- fs/cifs/inode.c | 397 +++++++++++++++++--------------------------- fs/cifs/readdir.c | 344 +++++++++----------------------------- 5 files changed, 249 insertions(+), 526 deletions(-) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 1403b5d86a73..6941c22398a6 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -327,7 +327,7 @@ static void dump_ace(struct cifs_ace *pace, char *end_of_acl) static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, struct cifs_sid *pownersid, struct cifs_sid *pgrpsid, - struct inode *inode) + struct cifs_fattr *fattr) { int i; int num_aces = 0; @@ -340,7 +340,7 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, if (!pdacl) { /* no DACL in the security descriptor, set all the permissions for user/group/other */ - inode->i_mode |= S_IRWXUGO; + fattr->cf_mode |= S_IRWXUGO; return; } @@ -357,7 +357,7 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, /* reset rwx permissions for user/group/other. Also, if num_aces is 0 i.e. 
DACL has no ACEs, user/group/other have no permissions */ - inode->i_mode &= ~(S_IRWXUGO); + fattr->cf_mode &= ~(S_IRWXUGO); acl_base = (char *)pdacl; acl_size = sizeof(struct cifs_acl); @@ -379,17 +379,17 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, if (compare_sids(&(ppace[i]->sid), pownersid)) access_flags_to_mode(ppace[i]->access_req, ppace[i]->type, - &(inode->i_mode), + &fattr->cf_mode, &user_mask); if (compare_sids(&(ppace[i]->sid), pgrpsid)) access_flags_to_mode(ppace[i]->access_req, ppace[i]->type, - &(inode->i_mode), + &fattr->cf_mode, &group_mask); if (compare_sids(&(ppace[i]->sid), &sid_everyone)) access_flags_to_mode(ppace[i]->access_req, ppace[i]->type, - &(inode->i_mode), + &fattr->cf_mode, &other_mask); /* memcpy((void *)(&(cifscred->aces[i])), @@ -464,7 +464,7 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl) /* Convert CIFS ACL to POSIX form */ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len, - struct inode *inode) + struct cifs_fattr *fattr) { int rc; struct cifs_sid *owner_sid_ptr, *group_sid_ptr; @@ -472,7 +472,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len, char *end_of_acl = ((char *)pntsd) + acl_len; __u32 dacloffset; - if ((inode == NULL) || (pntsd == NULL)) + if (pntsd == NULL) return -EIO; owner_sid_ptr = (struct cifs_sid *)((char *)pntsd + @@ -497,7 +497,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len, if (dacloffset) parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr, - group_sid_ptr, inode); + group_sid_ptr, fattr); else cFYI(1, ("no ACL")); /* BB grant all or default perms? 
*/ @@ -508,7 +508,6 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len, memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr, sizeof(struct cifs_sid)); */ - return 0; } @@ -671,8 +670,9 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, } /* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */ -void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode, - const char *path, const __u16 *pfid) +void +cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, + struct inode *inode, const char *path, const __u16 *pfid) { struct cifs_ntsd *pntsd = NULL; u32 acllen = 0; @@ -687,7 +687,7 @@ void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode, /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */ if (pntsd) - rc = parse_sec_desc(pntsd, acllen, inode); + rc = parse_sec_desc(pntsd, acllen, fattr); if (rc) cFYI(1, ("parse sec desc failed rc = %d", rc)); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index e6435cba8113..8bcf5a4bcded 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -479,6 +479,8 @@ struct dfs_info3_param { */ #define CIFS_FATTR_DFS_REFERRAL 0x1 +#define CIFS_FATTR_DELETE_PENDING 0x2 +#define CIFS_FATTR_NEED_REVAL 0x4 struct cifs_fattr { u32 cf_flags; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 37c11c08c529..da8fbf565991 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -102,7 +102,6 @@ extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, struct cifs_sb_info *cifs_sb); extern void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr); -extern struct inode *cifs_new_inode(struct super_block *sb, __u64 *inum); extern struct inode *cifs_iget(struct super_block *sb, struct cifs_fattr *fattr); @@ -113,8 +112,9 @@ extern int cifs_get_inode_info(struct inode **pinode, extern int cifs_get_inode_info_unix(struct inode **pinode, const unsigned char 
*search_path, struct super_block *sb, int xid); -extern void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode, - const char *path, const __u16 *pfid); +extern void cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, + struct cifs_fattr *fattr, struct inode *inode, + const char *path, const __u16 *pfid); extern int mode_to_acl(struct inode *inode, const char *path, __u64); extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 55b616bb381e..a807397f444e 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -82,23 +82,34 @@ void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) { struct cifsInodeInfo *cifs_i = CIFS_I(inode); - unsigned long now = jiffies; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + unsigned long oldtime = cifs_i->time; inode->i_atime = fattr->cf_atime; inode->i_mtime = fattr->cf_mtime; inode->i_ctime = fattr->cf_ctime; - inode->i_mode = fattr->cf_mode; inode->i_rdev = fattr->cf_rdev; inode->i_nlink = fattr->cf_nlink; inode->i_uid = fattr->cf_uid; inode->i_gid = fattr->cf_gid; + /* if dynperm is set, don't clobber existing mode */ + if (inode->i_state & I_NEW || + !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) + inode->i_mode = fattr->cf_mode; + cifs_i->cifsAttrs = fattr->cf_cifsattrs; cifs_i->uniqueid = fattr->cf_uniqueid; + if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL) + cifs_i->time = 0; + else + cifs_i->time = jiffies; + cFYI(1, ("inode 0x%p old_time=%ld new_time=%ld", inode, - cifs_i->time, now)); - cifs_i->time = now; + oldtime, cifs_i->time)); + + cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING; /* * Can't safely change the file size here if the client is writing to @@ -219,49 +230,6 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb) fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL; } -/** - * cifs_new inode - create new inode, initialize, and hash it - * @sb - pointer to superblock - * 
@inum - if valid pointer and serverino is enabled, replace i_ino with val - * - * Create a new inode, initialize it for CIFS and hash it. Returns the new - * inode or NULL if one couldn't be allocated. - * - * If the share isn't mounted with "serverino" or inum is a NULL pointer then - * we'll just use the inode number assigned by new_inode(). Note that this can - * mean i_ino collisions since the i_ino assigned by new_inode is not - * guaranteed to be unique. - */ -struct inode * -cifs_new_inode(struct super_block *sb, __u64 *inum) -{ - struct inode *inode; - - inode = new_inode(sb); - if (inode == NULL) - return NULL; - - /* - * BB: Is i_ino == 0 legal? Here, we assume that it is. If it isn't we - * stop passing inum as ptr. Are there sanity checks we can use to - * ensure that the server is really filling in that field? Also, - * if serverino is disabled, perhaps we should be using iunique()? - */ - if (inum && (CIFS_SB(sb)->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) - inode->i_ino = (unsigned long) *inum; - - /* - * must set this here instead of cifs_alloc_inode since VFS will - * clobber i_flags - */ - if (sb->s_flags & MS_NOATIME) - inode->i_flags |= S_NOATIME | S_NOCMTIME; - - insert_inode_hash(inode); - - return inode; -} - int cifs_get_inode_info_unix(struct inode **pinode, const unsigned char *full_path, struct super_block *sb, int xid) @@ -302,9 +270,9 @@ int cifs_get_inode_info_unix(struct inode **pinode, return rc; } -static int decode_sfu_inode(struct inode *inode, __u64 size, - const unsigned char *path, - struct cifs_sb_info *cifs_sb, int xid) +static int +cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path, + struct cifs_sb_info *cifs_sb, int xid) { int rc; int oplock = 0; @@ -316,10 +284,15 @@ static int decode_sfu_inode(struct inode *inode, __u64 size, pbuf = buf; - if (size == 0) { - inode->i_mode |= S_IFIFO; + fattr->cf_mode &= ~S_IFMT; + + if (fattr->cf_eof == 0) { + fattr->cf_mode |= S_IFIFO; + fattr->cf_dtype = DT_FIFO; return 
0; - } else if (size < 8) { + } else if (fattr->cf_eof < 8) { + fattr->cf_mode |= S_IFREG; + fattr->cf_dtype = DT_REG; return -EINVAL; /* EOPNOTSUPP? */ } @@ -331,42 +304,46 @@ static int decode_sfu_inode(struct inode *inode, __u64 size, if (rc == 0) { int buf_type = CIFS_NO_BUFFER; /* Read header */ - rc = CIFSSMBRead(xid, pTcon, - netfid, + rc = CIFSSMBRead(xid, pTcon, netfid, 24 /* length */, 0 /* offset */, &bytes_read, &pbuf, &buf_type); if ((rc == 0) && (bytes_read >= 8)) { if (memcmp("IntxBLK", pbuf, 8) == 0) { cFYI(1, ("Block device")); - inode->i_mode |= S_IFBLK; + fattr->cf_mode |= S_IFBLK; + fattr->cf_dtype = DT_BLK; if (bytes_read == 24) { /* we have enough to decode dev num */ __u64 mjr; /* major */ __u64 mnr; /* minor */ mjr = le64_to_cpu(*(__le64 *)(pbuf+8)); mnr = le64_to_cpu(*(__le64 *)(pbuf+16)); - inode->i_rdev = MKDEV(mjr, mnr); + fattr->cf_rdev = MKDEV(mjr, mnr); } } else if (memcmp("IntxCHR", pbuf, 8) == 0) { cFYI(1, ("Char device")); - inode->i_mode |= S_IFCHR; + fattr->cf_mode |= S_IFCHR; + fattr->cf_dtype = DT_CHR; if (bytes_read == 24) { /* we have enough to decode dev num */ __u64 mjr; /* major */ __u64 mnr; /* minor */ mjr = le64_to_cpu(*(__le64 *)(pbuf+8)); mnr = le64_to_cpu(*(__le64 *)(pbuf+16)); - inode->i_rdev = MKDEV(mjr, mnr); + fattr->cf_rdev = MKDEV(mjr, mnr); } } else if (memcmp("IntxLNK", pbuf, 7) == 0) { cFYI(1, ("Symlink")); - inode->i_mode |= S_IFLNK; + fattr->cf_mode |= S_IFLNK; + fattr->cf_dtype = DT_LNK; } else { - inode->i_mode |= S_IFREG; /* file? */ + fattr->cf_mode |= S_IFREG; /* file? 
*/ + fattr->cf_dtype = DT_REG; rc = -EOPNOTSUPP; } } else { - inode->i_mode |= S_IFREG; /* then it is a file */ + fattr->cf_mode |= S_IFREG; /* then it is a file */ + fattr->cf_dtype = DT_REG; rc = -EOPNOTSUPP; /* or some unknown SFU type */ } CIFSSMBClose(xid, pTcon, netfid); @@ -376,9 +353,13 @@ static int decode_sfu_inode(struct inode *inode, __u64 size, #define SFBITS_MASK (S_ISVTX | S_ISGID | S_ISUID) /* SETFILEBITS valid bits */ -static int get_sfu_mode(struct inode *inode, - const unsigned char *path, - struct cifs_sb_info *cifs_sb, int xid) +/* + * Fetch mode bits as provided by SFU. + * + * FIXME: Doesn't this clobber the type bit we got from cifs_sfu_type ? + */ +static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, + struct cifs_sb_info *cifs_sb, int xid) { #ifdef CONFIG_CIFS_XATTR ssize_t rc; @@ -386,68 +367,80 @@ static int get_sfu_mode(struct inode *inode, __u32 mode; rc = CIFSSMBQueryEA(xid, cifs_sb->tcon, path, "SETFILEBITS", - ea_value, 4 /* size of buf */, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + ea_value, 4 /* size of buf */, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc < 0) return (int)rc; else if (rc > 3) { mode = le32_to_cpu(*((__le32 *)ea_value)); - inode->i_mode &= ~SFBITS_MASK; - cFYI(1, ("special bits 0%o org mode 0%o", mode, inode->i_mode)); - inode->i_mode = (mode & SFBITS_MASK) | inode->i_mode; + fattr->cf_mode &= ~SFBITS_MASK; + cFYI(1, ("special bits 0%o org mode 0%o", mode, + fattr->cf_mode)); + fattr->cf_mode = (mode & SFBITS_MASK) | fattr->cf_mode; cFYI(1, ("special mode bits 0%o", mode)); - return 0; - } else { - return 0; } + + return 0; #else return -EOPNOTSUPP; #endif } -/* - * Needed to setup inode data for the directory which is the - * junction to the new submount (ie to setup the fake directory - * which represents a DFS referral) - */ -static void fill_fake_finddata(FILE_ALL_INFO *pfnd_dat, - struct super_block *sb) +/* 
Fill a cifs_fattr struct with info from FILE_ALL_INFO */ +void +cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, + struct cifs_sb_info *cifs_sb, bool adjust_tz) { - memset(pfnd_dat, 0, sizeof(FILE_ALL_INFO)); + memset(fattr, 0, sizeof(*fattr)); + fattr->cf_cifsattrs = le32_to_cpu(info->Attributes); + if (info->DeletePending) + fattr->cf_flags |= CIFS_FATTR_DELETE_PENDING; -/* __le64 pfnd_dat->AllocationSize = cpu_to_le64(0); - __le64 pfnd_dat->EndOfFile = cpu_to_le64(0); - __u8 pfnd_dat->DeletePending = 0; - __u8 pfnd_data->Directory = 0; - __le32 pfnd_dat->EASize = 0; - __u64 pfnd_dat->IndexNumber = 0; - __u64 pfnd_dat->IndexNumber1 = 0; */ - pfnd_dat->CreationTime = - cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); - pfnd_dat->LastAccessTime = - cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); - pfnd_dat->LastWriteTime = - cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); - pfnd_dat->ChangeTime = - cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); - pfnd_dat->Attributes = cpu_to_le32(ATTR_DIRECTORY); - pfnd_dat->NumberOfLinks = cpu_to_le32(2); + if (info->LastAccessTime) + fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime); + else + fattr->cf_atime = CURRENT_TIME; + + fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime); + fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime); + + if (adjust_tz) { + fattr->cf_ctime.tv_sec += cifs_sb->tcon->ses->server->timeAdj; + fattr->cf_mtime.tv_sec += cifs_sb->tcon->ses->server->timeAdj; + } + + fattr->cf_eof = le64_to_cpu(info->EndOfFile); + fattr->cf_bytes = le64_to_cpu(info->AllocationSize); + + if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { + fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; + fattr->cf_dtype = DT_DIR; + } else { + fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; + fattr->cf_dtype = DT_REG; + } + + /* clear write bits if ATTR_READONLY is set */ + if (fattr->cf_cifsattrs & ATTR_READONLY) + fattr->cf_mode &= ~(S_IWUGO); + + fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); + + 
fattr->cf_uid = cifs_sb->mnt_uid; + fattr->cf_gid = cifs_sb->mnt_gid; } int cifs_get_inode_info(struct inode **pinode, const unsigned char *full_path, FILE_ALL_INFO *pfindData, struct super_block *sb, int xid, const __u16 *pfid) { - int rc = 0; - __u32 attr; - struct cifsInodeInfo *cifsInfo; + int rc = 0, tmprc; struct cifsTconInfo *pTcon; - struct inode *inode; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); char *buf = NULL; bool adjustTZ = false; - bool is_dfs_referral = false; - umode_t default_mode; + struct cifs_fattr fattr; pTcon = cifs_sb->tcon; cFYI(1, ("Getting info on %s", full_path)); @@ -482,164 +475,83 @@ int cifs_get_inode_info(struct inode **pinode, adjustTZ = true; } } - /* dump_mem("\nQPathInfo return data",&findData, sizeof(findData)); */ - if (rc == -EREMOTE) { - is_dfs_referral = true; - fill_fake_finddata(pfindData, sb); + + if (!rc) { + cifs_all_info_to_fattr(&fattr, (FILE_ALL_INFO *) pfindData, + cifs_sb, adjustTZ); + } else if (rc == -EREMOTE) { + cifs_create_dfs_fattr(&fattr, sb); rc = 0; - } else if (rc) + } else { goto cgii_exit; + } - attr = le32_to_cpu(pfindData->Attributes); - - /* get new inode */ + /* + * If an inode wasn't passed in, then get the inode number + * + * Is an i_ino of zero legal? Can we use that to check if the server + * supports returning inode numbers? Are there other sanity checks we + * can use to ensure that the server is really filling in that field? + * + * We can not use the IndexNumber field by default from Windows or + * Samba (in ALL_INFO buf) but we can request it explicitly. The SNIA + * CIFS spec claims that this value is unique within the scope of a + * share, and the windows docs hint that it's actually unique + * per-machine. + * + * There may be higher info levels that work but are there Windows + * server or network appliances for which IndexNumber field is not + * guaranteed unique? + */ if (*pinode == NULL) { - __u64 inode_num; - __u64 *pinum = &inode_num; - - /* Is an i_ino of zero legal? 
Can we use that to check - if the server supports returning inode numbers? Are - there other sanity checks we can use to ensure that - the server is really filling in that field? */ - - /* We can not use the IndexNumber field by default from - Windows or Samba (in ALL_INFO buf) but we can request - it explicitly. It may not be unique presumably if - the server has multiple devices mounted under one share */ - - /* There may be higher info levels that work but are - there Windows server or network appliances for which - IndexNumber field is not guaranteed unique? */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { int rc1 = 0; rc1 = CIFSGetSrvInodeNumber(xid, pTcon, - full_path, pinum, + full_path, &fattr.cf_uniqueid, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc1) { - cFYI(1, ("GetSrvInodeNum rc %d", rc1)); - pinum = NULL; /* BB EOPNOSUPP disable SERVER_INUM? */ + cFYI(1, ("GetSrvInodeNum rc %d", rc1)); + fattr.cf_uniqueid = iunique(sb, ROOT_I); } } else { - pinum = NULL; + fattr.cf_uniqueid = iunique(sb, ROOT_I); } - - *pinode = cifs_new_inode(sb, pinum); - if (*pinode == NULL) { - rc = -ENOMEM; - goto cgii_exit; - } - } - inode = *pinode; - cifsInfo = CIFS_I(inode); - cifsInfo->cifsAttrs = attr; - cifsInfo->delete_pending = pfindData->DeletePending ? true : false; - cFYI(1, ("Old time %ld", cifsInfo->time)); - cifsInfo->time = jiffies; - cFYI(1, ("New time %ld", cifsInfo->time)); - - /* blksize needs to be multiple of two. 
So safer to default to - blksize and blkbits set in superblock so 2**blkbits and blksize - will match rather than setting to: - (pTcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFE00;*/ - - /* Linux can not store file creation time so ignore it */ - if (pfindData->LastAccessTime) - inode->i_atime = cifs_NTtimeToUnix(pfindData->LastAccessTime); - else /* do not need to use current_fs_time - time not stored */ - inode->i_atime = CURRENT_TIME; - inode->i_mtime = cifs_NTtimeToUnix(pfindData->LastWriteTime); - inode->i_ctime = cifs_NTtimeToUnix(pfindData->ChangeTime); - cFYI(DBG2, ("Attributes came in as 0x%x", attr)); - if (adjustTZ && (pTcon->ses) && (pTcon->ses->server)) { - inode->i_ctime.tv_sec += pTcon->ses->server->timeAdj; - inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj; - } - - /* get default inode mode */ - if (attr & ATTR_DIRECTORY) - default_mode = cifs_sb->mnt_dir_mode; - else - default_mode = cifs_sb->mnt_file_mode; - - /* set permission bits */ - if (atomic_read(&cifsInfo->inUse) == 0 || - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) - inode->i_mode = default_mode; - else { - /* just reenable write bits if !ATTR_READONLY */ - if ((inode->i_mode & S_IWUGO) == 0 && - (attr & ATTR_READONLY) == 0) - inode->i_mode |= (S_IWUGO & default_mode); - - inode->i_mode &= ~S_IFMT; - } - /* clear write bits if ATTR_READONLY is set */ - if (attr & ATTR_READONLY) - inode->i_mode &= ~S_IWUGO; - - /* set inode type */ - if ((attr & ATTR_SYSTEM) && - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) { - /* no need to fix endianness on 0 */ - if (pfindData->EndOfFile == 0) - inode->i_mode |= S_IFIFO; - else if (decode_sfu_inode(inode, - le64_to_cpu(pfindData->EndOfFile), - full_path, cifs_sb, xid)) - cFYI(1, ("unknown SFU file type\n")); } else { - if (attr & ATTR_DIRECTORY) - inode->i_mode |= S_IFDIR; - else - inode->i_mode |= S_IFREG; + fattr.cf_uniqueid = CIFS_I(*pinode)->uniqueid; } - cifsInfo->server_eof = le64_to_cpu(pfindData->EndOfFile); - 
spin_lock(&inode->i_lock); - if (is_size_safe_to_change(cifsInfo, cifsInfo->server_eof)) { - /* can not safely shrink the file size here if the - client is writing to it due to potential races */ - i_size_write(inode, cifsInfo->server_eof); - - /* 512 bytes (2**9) is the fake blocksize that must be - used for this calculation */ - inode->i_blocks = (512 - 1 + le64_to_cpu( - pfindData->AllocationSize)) >> 9; + /* query for SFU type info if supported and needed */ + if (fattr.cf_cifsattrs & ATTR_SYSTEM && + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { + tmprc = cifs_sfu_type(&fattr, full_path, cifs_sb, xid); + if (tmprc) + cFYI(1, ("cifs_sfu_type failed: %d", tmprc)); } - spin_unlock(&inode->i_lock); - inode->i_nlink = le32_to_cpu(pfindData->NumberOfLinks); - - /* BB fill in uid and gid here? with help from winbind? - or retrieve from NTFS stream extended attribute */ #ifdef CONFIG_CIFS_EXPERIMENTAL /* fill in 0777 bits from ACL */ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { cFYI(1, ("Getting mode bits from ACL")); - acl_to_uid_mode(cifs_sb, inode, full_path, pfid); + cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path, pfid); } #endif - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { - /* fill in remaining high mode bits e.g. SUID, VTX */ - get_sfu_mode(inode, full_path, cifs_sb, xid); - } else if (atomic_read(&cifsInfo->inUse) == 0) { - inode->i_uid = cifs_sb->mnt_uid; - inode->i_gid = cifs_sb->mnt_gid; - /* set so we do not keep refreshing these fields with - bad data after user has changed them in memory */ - atomic_set(&cifsInfo->inUse, 1); + + /* fill in remaining high mode bits e.g. 
SUID, VTX */ + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) + cifs_sfu_mode(&fattr, full_path, cifs_sb, xid); + + if (!*pinode) { + *pinode = cifs_iget(sb, &fattr); + if (!*pinode) + rc = -ENOMEM; + } else { + cifs_fattr_to_inode(*pinode, &fattr); } - cifs_set_ops(inode, is_dfs_referral); - - - - cgii_exit: kfree(buf); return rc; @@ -753,21 +665,14 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) return ERR_PTR(-ENOMEM); xid = GetXid(); - if (cifs_sb->tcon->unix_ext) { + if (cifs_sb->tcon->unix_ext) rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid); - if (!inode) - return ERR_PTR(-ENOMEM); - } else { - inode = iget_locked(sb, ino); - if (!inode) - return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) - return inode; - - rc = cifs_get_inode_info(&inode, full_path, NULL, inode->i_sb, + else + rc = cifs_get_inode_info(&inode, full_path, NULL, sb, xid, NULL); - unlock_new_inode(inode); - } + + if (!inode) + return ERR_PTR(-ENOMEM); if (rc && cifs_sb->tcon->ipc) { cFYI(1, ("ipc connection - fake read inode")); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 231aa6953f83..f823a4a208a7 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -112,239 +112,74 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, return dentry; } -/* Returns 1 if new inode created, 2 if both dentry and inode were */ -/* Might check in the future if inode number changed so we can rehash inode */ -static int -construct_dentry(struct qstr *qstring, struct file *file, - struct inode **ptmp_inode, struct dentry **pnew_dentry, - __u64 *inum) +static void +cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) { - struct dentry *tmp_dentry = NULL; - struct super_block *sb = file->f_path.dentry->d_sb; - int rc = 0; + fattr->cf_uid = cifs_sb->mnt_uid; + fattr->cf_gid = cifs_sb->mnt_gid; - cFYI(1, ("For %s", qstring->name)); - - tmp_dentry = d_lookup(file->f_path.dentry, qstring); - if (tmp_dentry) { - /* BB: 
overwrite old name? i.e. tmp_dentry->d_name and - * tmp_dentry->d_name.len?? - */ - cFYI(0, ("existing dentry with inode 0x%p", - tmp_dentry->d_inode)); - *ptmp_inode = tmp_dentry->d_inode; - if (*ptmp_inode == NULL) { - *ptmp_inode = cifs_new_inode(sb, inum); - if (*ptmp_inode == NULL) - return rc; - rc = 1; - } + if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { + fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; + fattr->cf_dtype = DT_DIR; } else { - tmp_dentry = d_alloc(file->f_path.dentry, qstring); - if (tmp_dentry == NULL) { - cERROR(1, ("Failed allocating dentry")); - *ptmp_inode = NULL; - return rc; - } - - if (CIFS_SB(sb)->tcon->nocase) - tmp_dentry->d_op = &cifs_ci_dentry_ops; - else - tmp_dentry->d_op = &cifs_dentry_ops; - - *ptmp_inode = cifs_new_inode(sb, inum); - if (*ptmp_inode == NULL) - return rc; - rc = 2; + fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; + fattr->cf_dtype = DT_REG; } - tmp_dentry->d_time = jiffies; - *pnew_dentry = tmp_dentry; - return rc; -} + if (fattr->cf_cifsattrs & ATTR_READONLY) + fattr->cf_mode &= ~S_IWUGO; -static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, - char *buf, unsigned int *pobject_type, int isNewInode) -{ - loff_t local_size; - struct timespec local_mtime; - - struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode); - struct cifs_sb_info *cifs_sb = CIFS_SB(tmp_inode->i_sb); - __u32 attr; - __u64 allocation_size; - __u64 end_of_file; - umode_t default_mode; - - /* save mtime and size */ - local_mtime = tmp_inode->i_mtime; - local_size = tmp_inode->i_size; - - if (new_buf_type) { - FILE_DIRECTORY_INFO *pfindData = (FILE_DIRECTORY_INFO *)buf; - - attr = le32_to_cpu(pfindData->ExtFileAttributes); - allocation_size = le64_to_cpu(pfindData->AllocationSize); - end_of_file = le64_to_cpu(pfindData->EndOfFile); - tmp_inode->i_atime = - cifs_NTtimeToUnix(pfindData->LastAccessTime); - tmp_inode->i_mtime = - cifs_NTtimeToUnix(pfindData->LastWriteTime); - tmp_inode->i_ctime = - 
cifs_NTtimeToUnix(pfindData->ChangeTime); - } else { /* legacy, OS2 and DOS style */ - int offset = cifs_sb->tcon->ses->server->timeAdj; - FIND_FILE_STANDARD_INFO *pfindData = - (FIND_FILE_STANDARD_INFO *)buf; - - tmp_inode->i_mtime = cnvrtDosUnixTm(pfindData->LastWriteDate, - pfindData->LastWriteTime, - offset); - tmp_inode->i_atime = cnvrtDosUnixTm(pfindData->LastAccessDate, - pfindData->LastAccessTime, - offset); - tmp_inode->i_ctime = cnvrtDosUnixTm(pfindData->LastWriteDate, - pfindData->LastWriteTime, - offset); - attr = le16_to_cpu(pfindData->Attributes); - allocation_size = le32_to_cpu(pfindData->AllocationSize); - end_of_file = le32_to_cpu(pfindData->DataSize); - } - - /* Linux can not store file creation time unfortunately so ignore it */ - - cifsInfo->cifsAttrs = attr; -#ifdef CONFIG_CIFS_EXPERIMENTAL - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { - /* get more accurate mode via ACL - so force inode refresh */ - cifsInfo->time = 0; - } else -#endif /* CONFIG_CIFS_EXPERIMENTAL */ - cifsInfo->time = jiffies; - - /* treat dos attribute of read-only as read-only mode bit e.g. 555? */ - /* 2767 perms - indicate mandatory locking */ - /* BB fill in uid and gid here? with help from winbind? 
- or retrieve from NTFS stream extended attribute */ - if (atomic_read(&cifsInfo->inUse) == 0) { - tmp_inode->i_uid = cifs_sb->mnt_uid; - tmp_inode->i_gid = cifs_sb->mnt_gid; - } - - if (attr & ATTR_DIRECTORY) - default_mode = cifs_sb->mnt_dir_mode; - else - default_mode = cifs_sb->mnt_file_mode; - - /* set initial permissions */ - if ((atomic_read(&cifsInfo->inUse) == 0) || - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) - tmp_inode->i_mode = default_mode; - else { - /* just reenable write bits if !ATTR_READONLY */ - if ((tmp_inode->i_mode & S_IWUGO) == 0 && - (attr & ATTR_READONLY) == 0) - tmp_inode->i_mode |= (S_IWUGO & default_mode); - - tmp_inode->i_mode &= ~S_IFMT; - } - - /* clear write bits if ATTR_READONLY is set */ - if (attr & ATTR_READONLY) - tmp_inode->i_mode &= ~S_IWUGO; - - /* set inode type */ - if ((attr & ATTR_SYSTEM) && - (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) { - if (end_of_file == 0) { - tmp_inode->i_mode |= S_IFIFO; - *pobject_type = DT_FIFO; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL && + fattr->cf_cifsattrs & ATTR_SYSTEM) { + if (fattr->cf_eof == 0) { + fattr->cf_mode &= ~S_IFMT; + fattr->cf_mode |= S_IFIFO; + fattr->cf_dtype = DT_FIFO; } else { /* - * trying to get the type can be slow, so just call - * this a regular file for now, and mark for reval + * trying to get the type and mode via SFU can be slow, + * so just call those regular files for now, and mark + * for reval */ - tmp_inode->i_mode |= S_IFREG; - *pobject_type = DT_REG; - cifsInfo->time = 0; - } - } else { - if (attr & ATTR_DIRECTORY) { - tmp_inode->i_mode |= S_IFDIR; - *pobject_type = DT_DIR; - } else { - tmp_inode->i_mode |= S_IFREG; - *pobject_type = DT_REG; + fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; } } +} - /* can not fill in nlink here as in qpathinfo version and Unx search */ - if (atomic_read(&cifsInfo->inUse) == 0) - atomic_set(&cifsInfo->inUse, 1); +void +cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info, + struct 
cifs_sb_info *cifs_sb) +{ + memset(fattr, 0, sizeof(*fattr)); + fattr->cf_cifsattrs = le32_to_cpu(info->ExtFileAttributes); + fattr->cf_eof = le64_to_cpu(info->EndOfFile); + fattr->cf_bytes = le64_to_cpu(info->AllocationSize); + fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime); + fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime); + fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime); - cifsInfo->server_eof = end_of_file; - spin_lock(&tmp_inode->i_lock); - if (is_size_safe_to_change(cifsInfo, end_of_file)) { - /* can not safely change the file size here if the - client is writing to it due to potential races */ - i_size_write(tmp_inode, end_of_file); + cifs_fill_common_info(fattr, cifs_sb); +} - /* 512 bytes (2**9) is the fake blocksize that must be used */ - /* for this calculation, even though the reported blocksize is larger */ - tmp_inode->i_blocks = (512 - 1 + allocation_size) >> 9; - } - spin_unlock(&tmp_inode->i_lock); +void +cifs_std_info_to_fattr(struct cifs_fattr *fattr, FIND_FILE_STANDARD_INFO *info, + struct cifs_sb_info *cifs_sb) +{ + int offset = cifs_sb->tcon->ses->server->timeAdj; - if (allocation_size < end_of_file) - cFYI(1, ("May be sparse file, allocation less than file size")); - cFYI(1, ("File Size %ld and blocks %llu", - (unsigned long)tmp_inode->i_size, - (unsigned long long)tmp_inode->i_blocks)); - if (S_ISREG(tmp_inode->i_mode)) { - cFYI(1, ("File inode")); - tmp_inode->i_op = &cifs_file_inode_ops; - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) - tmp_inode->i_fop = &cifs_file_direct_nobrl_ops; - else - tmp_inode->i_fop = &cifs_file_direct_ops; - } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) - tmp_inode->i_fop = &cifs_file_nobrl_ops; - else - tmp_inode->i_fop = &cifs_file_ops; + memset(fattr, 0, sizeof(*fattr)); + fattr->cf_atime = cnvrtDosUnixTm(info->LastAccessDate, + info->LastAccessTime, offset); + fattr->cf_ctime = cnvrtDosUnixTm(info->LastWriteDate, 
+ info->LastWriteTime, offset); + fattr->cf_mtime = cnvrtDosUnixTm(info->LastWriteDate, + info->LastWriteTime, offset); - if ((cifs_sb->tcon) && (cifs_sb->tcon->ses) && - (cifs_sb->tcon->ses->server->maxBuf < - PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)) - tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf; - else - tmp_inode->i_data.a_ops = &cifs_addr_ops; + fattr->cf_cifsattrs = le16_to_cpu(info->Attributes); + fattr->cf_bytes = le32_to_cpu(info->AllocationSize); + fattr->cf_eof = le32_to_cpu(info->DataSize); - if (isNewInode) - return; /* No sense invalidating pages for new inode - since have not started caching readahead file - data yet */ - - if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) && - (local_size == tmp_inode->i_size)) { - cFYI(1, ("inode exists but unchanged")); - } else { - /* file may have changed on server */ - cFYI(1, ("invalidate inode, readdir detected change")); - invalidate_remote_inode(tmp_inode); - } - } else if (S_ISDIR(tmp_inode->i_mode)) { - cFYI(1, ("Directory inode")); - tmp_inode->i_op = &cifs_dir_inode_ops; - tmp_inode->i_fop = &cifs_dir_ops; - } else if (S_ISLNK(tmp_inode->i_mode)) { - cFYI(1, ("Symbolic Link inode")); - tmp_inode->i_op = &cifs_symlink_inode_ops; - } else { - cFYI(1, ("Init special inode")); - init_special_inode(tmp_inode, tmp_inode->i_mode, - tmp_inode->i_rdev); - } + cifs_fill_common_info(fattr, cifs_sb); } /* BB eventually need to add the following helper function to @@ -846,11 +681,10 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir, int rc = 0; struct qstr qstring; struct cifsFileInfo *pCifsF; - unsigned int obj_type; - __u64 inum; + u64 inum; ino_t ino; + struct super_block *sb; struct cifs_sb_info *cifs_sb; - struct inode *tmp_inode; struct dentry *tmp_dentry; struct cifs_fattr fattr; @@ -870,71 +704,53 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir, if (rc != 0) return 0; - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + sb = 
file->f_path.dentry->d_sb; + cifs_sb = CIFS_SB(sb); qstring.name = scratch_buf; rc = cifs_get_name_from_search_buf(&qstring, pfindEntry, pCifsF->srch_inf.info_level, pCifsF->srch_inf.unicode, cifs_sb, - max_len, - &inum /* returned */); + max_len, &inum /* returned */); if (rc) return rc; - /* only these two infolevels return valid inode numbers */ - if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX) { + if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX) cifs_unix_basic_to_fattr(&fattr, &((FILE_UNIX_INFO *) pfindEntry)->basic, cifs_sb); - tmp_dentry = cifs_readdir_lookup(file->f_dentry, &qstring, - &fattr); - obj_type = fattr.cf_dtype; - ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid); - } else { - if (pCifsF->srch_inf.info_level == - SMB_FIND_FILE_ID_FULL_DIR_INFO) - rc = construct_dentry(&qstring, file, &tmp_inode, - &tmp_dentry, &inum); - else - rc = construct_dentry(&qstring, file, &tmp_inode, - &tmp_dentry, NULL); + else if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) + cifs_std_info_to_fattr(&fattr, (FIND_FILE_STANDARD_INFO *) + pfindEntry, cifs_sb); + else + cifs_dir_info_to_fattr(&fattr, (FILE_DIRECTORY_INFO *) + pfindEntry, cifs_sb); - if ((tmp_inode == NULL) || (tmp_dentry == NULL)) { - rc = -ENOMEM; - goto out; - } + /* FIXME: make _to_fattr functions fill this out */ + if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_ID_FULL_DIR_INFO) + fattr.cf_uniqueid = inum; + else + fattr.cf_uniqueid = iunique(sb, ROOT_I); - /* we pass in rc below, indicating whether it is a new inode, - * so we can figure out whether to invalidate the inode cached - * data if the file has changed - */ - if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) - fill_in_inode(tmp_inode, 0, pfindEntry, &obj_type, rc); - else - fill_in_inode(tmp_inode, 1, pfindEntry, &obj_type, rc); - - /* new inode - needs to be tied to dentry */ - if (rc) { - d_instantiate(tmp_dentry, tmp_inode); - if (rc == 2) - d_rehash(tmp_dentry); - } - - ino = 
cifs_uniqueid_to_ino_t(tmp_inode->i_ino); - } + ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid); + tmp_dentry = cifs_readdir_lookup(file->f_dentry, &qstring, &fattr); rc = filldir(direntry, qstring.name, qstring.len, file->f_pos, - ino, obj_type); + ino, fattr.cf_dtype); + + /* + * we can not return filldir errors to the caller since they are + * "normal" when the stat blocksize is too small - we return remapped + * error instead + * + * FIXME: This looks bogus. filldir returns -EOVERFLOW in the above + * case already. Why should we be clobbering other errors from it? + */ if (rc) { cFYI(1, ("filldir rc = %d", rc)); - /* we can not return filldir errors to the caller - since they are "normal" when the stat blocksize - is too small - we return remapped error instead */ rc = -EOVERFLOW; } - -out: dput(tmp_dentry); return rc; } From aeaaf253c4dee7ff9af2f3f0595f3bb66964e944 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 9 Jul 2009 01:46:39 -0400 Subject: [PATCH 700/741] cifs: remove cifsInodeInfo->inUse counter cifs: remove cifsInodeInfo->inUse counter It was purported to be a refcounter of some sort, but was never used that way. It never served any purpose that wasn't served equally well by the I_NEW flag. 
Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 1 - fs/cifs/cifsglob.h | 1 - 2 files changed, 2 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 9f669f982c4d..44f30504b82d 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -308,7 +308,6 @@ cifs_alloc_inode(struct super_block *sb) if (!cifs_inode) return NULL; cifs_inode->cifsAttrs = 0x20; /* default */ - atomic_set(&cifs_inode->inUse, 0); cifs_inode->time = 0; cifs_inode->write_behind_rc = 0; /* Until the file is open and we have gotten oplock diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 8bcf5a4bcded..63f6cdfa5638 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -364,7 +364,6 @@ struct cifsInodeInfo { struct list_head openFileList; int write_behind_rc; __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ - atomic_t inUse; /* num concurrent users (local openers cifs) of file*/ unsigned long time; /* jiffies of last update/check of inode */ bool clientCanCacheRead:1; /* read oplock */ bool clientCanCacheAll:1; /* read and writebehind oplock */ From d0c280d26de9422c9c943f8f486b9830cd9bea70 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 9 Jul 2009 01:46:44 -0400 Subject: [PATCH 701/741] cifs: when ATTR_READONLY is set, only clear write bits on non-directories cifs: when ATTR_READONLY is set, only clear write bits on non-directories On Windows servers, ATTR_READONLY apparently either has no meaning or serves as some sort of cue to certain applications for unrelated behavior. This MS kbase article has details: http://support.microsoft.com/kb/326549/ Don't clear the write bits in the directory mode when ATTR_READONLY is set. 
Reported-by: pouchat@peewiki.net Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a807397f444e..18afe57b2461 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -419,11 +419,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, } else { fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; fattr->cf_dtype = DT_REG; - } - /* clear write bits if ATTR_READONLY is set */ - if (fattr->cf_cifsattrs & ATTR_READONLY) - fattr->cf_mode &= ~(S_IWUGO); + /* clear write bits if ATTR_READONLY is set */ + if (fattr->cf_cifsattrs & ATTR_READONLY) + fattr->cf_mode &= ~(S_IWUGO); + } fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); From a57de0b4336e48db2811a2030bb68dba8dd09d88 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 8 Jul 2009 12:09:13 +0000 Subject: [PATCH 702/741] net: adding memory barrier to the poll and receive callbacks Adding memory barrier after the poll_wait function, paired with receive callbacks. Adding functions sock_poll_wait and sk_has_sleeper to wrap the memory barrier. Without the memory barrier, the following race can happen. The race fires, when the following code paths meet, and the tp->rcv_nxt and __add_wait_queue updates stay in CPU caches. CPU1 CPU2 sys_select receive packet ... ... __add_wait_queue update tp->rcv_nxt ... ... tp->rcv_nxt check sock_def_readable ... { schedule ... if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep) ... } If there was no cache the code would work ok, since the wait_queue and rcv_nxt are opposite to each other. Meaning that once tp->rcv_nxt is updated by CPU2, the CPU1 either already passed the tp->rcv_nxt check and sleeps, or will get the new value for tp->rcv_nxt and will return with new data mask. 
In both cases the process (CPU1) is being added to the wait queue, so the waitqueue_active (CPU2) call cannot miss and will wake up CPU1. The bad case is when the __add_wait_queue changes done by CPU1 stay in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1 will then end up calling schedule and sleep forever if there are no more data on the socket. Calls to poll_wait in following modules were omitted: net/bluetooth/af_bluetooth.c net/irda/af_irda.c net/irda/irnet/irnet_ppp.c net/mac80211/rc80211_pid_debugfs.c net/phonet/socket.c net/rds/af_rds.c net/rfkill/core.c net/sunrpc/cache.c net/sunrpc/rpc_pipe.c net/tipc/socket.c Signed-off-by: Jiri Olsa Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 66 ++++++++++++++++++++++++++++++++++++++++++++ net/atm/common.c | 6 ++-- net/core/datagram.c | 2 +- net/core/sock.c | 8 +++--- net/dccp/output.c | 2 +- net/dccp/proto.c | 2 +- net/ipv4/tcp.c | 2 +- net/iucv/af_iucv.c | 4 +-- net/rxrpc/af_rxrpc.c | 4 +-- net/unix/af_unix.c | 8 +++--- 10 files changed, 85 insertions(+), 19 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 352f06bbd7a9..4eb8409249f6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -54,6 +54,7 @@ #include #include +#include #include #include @@ -1241,6 +1242,71 @@ static inline int sk_has_allocations(const struct sock *sk) return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk); } +/** + * sk_has_sleeper - check if there are any waiting processes + * @sk: socket + * + * Returns true if socket has waiting processes + * + * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory + * barrier call. They were added due to the race found within the tcp code. + * + * Consider following tcp code paths: + * + * CPU1 CPU2 + * + * sys_select receive packet + * ... ... + * __add_wait_queue update tp->rcv_nxt + * ... ... + * tp->rcv_nxt check sock_def_readable + * ... { + * schedule ...
+ * if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + * wake_up_interruptible(sk->sk_sleep) + * ... + * } + * + * The race for tcp fires when the __add_wait_queue changes done by CPU1 stay + * in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1 + * could then endup calling schedule and sleep forever if there are no more + * data on the socket. + */ +static inline int sk_has_sleeper(struct sock *sk) +{ + /* + * We need to be sure we are in sync with the + * add_wait_queue modifications to the wait queue. + * + * This memory barrier is paired in the sock_poll_wait. + */ + smp_mb(); + return sk->sk_sleep && waitqueue_active(sk->sk_sleep); +} + +/** + * sock_poll_wait - place memory barrier behind the poll_wait call. + * @filp: file + * @wait_address: socket wait queue + * @p: poll_table + * + * See the comments in the sk_has_sleeper function. + */ +static inline void sock_poll_wait(struct file *filp, + wait_queue_head_t *wait_address, poll_table *p) +{ + if (p && wait_address) { + poll_wait(filp, wait_address, p); + /* + * We need to be sure we are in sync with the + * socket flags modification. + * + * This memory barrier is paired in the sk_has_sleeper. + */ + smp_mb(); + } +} + /* * Queue a received datagram if it will fit. 
Stream and sequenced * protocols can't normally use this as they need to fit buffers in diff --git a/net/atm/common.c b/net/atm/common.c index c1c97936192c..8c4d843eb17f 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -92,7 +92,7 @@ static void vcc_sock_destruct(struct sock *sk) static void vcc_def_wakeup(struct sock *sk) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up(sk->sk_sleep); read_unlock(&sk->sk_callback_lock); } @@ -110,7 +110,7 @@ static void vcc_write_space(struct sock *sk) read_lock(&sk->sk_callback_lock); if (vcc_writable(sk)) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible(sk->sk_sleep); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); @@ -594,7 +594,7 @@ unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait) struct atm_vcc *vcc; unsigned int mask; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; vcc = ATM_SD(sock); diff --git a/net/core/datagram.c b/net/core/datagram.c index 58abee1f1df1..b0fe69211eef 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -712,7 +712,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; unsigned int mask; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; /* exceptional events? 
*/ diff --git a/net/core/sock.c b/net/core/sock.c index b0ba569bc973..6354863b1c68 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1715,7 +1715,7 @@ EXPORT_SYMBOL(sock_no_sendpage); static void sock_def_wakeup(struct sock *sk) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_all(sk->sk_sleep); read_unlock(&sk->sk_callback_lock); } @@ -1723,7 +1723,7 @@ static void sock_def_wakeup(struct sock *sk) static void sock_def_error_report(struct sock *sk) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_poll(sk->sk_sleep, POLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); read_unlock(&sk->sk_callback_lock); @@ -1732,7 +1732,7 @@ static void sock_def_error_report(struct sock *sk) static void sock_def_readable(struct sock *sk, int len) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); @@ -1747,7 +1747,7 @@ static void sock_def_write_space(struct sock *sk) * progress. 
--DaveM */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | POLLWRNORM | POLLWRBAND); diff --git a/net/dccp/output.c b/net/dccp/output.c index c0e88c16d088..c96119fda688 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -196,7 +196,7 @@ void dccp_write_space(struct sock *sk) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible(sk->sk_sleep); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 314a1b5c033c..94ca8eaace7d 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -311,7 +311,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock, unsigned int mask; struct sock *sk = sock->sk; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); if (sk->sk_state == DCCP_LISTEN) return inet_csk_listen_poll(sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7870a535dac6..91145244ea63 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -339,7 +339,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sock *sk = sock->sk; struct tcp_sock *tp = tcp_sk(sk); - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); if (sk->sk_state == TCP_LISTEN) return inet_csk_listen_poll(sk); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 6be5f92d1094..49c15b48408e 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -306,7 +306,7 @@ static inline int iucv_below_msglim(struct sock *sk) static void iucv_sock_wake_msglim(struct sock *sk) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_all(sk->sk_sleep); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 
read_unlock(&sk->sk_callback_lock); @@ -1256,7 +1256,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; unsigned int mask = 0; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); if (sk->sk_state == IUCV_LISTEN) return iucv_accept_poll(sk); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index eac5e7bb7365..bfe493ebf27c 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -63,7 +63,7 @@ static void rxrpc_write_space(struct sock *sk) _enter("%p", sk); read_lock(&sk->sk_callback_lock); if (rxrpc_writable(sk)) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible(sk->sk_sleep); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } @@ -588,7 +588,7 @@ static unsigned int rxrpc_poll(struct file *file, struct socket *sock, unsigned int mask; struct sock *sk = sock->sk; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; /* the socket is readable if there are any messages waiting on the Rx diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 36d4e44d6233..fc3ebb906911 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -315,7 +315,7 @@ static void unix_write_space(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (unix_writable(sk)) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_sync(sk->sk_sleep); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } @@ -1985,7 +1985,7 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table struct sock *sk = sock->sk; unsigned int mask; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; /* exceptional events? 
*/ @@ -2022,7 +2022,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk, *other; unsigned int mask, writable; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; /* exceptional events? */ @@ -2053,7 +2053,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, other = unix_peer_get(sk); if (other) { if (unix_peer(other) != sk) { - poll_wait(file, &unix_sk(other)->peer_wait, + sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); if (unix_recvq_full(other)) writable = 0; From ad46276952f1af34cd91d46d49ba13d347d56367 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 8 Jul 2009 12:10:31 +0000 Subject: [PATCH 703/741] memory barrier: adding smp_mb__after_lock Adding smp_mb__after_lock define to be used as a smp_mb call after a lock. Making it nop for x86, since {read|write|spin}_lock() on x86 are full memory barriers. Signed-off-by: Jiri Olsa Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/x86/include/asm/spinlock.h | 4 ++++ include/linux/spinlock.h | 5 +++++ include/net/sock.h | 5 ++++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index b7e5db876399..4e77853321db 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -302,4 +302,8 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw) #define _raw_read_relax(lock) cpu_relax() #define _raw_write_relax(lock) cpu_relax() +/* The {read|write|spin}_lock() on x86 are full memory barriers. 
*/ +static inline void smp_mb__after_lock(void) { } +#define ARCH_HAS_SMP_MB_AFTER_LOCK + #endif /* _ASM_X86_SPINLOCK_H */ diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 252b245cfcf4..4be57ab03478 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -132,6 +132,11 @@ do { \ #endif /*__raw_spin_is_contended*/ #endif +/* The lock does not imply full memory barrier. */ +#ifndef ARCH_HAS_SMP_MB_AFTER_LOCK +static inline void smp_mb__after_lock(void) { smp_mb(); } +#endif + /** * spin_unlock_wait - wait until the spinlock gets unlocked * @lock: the spinlock in question. diff --git a/include/net/sock.h b/include/net/sock.h index 4eb8409249f6..2c0da9239b95 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1271,6 +1271,9 @@ static inline int sk_has_allocations(const struct sock *sk) * in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1 * could then endup calling schedule and sleep forever if there are no more * data on the socket. + * + * The sk_has_sleeper is always called right after a call to read_lock, so we + * can use smp_mb__after_lock barrier. */ static inline int sk_has_sleeper(struct sock *sk) { @@ -1280,7 +1283,7 @@ static inline int sk_has_sleeper(struct sock *sk) * * This memory barrier is paired in the sock_poll_wait. */ - smp_mb(); + smp_mb__after_lock(); return sk->sk_sleep && waitqueue_active(sk->sk_sleep); } From 8faa2a786a5337683109d77ccf880339fdcdb332 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Thu, 9 Jul 2009 02:29:50 +0000 Subject: [PATCH 704/741] ixgbe: Fix coexistence of FCoE and Flow Director in 82599 Fix coexistence of Fiber Channel over Ethernet (FCoE) and Flow Director (FDIR) in 82599 and remove the disabling of FDIR when FCoE is enabled. Currently, FDIR is turned off when FCoE is enabled under the assumption that FCoE is always enabled with DCB being turned on. 
However, FDIR does not have to be turned off all the time when FCoE is enabled since FCoE can be enabled without DCB being turned on, e.g., use link pause only. This patch makes sure that when DCB is turned on or off, FDIR is turned on or off correspondingly; and when FCoE is enabled, it does not disable FDIR, rather, it will have FDIR set up properly so FCoE and FDIR can coexist regardless of DCB being on or off. Signed-off-by: Yi Zou Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_dcb_nl.c | 6 ++++++ drivers/net/ixgbe/ixgbe_main.c | 19 +++++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c index d56890f5c9d5..7c5978ad929a 100644 --- a/drivers/net/ixgbe/ixgbe_dcb_nl.c +++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c @@ -138,6 +138,10 @@ static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state) adapter->hw.fc.requested_mode = ixgbe_fc_none; } adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; + if (adapter->hw.mac.type == ixgbe_mac_82599EB) { + adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; + adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE; + } adapter->flags |= IXGBE_FLAG_DCB_ENABLED; ixgbe_init_interrupt_scheme(adapter); if (netif_running(netdev)) @@ -154,6 +158,8 @@ static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state) adapter->dcb_cfg.pfc_mode_enable = false; adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED; adapter->flags |= IXGBE_FLAG_RSS_ENABLED; + if (adapter->hw.mac.type == ixgbe_mac_82599EB) + adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; ixgbe_init_interrupt_scheme(adapter); if (netif_running(netdev)) netdev->netdev_ops->ndo_open(netdev); diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index a3061aacffd8..e3442f47f932 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -3130,7 +3130,11 @@ static inline bool ixgbe_set_fcoe_queues(struct ixgbe_adapter 
*adapter) #endif if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) { DPRINTK(PROBE, INFO, "FCOE enabled with RSS \n"); - ixgbe_set_rss_queues(adapter); + if ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) || + (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) + ixgbe_set_fdir_queues(adapter); + else + ixgbe_set_rss_queues(adapter); } /* adding FCoE rx rings to the end */ f->mask = adapter->num_rx_queues; @@ -3388,7 +3392,12 @@ static inline bool ixgbe_cache_ring_fcoe(struct ixgbe_adapter *adapter) } #endif /* CONFIG_IXGBE_DCB */ if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) { - ixgbe_cache_ring_rss(adapter); + if ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) || + (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) + ixgbe_cache_ring_fdir(adapter); + else + ixgbe_cache_ring_rss(adapter); + fcoe_i = f->mask; } for (i = 0; i < f->indices; i++, fcoe_i++) @@ -5578,12 +5587,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, netdev->features |= NETIF_F_FCOE_CRC; netdev->features |= NETIF_F_FSO; netdev->fcoe_ddp_xid = IXGBE_FCOE_DDP_MAX - 1; - DPRINTK(DRV, INFO, "FCoE enabled, " - "disabling Flow Director\n"); - adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; - adapter->flags &= - ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE; - adapter->atr_sample_rate = 0; } else { adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED; } From e594e96e8a14101a6decabf6746bd5186287debc Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 9 Jul 2009 09:30:25 +0000 Subject: [PATCH 705/741] cxgb3: Fix crash caused by stashing wrong netdev_queue Commit c3a8c5b6 ("cxgb3: move away from LLTX") exposed a bug in how cxgb3 looks up the netdev_queue it stashes away in a qset during initialization. For multiport devices, the TX queue index it uses is offset by the first_qset index of each port. This leads to a crash once LLTX is removed, since hard_start_xmit is called with one TX queue lock held, while the TX reclaim timer task grabs a different (wrong) TX queue lock when it frees skbs. 
Fix this by removing the first_qset offset used to look up the TX queue passed into t3_sge_alloc_qset() from setup_sge_qsets(). Signed-off-by: Roland Dreier Acked-by: Divy Le Ray Signed-off-by: David S. Miller --- drivers/net/cxgb3/cxgb3_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c index 538dda4422dc..fb5df5c6203e 100644 --- a/drivers/net/cxgb3/cxgb3_main.c +++ b/drivers/net/cxgb3/cxgb3_main.c @@ -642,8 +642,7 @@ static int setup_sge_qsets(struct adapter *adap) struct port_info *pi = netdev_priv(dev); pi->qs = &adap->sge.qs[pi->first_qset]; - for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; - ++j, ++qset_idx) { + for (j = 0; j < pi->nqsets; ++j, ++qset_idx) { set_qset_lro(dev, qset_idx, pi->rx_offload & T3_LRO); err = t3_sge_alloc_qset(adap, qset_idx, 1, (adap->flags & USING_MSIX) ? qset_idx + 1 : From 8d7ff4f2a0b22b7d6d7bc3982257d1dadea22824 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 23 Jun 2009 11:48:14 +0200 Subject: [PATCH 706/741] x86/oprofile: rename kernel parameter for architectural perfmon to arch_perfmon The short name of the architecture is 'arch_perfmon'. This patch changes the kernel parameter to use this name. Cc: Andi Kleen Signed-off-by: Robert Richter Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 4 ++-- arch/x86/oprofile/nmi_int.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 92e1ab8178a8..c59e965a748d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1728,8 +1728,8 @@ and is between 256 and 4096 characters. It is defined in the file oprofile.cpu_type= Force an oprofile cpu type This might be useful if you have an older oprofile userland or if you want common events.
- Format: { archperfmon } - archperfmon: [X86] Force use of architectural + Format: { arch_perfmon } + arch_perfmon: [X86] Force use of architectural perfmon on Intel CPUs instead of the CPU specific event set. diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index b07dd8d0b321..89b9a5cd63da 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -390,7 +390,7 @@ static int __init p4_init(char **cpu_type) static int force_arch_perfmon; static int force_cpu_type(const char *str, struct kernel_param *kp) { - if (!strcmp(str, "archperfmon")) { + if (!strcmp(str, "arch_perfmon")) { force_arch_perfmon = 1; printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); } From 42359da44112565e12a5209befb36dc6b6d6cd9c Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 9 Jul 2009 21:54:39 -0400 Subject: [PATCH 707/741] Fix compile error in bmac.c Looks like the change in ad361c9884e809340f6daca80d56a9e9c871690a wasn't compile tested. Signed-off-by: Dave Jones Acked-by: David S. Miller Signed-off-by: Linus Torvalds --- drivers/net/bmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bmac.c b/drivers/net/bmac.c index a76315dc7767..206144f2470f 100644 --- a/drivers/net/bmac.c +++ b/drivers/net/bmac.c @@ -431,7 +431,7 @@ bmac_init_phy(struct net_device *dev) printk(KERN_DEBUG); printk(KERN_CONT " %.4x", bmac_mif_read(dev, addr)); } - print(KERN_CONT "\n"); + printk(KERN_CONT "\n"); if (bp->is_bmac_plus) { unsigned int capable, ctrl; From afecb0d02ad5554cb59c2a30c262da200beaa002 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Thu, 9 Jul 2009 20:15:44 +0200 Subject: [PATCH 708/741] sm501fb: fix regression with uninitalized fb_info->mm_lock mutex Remove call to the fb_set_par() before the register_framebuffer(). 
This fixes a problem with the uninitialized fb_info->mm_lock mutex introduced by the commit 537a1bf059f "fbdev: add mutex for fb_mmap locking" Signed-off-by: Krzysztof Helt Signed-off-by: Linus Torvalds --- drivers/video/sm501fb.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c index 16d4f4c7d52b..924d79462780 100644 --- a/drivers/video/sm501fb.c +++ b/drivers/video/sm501fb.c @@ -1540,9 +1540,6 @@ static int sm501fb_init_fb(struct fb_info *fb, if (ret) dev_err(info->dev, "check_var() failed on initial setup?\n"); - /* ensure we've activated our new configuration */ - (fb->fbops->fb_set_par)(fb); - return 0; } From 016d3569bf7b21375451d91be6ee2ad4ffff5211 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Thu, 9 Jul 2009 20:14:10 +0200 Subject: [PATCH 709/741] mx3fb: fix regression with uninitalized fb_info->mm_lock mutex Remove call to the mx3fb_set_par() and the mx3fb_blank() before the register_framebuffer(). This fixes a problem with the uninitialized fb_info->mm_lock mutex introduced by the commit 537a1bf059f "fbdev: add mutex for fb_mmap locking" Signed-off-by: Krzysztof Helt Signed-off-by: Linus Torvalds --- drivers/video/mx3fb.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/video/mx3fb.c b/drivers/video/mx3fb.c index 567fb944bd2a..f8778cde2183 100644 --- a/drivers/video/mx3fb.c +++ b/drivers/video/mx3fb.c @@ -1365,11 +1365,6 @@ static int init_fb_chan(struct mx3fb_data *mx3fb, struct idmac_channel *ichan) init_completion(&mx3fbi->flip_cmpl); disable_irq(ichan->eof_irq); dev_dbg(mx3fb->dev, "disabling irq %d\n", ichan->eof_irq); - ret = mx3fb_set_par(fbi); - if (ret < 0) - goto esetpar; - - mx3fb_blank(FB_BLANK_UNBLANK, fbi); dev_info(dev, "registered, using mode %s\n", fb_mode); From 2b8777ca0c944bf6498c45ed9c5c246bd63a719e Mon Sep 17 00:00:00 2001 From: Maynard Johnson Date: Wed, 27 May 2009 10:15:08 -0500 Subject: [PATCH 710/741] oprofile: reset bt_lost_no_mapping with other stats The
bt_lost_no_mapping is not getting reset at the start of a profiling run, thus the oprofiled.log shows erroneous values for this statistic. The attached patch fixes this problem. Signed-off-by: Maynard Johnson Signed-off-by: Robert Richter Signed-off-by: Ingo Molnar --- drivers/oprofile/oprofile_stats.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/oprofile/oprofile_stats.c b/drivers/oprofile/oprofile_stats.c index e1f6ce03705e..3c2270a8300c 100644 --- a/drivers/oprofile/oprofile_stats.c +++ b/drivers/oprofile/oprofile_stats.c @@ -33,6 +33,7 @@ void oprofile_reset_stats(void) atomic_set(&oprofile_stats.sample_lost_no_mm, 0); atomic_set(&oprofile_stats.sample_lost_no_mapping, 0); atomic_set(&oprofile_stats.event_lost_overflow, 0); + atomic_set(&oprofile_stats.bt_lost_no_mapping, 0); } From 8aa7e847d834ed937a9ad37a0f2ad5b8584c1ab0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 9 Jul 2009 14:52:32 +0200 Subject: [PATCH 711/741] Fix congestion_wait() sync/async vs read/write confusion Commit 1faa16d22877f4839bd433547d770c676d1d964c accidentally broke the bdi congestion wait queue logic, causing us to wait on congestion for WRITE (== 1) when we really wanted BLK_RW_ASYNC (== 0) instead. 
Signed-off-by: Jens Axboe --- arch/x86/lib/usercopy_32.c | 2 +- drivers/block/pktcdvd.c | 10 ++++++---- drivers/md/dm-crypt.c | 2 +- fs/fat/file.c | 2 +- fs/fuse/dev.c | 8 ++++---- fs/nfs/write.c | 8 +++++--- fs/reiserfs/journal.c | 2 +- fs/xfs/linux-2.6/kmem.c | 4 ++-- fs/xfs/linux-2.6/xfs_buf.c | 2 +- include/linux/backing-dev.h | 6 +++--- include/linux/blkdev.h | 8 ++++---- mm/backing-dev.c | 7 +++---- mm/memcontrol.c | 2 +- mm/page-writeback.c | 8 ++++---- mm/page_alloc.c | 4 ++-- mm/vmscan.c | 8 ++++---- 16 files changed, 43 insertions(+), 40 deletions(-) diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 7c8ca91bb9ec..1f118d462acc 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -751,7 +751,7 @@ survive: if (retval == -ENOMEM && is_global_init(current)) { up_read(&current->mm->mmap_sem); - congestion_wait(WRITE, HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); goto survive; } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 83650e00632d..99a506f619b7 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -1372,8 +1372,10 @@ try_next_bio: wakeup = (pd->write_congestion_on > 0 && pd->bio_queue_size <= pd->write_congestion_off); spin_unlock(&pd->lock); - if (wakeup) - clear_bdi_congested(&pd->disk->queue->backing_dev_info, WRITE); + if (wakeup) { + clear_bdi_congested(&pd->disk->queue->backing_dev_info, + BLK_RW_ASYNC); + } pkt->sleep_time = max(PACKET_WAIT_TIME, 1); pkt_set_state(pkt, PACKET_WAITING_STATE); @@ -2592,10 +2594,10 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio) spin_lock(&pd->lock); if (pd->write_congestion_on > 0 && pd->bio_queue_size >= pd->write_congestion_on) { - set_bdi_congested(&q->backing_dev_info, WRITE); + set_bdi_congested(&q->backing_dev_info, BLK_RW_ASYNC); do { spin_unlock(&pd->lock); - congestion_wait(WRITE, HZ); + congestion_wait(BLK_RW_ASYNC, HZ); spin_lock(&pd->lock); } while(pd->bio_queue_size > pd->write_congestion_off); }
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 9933eb861c71..529e2ba505c3 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -776,7 +776,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) * But don't wait if split was due to the io size restriction */ if (unlikely(out_of_pages)) - congestion_wait(WRITE, HZ/100); + congestion_wait(BLK_RW_ASYNC, HZ/100); /* * With async crypto it is unsafe to share the crypto context diff --git a/fs/fat/file.c b/fs/fat/file.c index b28ea646ff60..f042b965c95c 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -134,7 +134,7 @@ static int fat_file_release(struct inode *inode, struct file *filp) if ((filp->f_mode & FMODE_WRITE) && MSDOS_SB(inode->i_sb)->options.flush) { fat_flush_inodes(inode->i_sb, inode, NULL); - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); } return 0; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f58ecbc416c8..6484eb75acd6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -286,8 +286,8 @@ __releases(&fc->lock) } if (fc->num_background == FUSE_CONGESTION_THRESHOLD && fc->connected && fc->bdi_initialized) { - clear_bdi_congested(&fc->bdi, READ); - clear_bdi_congested(&fc->bdi, WRITE); + clear_bdi_congested(&fc->bdi, BLK_RW_SYNC); + clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC); } fc->num_background--; fc->active_background--; @@ -414,8 +414,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, fc->blocked = 1; if (fc->num_background == FUSE_CONGESTION_THRESHOLD && fc->bdi_initialized) { - set_bdi_congested(&fc->bdi, READ); - set_bdi_congested(&fc->bdi, WRITE); + set_bdi_congested(&fc->bdi, BLK_RW_SYNC); + set_bdi_congested(&fc->bdi, BLK_RW_ASYNC); } list_add_tail(&req->list, &fc->bg_queue); flush_bg_queue(fc); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ce728829f79a..0a0a2ff767c3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -202,8 +202,10 @@ static int nfs_set_page_writeback(struct page *page) struct nfs_server 
*nfss = NFS_SERVER(inode); if (atomic_long_inc_return(&nfss->writeback) > - NFS_CONGESTION_ON_THRESH) - set_bdi_congested(&nfss->backing_dev_info, WRITE); + NFS_CONGESTION_ON_THRESH) { + set_bdi_congested(&nfss->backing_dev_info, + BLK_RW_ASYNC); + } } return ret; } @@ -215,7 +217,7 @@ static void nfs_end_page_writeback(struct page *page) end_page_writeback(page); if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) - clear_bdi_congested(&nfss->backing_dev_info, WRITE); + clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); } /* diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 77f5bb746bf0..90622200b39c 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -997,7 +997,7 @@ static int reiserfs_async_progress_wait(struct super_block *s) DEFINE_WAIT(wait); struct reiserfs_journal *j = SB_JOURNAL(s); if (atomic_read(&j->j_async_throttle)) - congestion_wait(WRITE, HZ / 10); + congestion_wait(BLK_RW_ASYNC, HZ / 10); return 0; } diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 1cd3b55ee3d2..2d3f90afe5f1 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -53,7 +53,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags) printk(KERN_ERR "XFS: possible memory allocation " "deadlock in %s (mode:0x%x)\n", __func__, lflags); - congestion_wait(WRITE, HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); } @@ -130,7 +130,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) printk(KERN_ERR "XFS: possible memory allocation " "deadlock in %s (mode:0x%x)\n", __func__, lflags); - congestion_wait(WRITE, HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); } diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 1418b916fc27..0c93c7ef3d18 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -412,7 +412,7 @@ _xfs_buf_lookup_pages( XFS_STATS_INC(xb_page_retries); xfsbufd_wakeup(0, gfp_mask); - congestion_wait(WRITE, 
HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry; } diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 0ec2c594868e..3a52a63c1351 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -229,9 +229,9 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) (1 << BDI_async_congested)); } -void clear_bdi_congested(struct backing_dev_info *bdi, int rw); -void set_bdi_congested(struct backing_dev_info *bdi, int rw); -long congestion_wait(int rw, long timeout); +void clear_bdi_congested(struct backing_dev_info *bdi, int sync); +void set_bdi_congested(struct backing_dev_info *bdi, int sync); +long congestion_wait(int sync, long timeout); static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 49ae07951d55..bb3d39978701 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -779,18 +779,18 @@ extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, * congested queues, and wake up anyone who was waiting for requests to be * put back. */ -static inline void blk_clear_queue_congested(struct request_queue *q, int rw) +static inline void blk_clear_queue_congested(struct request_queue *q, int sync) { - clear_bdi_congested(&q->backing_dev_info, rw); + clear_bdi_congested(&q->backing_dev_info, sync); } /* * A queue has just entered congestion. Flag that in the queue's VM-visible * state flags and increment the global gounter of congested queues. 
*/ -static inline void blk_set_queue_congested(struct request_queue *q, int rw) +static inline void blk_set_queue_congested(struct request_queue *q, int sync) { - set_bdi_congested(&q->backing_dev_info, rw); + set_bdi_congested(&q->backing_dev_info, sync); } extern void blk_start_queue(struct request_queue *q); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 493b468a5035..c86edd244294 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -283,7 +283,6 @@ static wait_queue_head_t congestion_wqh[2] = { __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) }; - void clear_bdi_congested(struct backing_dev_info *bdi, int sync) { enum bdi_state bit; @@ -308,18 +307,18 @@ EXPORT_SYMBOL(set_bdi_congested); /** * congestion_wait - wait for a backing_dev to become uncongested - * @rw: READ or WRITE + * @sync: SYNC or ASYNC IO * @timeout: timeout in jiffies * * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit * write congestion. If no backing_devs are congested then just wait for the * next write to be completed. 
*/ -long congestion_wait(int rw, long timeout) +long congestion_wait(int sync, long timeout) { long ret; DEFINE_WAIT(wait); - wait_queue_head_t *wqh = &congestion_wqh[rw]; + wait_queue_head_t *wqh = &congestion_wqh[sync]; prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); ret = io_schedule_timeout(timeout); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e2fa20dadf40..e717964cb5a0 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1973,7 +1973,7 @@ try_to_free: if (!progress) { nr_retries--; /* maybe some writeback is necessary */ - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); } } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7687879253b9..81627ebcd313 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -575,7 +575,7 @@ static void balance_dirty_pages(struct address_space *mapping) if (pages_written >= write_chunk) break; /* We've done our duty */ - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); } if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh && @@ -669,7 +669,7 @@ void throttle_vm_writeout(gfp_t gfp_mask) if (global_page_state(NR_UNSTABLE_NFS) + global_page_state(NR_WRITEBACK) <= dirty_thresh) break; - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); /* * The caller might hold locks which can prevent IO completion @@ -715,7 +715,7 @@ static void background_writeout(unsigned long _min_pages) if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { /* Wrote less than expected */ if (wbc.encountered_congestion || wbc.more_io) - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); else break; } @@ -787,7 +787,7 @@ static void wb_kupdate(unsigned long arg) writeback_inodes(&wbc); if (wbc.nr_to_write > 0) { if (wbc.encountered_congestion || wbc.more_io) - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); else break; /* All the old data is written */ } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 
ad7cd1c56b07..a35eeab2724c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1666,7 +1666,7 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order, preferred_zone, migratetype); if (!page && gfp_mask & __GFP_NOFAIL) - congestion_wait(WRITE, HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); } while (!page && (gfp_mask & __GFP_NOFAIL)); return page; @@ -1831,7 +1831,7 @@ rebalance: pages_reclaimed += did_some_progress; if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) { /* Wait for some write requests to complete then retry */ - congestion_wait(WRITE, HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); goto rebalance; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 54155268dfca..dea7abd31098 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1104,7 +1104,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, */ if (nr_freed < nr_taken && !current_is_kswapd() && lumpy_reclaim) { - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); /* * The attempt at page out may have made some @@ -1721,7 +1721,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, /* Take a nap, wait for some writeback to complete */ if (sc->nr_scanned && priority < DEF_PRIORITY - 2) - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); } /* top priority shrink_zones still had more to do? don't OOM, then */ if (!sc->all_unreclaimable && scanning_global_lru(sc)) @@ -1960,7 +1960,7 @@ loop_again: * another pass across the zones. 
*/ if (total_scanned && priority < DEF_PRIORITY - 2) - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); /* * We do this so kswapd doesn't build up large priorities for @@ -2233,7 +2233,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) goto out; if (sc.nr_scanned && prio < DEF_PRIORITY - 2) - congestion_wait(WRITE, HZ / 10); + congestion_wait(BLK_RW_ASYNC, HZ / 10); } } From 76da03467a1a78811777561bbb1fa56175ee4778 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 9 Jul 2009 09:48:28 +0200 Subject: [PATCH 712/741] block: call blk_scsi_ioctl_init() Currently, blk_scsi_ioctl_init() is not called since it lacks an initcall marking. This causes the command table to be uninitialized, hence some commands are blocked when they should not have been. This fixes a regression introduced by commit 018e0446890661504783f92388ecce7138c1566d Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/scsi_ioctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index f0e0ce0a607d..e5b10017a50b 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -680,3 +680,4 @@ int __init blk_scsi_ioctl_init(void) blk_set_cmd_filter_defaults(&blk_default_cmd_filter); return 0; } +fs_initcall(blk_scsi_ioctl_init); From ecb554a846f8e9d2a58f6d6c118168a63ac065aa Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 9 Jul 2009 14:46:53 +0200 Subject: [PATCH 713/741] block: fix sg SG_DXFER_TO_FROM_DEV regression I overlooked SG_DXFER_TO_FROM_DEV support when I converted sg to use the block layer mapping API (2.6.28). Douglas Gilbert explained SG_DXFER_TO_FROM_DEV: http://www.spinics.net/lists/linux-scsi/msg37135.html = The semantics of SG_DXFER_TO_FROM_DEV were: - copy user space buffer to kernel (LLD) buffer - do SCSI command which is assumed to be of the DATA_IN (data from device) variety. This would overwrite some or all of the kernel buffer - copy kernel (LLD) buffer back to the user space.
The idea was to detect short reads by filling the original user space buffer with some marker bytes ("0xec" it would seem in this report). The "resid" value is a better way of detecting short reads but that was only added this century and requires co-operation from the LLD. = This patch changes the block layer mapping API to support these semantics. This simply adds another field to struct rq_map_data and enables __bio_copy_iov() to copy data from user space even with READ requests. It's better to add the flags field and kill null_mapped and the new from_user fields in struct rq_map_data but that approach makes it difficult to send this patch to stable trees because st and osst drivers use struct rq_map_data (they were converted to use the block layer in 2.6.29 and 2.6.30). Well, I should clean up the block layer mapping API. zhou sf reported this regression and tested this patch: http://www.spinics.net/lists/linux-scsi/msg37128.html http://www.spinics.net/lists/linux-scsi/msg37168.html Reported-by: zhou sf Tested-by: zhou sf Cc: stable@kernel.org Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 4 ++++ fs/bio.c | 22 ++++++++++++---------- include/linux/blkdev.h | 1 + 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 4d6f2fe1cfe9..9230402c45af 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1656,6 +1656,10 @@ static int sg_start_req(Sg_request *srp, unsigned char *cmd) md->nr_entries = req_schp->k_use_sg; md->offset = 0; md->null_mapped = hp->dxferp ?
0 : 1; + if (dxfer_dir == SG_DXFER_TO_FROM_DEV) + md->from_user = 1; + else + md->from_user = 0; } if (iov_count) { diff --git a/fs/bio.c b/fs/bio.c index 1486b19fc431..76738005c8e8 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -705,14 +705,13 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, } static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, - struct sg_iovec *iov, int iov_count, int uncopy, - int do_free_page) + struct sg_iovec *iov, int iov_count, + int to_user, int from_user, int do_free_page) { int ret = 0, i; struct bio_vec *bvec; int iov_idx = 0; unsigned int iov_off = 0; - int read = bio_data_dir(bio) == READ; __bio_for_each_segment(bvec, bio, i, 0) { char *bv_addr = page_address(bvec->bv_page); @@ -727,13 +726,14 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, iov_addr = iov[iov_idx].iov_base + iov_off; if (!ret) { - if (!read && !uncopy) - ret = copy_from_user(bv_addr, iov_addr, - bytes); - if (read && uncopy) + if (to_user) ret = copy_to_user(iov_addr, bv_addr, bytes); + if (from_user) + ret = copy_from_user(bv_addr, iov_addr, + bytes); + if (ret) ret = -EFAULT; } @@ -770,7 +770,8 @@ int bio_uncopy_user(struct bio *bio) if (!bio_flagged(bio, BIO_NULL_MAPPED)) ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, - bmd->nr_sgvecs, 1, bmd->is_our_pages); + bmd->nr_sgvecs, bio_data_dir(bio) == READ, + 0, bmd->is_our_pages); bio_free_map_data(bmd); bio_put(bio); return ret; @@ -875,8 +876,9 @@ struct bio *bio_copy_user_iov(struct request_queue *q, /* * success */ - if (!write_to_vm && (!map_data || !map_data->null_mapped)) { - ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0); + if ((!write_to_vm && (!map_data || !map_data->null_mapped)) || + (map_data && map_data->from_user)) { + ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0); if (ret) goto cleanup; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bb3d39978701..0146e0fecf1a 100644 --- 
a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -723,6 +723,7 @@ struct rq_map_data { int nr_entries; unsigned long offset; int null_mapped; + int from_user; }; struct req_iterator { From 32f2e807a3938b24d0831211e6094f9e44b2fc83 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Thu, 9 Jul 2009 22:13:16 +0200 Subject: [PATCH 714/741] cfq-iosched: reset oom_cfqq in cfq_set_request() In case memory is scarce, we now default to oom_cfqq. Once memory is available again, we should allocate a new cfqq and stop using oom_cfqq for a particular io context. Once a new request comes in, check if we are using oom_cfqq, and if yes, try to allocate a new cfqq. Tested the patch by forcing the use of oom_cfqq and upon next request thread realized that it was using oom_cfqq and it allocated a new cfqq. Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 87276eb83f7f..fd7080ed7935 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2311,7 +2311,7 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) goto queue_fail; cfqq = cic_to_cfqq(cic, is_sync); - if (!cfqq) { + if (!cfqq || cfqq == &cfqd->oom_cfqq) { cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask); cic_set_cfqq(cic, cfqq, is_sync); } From 2a34f5e6b61c7e8f3b6f25847bcda88511b0ead4 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 2 Jul 2009 09:30:50 -0700 Subject: [PATCH 715/741] drm/i915: Disable GEM when a broken video BIOS takes up the whole aperture. This is seen on some G41 systems, where the BIOS will consume all but a few KB of the aperture. This should be bad for all operating systems, as it means that the OS can't dynamically manage memory between graphics and the rest of the system, and OSes that did static memory management statically add memory in addition to the BIOS allocation anyway. 
So, instead of working around it, just fail out verbosely. fd.o bug #21574 Signed-off-by: Eric Anholt Reviewed-by: Ian Romanick --- drivers/gpu/drm/i915/i915_dma.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 0e704bb26e99..8c4783180bf6 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -885,8 +885,8 @@ static int i915_set_status_page(struct drm_device *dev, void *data, * some RAM for the framebuffer at early boot. This code figures out * how much was set aside so we can use it for our own purposes. */ -static int i915_probe_agp(struct drm_device *dev, unsigned long *aperture_size, - unsigned long *preallocated_size) +static int i915_probe_agp(struct drm_device *dev, uint32_t *aperture_size, + uint32_t *preallocated_size) { struct pci_dev *bridge_dev; u16 tmp = 0; @@ -984,10 +984,11 @@ static int i915_probe_agp(struct drm_device *dev, unsigned long *aperture_size, return 0; } -static int i915_load_modeset_init(struct drm_device *dev) +static int i915_load_modeset_init(struct drm_device *dev, + unsigned long prealloc_size, + unsigned long agp_size) { struct drm_i915_private *dev_priv = dev->dev_private; - unsigned long agp_size, prealloc_size; int fb_bar = IS_I9XX(dev) ? 2 : 0; int ret = 0; @@ -1002,10 +1003,6 @@ static int i915_load_modeset_init(struct drm_device *dev) if (IS_I965G(dev) || IS_G33(dev)) dev_priv->cursor_needs_physical = false; - ret = i915_probe_agp(dev, &agp_size, &prealloc_size); - if (ret) - goto out; - /* Basic memrange allocator for stolen space (aka vram) */ drm_mm_init(&dev_priv->vram, 0, prealloc_size); @@ -1136,6 +1133,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) struct drm_i915_private *dev_priv = dev->dev_private; resource_size_t base, size; int ret = 0, mmio_bar = IS_I9XX(dev) ? 
0 : 1; + uint32_t agp_size, prealloc_size; /* i915 has 4 more counters */ dev->counters += 4; @@ -1184,9 +1182,22 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) "performance may suffer.\n"); } + ret = i915_probe_agp(dev, &agp_size, &prealloc_size); + if (ret) + goto out_iomapfree; + /* enable GEM by default */ dev_priv->has_gem = 1; + if (prealloc_size > agp_size * 3 / 4) { + DRM_ERROR("Detected broken video BIOS with %d/%dkB of video " + "memory stolen.\n", + prealloc_size / 1024, agp_size / 1024); + DRM_ERROR("Disabling GEM. (try reducing stolen memory or " + "updating the BIOS to fix).\n"); + dev_priv->has_gem = 0; + } + dev->driver->get_vblank_counter = i915_get_vblank_counter; dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */ if (IS_G4X(dev) || IS_IGDNG(dev)) { @@ -1231,7 +1242,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) } if (drm_core_check_feature(dev, DRIVER_MODESET)) { - ret = i915_load_modeset_init(dev); + ret = i915_load_modeset_init(dev, prealloc_size, agp_size); if (ret < 0) { DRM_ERROR("failed to init modeset\n"); goto out_rmmap; From 883e860daf5c75a0035c33cb6f8881ee62d6efaf Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 10 Jul 2009 12:28:30 -0700 Subject: [PATCH 716/741] drm/i915: Fix harmless warning from patch merged after i2c rework. 
Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_crt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 954b8599fc76..d6a1a6e5539a 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -430,7 +430,7 @@ static int intel_crt_get_modes(struct drm_connector *connector) { int ret; struct intel_output *intel_output = to_intel_output(connector); - struct intel_i2c_chan *ddcbus; + struct i2c_adapter *ddcbus; struct drm_device *dev = connector->dev; From 354ff96772540d2e836194bf14dd9c05c274055c Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Wed, 8 Jul 2009 14:13:12 +0800 Subject: [PATCH 717/741] drm/i915: Restore the KMS modeset for every activated CRTC Restore the modeset for every activated CRTC in course of resume. This is realized by calling the function of drm_helper_resume_force_mode. Note: it is meaningful only for the KMS mode. https://bugs.freedesktop.org/show_bug.cgi?id=21719 https://bugs.freedesktop.org/show_bug.cgi?id=21708 https://bugs.freedesktop.org/show_bug.cgi?id=22285 https://bugs.freedesktop.org/show_bug.cgi?id=22263 Signed-off-by: Zhao Yakui Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_drv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index aef2a261a837..fc4b68aa2d05 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -35,6 +35,7 @@ #include "drm_pciids.h" #include +#include "drm_crtc_helper.h" static unsigned int i915_modeset = -1; module_param_named(modeset, i915_modeset, int, 0400); @@ -115,6 +116,10 @@ static int i915_resume(struct drm_device *dev) drm_irq_install(dev); } + if (drm_core_check_feature(dev, DRIVER_MODESET)) { + /* Resume the modeset for every activated CRTC */ + drm_helper_resume_force_mode(dev); + } return ret; } From af4fcb574efa90373b02ae0bb8b54d710c32eeb4 Mon Sep 17 00:00:00 
2001 From: Zhao Yakui Date: Wed, 8 Jul 2009 14:13:13 +0800 Subject: [PATCH 718/741] drm: Disable the unused connectors explicitly when resuming with KMS. Signed-off-by: Zhao Yakui Acked-by: Jesse Barnes Acked-by: Dave Airlie Signed-off-by: Eric Anholt --- drivers/gpu/drm/drm_crtc_helper.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index a6f73f1e99d9..3da9cfa31848 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -1090,6 +1090,8 @@ int drm_helper_resume_force_mode(struct drm_device *dev) if (ret == false) DRM_ERROR("failed to set mode on crtc %p\n", crtc); } + /* disable the unused connectors while restoring the modesetting */ + drm_helper_disable_unused_functions(dev); return 0; } EXPORT_SYMBOL(drm_helper_resume_force_mode); From fccdaba4317604602e5802c3afc4021f2fb8132e Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Wed, 8 Jul 2009 14:13:14 +0800 Subject: [PATCH 719/741] drm/i915: Avoid saving/restore the modesetting registers twice in KMS mode In KMS mode we now use the normal mode-setting paths to set the modes back to the current configuration, so we don't need to also run the more limited non-KMS implementation of modesetting for resume. 
Signed-off-by: Zhao Yakui Acked-by: Jesse Barnes Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_suspend.c | 221 +++++++++++++++------------- 1 file changed, 121 insertions(+), 100 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 8d8e083d14ab..9e1d16e5c3ea 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -222,23 +222,12 @@ static void i915_restore_vga(struct drm_device *dev) I915_WRITE8(VGA_DACMASK, dev_priv->saveDACMASK); } -int i915_save_state(struct drm_device *dev) +static void i915_save_modeset_reg(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int i; - - pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB); - - /* Render Standby */ - if (IS_I965G(dev) && IS_MOBILE(dev)) - dev_priv->saveRENDERSTANDBY = I915_READ(MCHBAR_RENDER_STANDBY); - - /* Hardware status page */ - dev_priv->saveHWS = I915_READ(HWS_PGA); - - /* Display arbitration control */ - dev_priv->saveDSPARB = I915_READ(DSPARB); + if (drm_core_check_feature(dev, DRIVER_MODESET)) + return; /* Pipe & plane A info */ dev_priv->savePIPEACONF = I915_READ(PIPEACONF); dev_priv->savePIPEASRC = I915_READ(PIPEASRC); @@ -294,7 +283,122 @@ int i915_save_state(struct drm_device *dev) } i915_save_palette(dev, PIPE_B); dev_priv->savePIPEBSTAT = I915_READ(PIPEBSTAT); + return; +} +static void i915_restore_modeset_reg(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + if (drm_core_check_feature(dev, DRIVER_MODESET)) + return; + + /* Pipe & plane A info */ + /* Prime the clock */ + if (dev_priv->saveDPLL_A & DPLL_VCO_ENABLE) { + I915_WRITE(DPLL_A, dev_priv->saveDPLL_A & + ~DPLL_VCO_ENABLE); + DRM_UDELAY(150); + } + I915_WRITE(FPA0, dev_priv->saveFPA0); + I915_WRITE(FPA1, dev_priv->saveFPA1); + /* Actually enable it */ + I915_WRITE(DPLL_A, dev_priv->saveDPLL_A); + DRM_UDELAY(150); + if (IS_I965G(dev)) + I915_WRITE(DPLL_A_MD, 
dev_priv->saveDPLL_A_MD); + DRM_UDELAY(150); + + /* Restore mode */ + I915_WRITE(HTOTAL_A, dev_priv->saveHTOTAL_A); + I915_WRITE(HBLANK_A, dev_priv->saveHBLANK_A); + I915_WRITE(HSYNC_A, dev_priv->saveHSYNC_A); + I915_WRITE(VTOTAL_A, dev_priv->saveVTOTAL_A); + I915_WRITE(VBLANK_A, dev_priv->saveVBLANK_A); + I915_WRITE(VSYNC_A, dev_priv->saveVSYNC_A); + I915_WRITE(BCLRPAT_A, dev_priv->saveBCLRPAT_A); + + /* Restore plane info */ + I915_WRITE(DSPASIZE, dev_priv->saveDSPASIZE); + I915_WRITE(DSPAPOS, dev_priv->saveDSPAPOS); + I915_WRITE(PIPEASRC, dev_priv->savePIPEASRC); + I915_WRITE(DSPAADDR, dev_priv->saveDSPAADDR); + I915_WRITE(DSPASTRIDE, dev_priv->saveDSPASTRIDE); + if (IS_I965G(dev)) { + I915_WRITE(DSPASURF, dev_priv->saveDSPASURF); + I915_WRITE(DSPATILEOFF, dev_priv->saveDSPATILEOFF); + } + + I915_WRITE(PIPEACONF, dev_priv->savePIPEACONF); + + i915_restore_palette(dev, PIPE_A); + /* Enable the plane */ + I915_WRITE(DSPACNTR, dev_priv->saveDSPACNTR); + I915_WRITE(DSPAADDR, I915_READ(DSPAADDR)); + + /* Pipe & plane B info */ + if (dev_priv->saveDPLL_B & DPLL_VCO_ENABLE) { + I915_WRITE(DPLL_B, dev_priv->saveDPLL_B & + ~DPLL_VCO_ENABLE); + DRM_UDELAY(150); + } + I915_WRITE(FPB0, dev_priv->saveFPB0); + I915_WRITE(FPB1, dev_priv->saveFPB1); + /* Actually enable it */ + I915_WRITE(DPLL_B, dev_priv->saveDPLL_B); + DRM_UDELAY(150); + if (IS_I965G(dev)) + I915_WRITE(DPLL_B_MD, dev_priv->saveDPLL_B_MD); + DRM_UDELAY(150); + + /* Restore mode */ + I915_WRITE(HTOTAL_B, dev_priv->saveHTOTAL_B); + I915_WRITE(HBLANK_B, dev_priv->saveHBLANK_B); + I915_WRITE(HSYNC_B, dev_priv->saveHSYNC_B); + I915_WRITE(VTOTAL_B, dev_priv->saveVTOTAL_B); + I915_WRITE(VBLANK_B, dev_priv->saveVBLANK_B); + I915_WRITE(VSYNC_B, dev_priv->saveVSYNC_B); + I915_WRITE(BCLRPAT_B, dev_priv->saveBCLRPAT_B); + + /* Restore plane info */ + I915_WRITE(DSPBSIZE, dev_priv->saveDSPBSIZE); + I915_WRITE(DSPBPOS, dev_priv->saveDSPBPOS); + I915_WRITE(PIPEBSRC, dev_priv->savePIPEBSRC); + I915_WRITE(DSPBADDR, 
dev_priv->saveDSPBADDR); + I915_WRITE(DSPBSTRIDE, dev_priv->saveDSPBSTRIDE); + if (IS_I965G(dev)) { + I915_WRITE(DSPBSURF, dev_priv->saveDSPBSURF); + I915_WRITE(DSPBTILEOFF, dev_priv->saveDSPBTILEOFF); + } + + I915_WRITE(PIPEBCONF, dev_priv->savePIPEBCONF); + + i915_restore_palette(dev, PIPE_B); + /* Enable the plane */ + I915_WRITE(DSPBCNTR, dev_priv->saveDSPBCNTR); + I915_WRITE(DSPBADDR, I915_READ(DSPBADDR)); + + return; +} +int i915_save_state(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int i; + + pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB); + + /* Render Standby */ + if (IS_I965G(dev) && IS_MOBILE(dev)) + dev_priv->saveRENDERSTANDBY = I915_READ(MCHBAR_RENDER_STANDBY); + + /* Hardware status page */ + dev_priv->saveHWS = I915_READ(HWS_PGA); + + /* Display arbitration control */ + dev_priv->saveDSPARB = I915_READ(DSPARB); + + /* This is only meaningful in non-KMS mode */ + /* Don't save them in KMS mode */ + i915_save_modeset_reg(dev); /* Cursor state */ dev_priv->saveCURACNTR = I915_READ(CURACNTR); dev_priv->saveCURAPOS = I915_READ(CURAPOS); @@ -430,92 +534,9 @@ int i915_restore_state(struct drm_device *dev) I915_WRITE(PIPEA_DP_LINK_N, dev_priv->savePIPEA_DP_LINK_N); I915_WRITE(PIPEB_DP_LINK_N, dev_priv->savePIPEB_DP_LINK_N); } - - /* Pipe & plane A info */ - /* Prime the clock */ - if (dev_priv->saveDPLL_A & DPLL_VCO_ENABLE) { - I915_WRITE(DPLL_A, dev_priv->saveDPLL_A & - ~DPLL_VCO_ENABLE); - DRM_UDELAY(150); - } - I915_WRITE(FPA0, dev_priv->saveFPA0); - I915_WRITE(FPA1, dev_priv->saveFPA1); - /* Actually enable it */ - I915_WRITE(DPLL_A, dev_priv->saveDPLL_A); - DRM_UDELAY(150); - if (IS_I965G(dev)) - I915_WRITE(DPLL_A_MD, dev_priv->saveDPLL_A_MD); - DRM_UDELAY(150); - - /* Restore mode */ - I915_WRITE(HTOTAL_A, dev_priv->saveHTOTAL_A); - I915_WRITE(HBLANK_A, dev_priv->saveHBLANK_A); - I915_WRITE(HSYNC_A, dev_priv->saveHSYNC_A); - I915_WRITE(VTOTAL_A, dev_priv->saveVTOTAL_A); - I915_WRITE(VBLANK_A, 
dev_priv->saveVBLANK_A); - I915_WRITE(VSYNC_A, dev_priv->saveVSYNC_A); - I915_WRITE(BCLRPAT_A, dev_priv->saveBCLRPAT_A); - - /* Restore plane info */ - I915_WRITE(DSPASIZE, dev_priv->saveDSPASIZE); - I915_WRITE(DSPAPOS, dev_priv->saveDSPAPOS); - I915_WRITE(PIPEASRC, dev_priv->savePIPEASRC); - I915_WRITE(DSPAADDR, dev_priv->saveDSPAADDR); - I915_WRITE(DSPASTRIDE, dev_priv->saveDSPASTRIDE); - if (IS_I965G(dev)) { - I915_WRITE(DSPASURF, dev_priv->saveDSPASURF); - I915_WRITE(DSPATILEOFF, dev_priv->saveDSPATILEOFF); - } - - I915_WRITE(PIPEACONF, dev_priv->savePIPEACONF); - - i915_restore_palette(dev, PIPE_A); - /* Enable the plane */ - I915_WRITE(DSPACNTR, dev_priv->saveDSPACNTR); - I915_WRITE(DSPAADDR, I915_READ(DSPAADDR)); - - /* Pipe & plane B info */ - if (dev_priv->saveDPLL_B & DPLL_VCO_ENABLE) { - I915_WRITE(DPLL_B, dev_priv->saveDPLL_B & - ~DPLL_VCO_ENABLE); - DRM_UDELAY(150); - } - I915_WRITE(FPB0, dev_priv->saveFPB0); - I915_WRITE(FPB1, dev_priv->saveFPB1); - /* Actually enable it */ - I915_WRITE(DPLL_B, dev_priv->saveDPLL_B); - DRM_UDELAY(150); - if (IS_I965G(dev)) - I915_WRITE(DPLL_B_MD, dev_priv->saveDPLL_B_MD); - DRM_UDELAY(150); - - /* Restore mode */ - I915_WRITE(HTOTAL_B, dev_priv->saveHTOTAL_B); - I915_WRITE(HBLANK_B, dev_priv->saveHBLANK_B); - I915_WRITE(HSYNC_B, dev_priv->saveHSYNC_B); - I915_WRITE(VTOTAL_B, dev_priv->saveVTOTAL_B); - I915_WRITE(VBLANK_B, dev_priv->saveVBLANK_B); - I915_WRITE(VSYNC_B, dev_priv->saveVSYNC_B); - I915_WRITE(BCLRPAT_B, dev_priv->saveBCLRPAT_B); - - /* Restore plane info */ - I915_WRITE(DSPBSIZE, dev_priv->saveDSPBSIZE); - I915_WRITE(DSPBPOS, dev_priv->saveDSPBPOS); - I915_WRITE(PIPEBSRC, dev_priv->savePIPEBSRC); - I915_WRITE(DSPBADDR, dev_priv->saveDSPBADDR); - I915_WRITE(DSPBSTRIDE, dev_priv->saveDSPBSTRIDE); - if (IS_I965G(dev)) { - I915_WRITE(DSPBSURF, dev_priv->saveDSPBSURF); - I915_WRITE(DSPBTILEOFF, dev_priv->saveDSPBTILEOFF); - } - - I915_WRITE(PIPEBCONF, dev_priv->savePIPEBCONF); - - i915_restore_palette(dev, 
PIPE_B); - /* Enable the plane */ - I915_WRITE(DSPBCNTR, dev_priv->saveDSPBCNTR); - I915_WRITE(DSPBADDR, I915_READ(DSPBADDR)); - + /* This is only meaningful in non-KMS mode */ + /* Don't restore them in KMS mode */ + i915_restore_modeset_reg(dev); /* Cursor state */ I915_WRITE(CURAPOS, dev_priv->saveCURAPOS); I915_WRITE(CURACNTR, dev_priv->saveCURACNTR); From f39d1b9792881ce4eb982ec8cc65258bf95674b5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 10 Jul 2009 21:38:02 +0200 Subject: [PATCH 720/741] dma-debug: Fix the overlap() function to be correct and readable Linus noticed how unclean and buggy the overlap() function is: - It uses convoluted (and bug-causing) positive checks for range overlap - instead of using a more natural negative check. - Even the positive checks are buggy: a positive intersection check has four natural cases while we checked only for three, missing the (addr < start && addr2 == end) case for example. - The variables are mis-named, making it non-obvious how the check was done. - It needlessly uses u64 instead of unsigned long. Since these are kernel memory pointers and we explicitly exclude highmem ranges anyway we cannot ever overflow 32 bits, even if we could. (and on 64-bit it doesn't matter anyway) All in all, this function needs a total revamp. I used Linus's suggestions minus the paranoid checks (we cannot overflow really because if we get totally bad DMA ranges passed far more things break in the systems than just DMA debugging). I also fixed a few other small details I noticed.
Reported-by: Linus Torvalds Cc: Joerg Roedel Signed-off-by: Ingo Molnar --- lib/dma-debug.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/lib/dma-debug.c b/lib/dma-debug.c index c9187fed0b93..65b0d99b6d0a 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -856,22 +856,21 @@ static void check_for_stack(struct device *dev, void *addr) "stack [addr=%p]\n", addr); } -static inline bool overlap(void *addr, u64 size, void *start, void *end) +static inline bool overlap(void *addr, unsigned long len, void *start, void *end) { - void *addr2 = (char *)addr + size; + unsigned long a1 = (unsigned long)addr; + unsigned long b1 = a1 + len; + unsigned long a2 = (unsigned long)start; + unsigned long b2 = (unsigned long)end; - return ((addr >= start && addr < end) || - (addr2 >= start && addr2 < end) || - ((addr < start) && (addr2 > end))); + return !(b1 <= a2 || a1 >= b2); } -static void check_for_illegal_area(struct device *dev, void *addr, u64 size) +static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len) { - if (overlap(addr, size, _text, _etext) || - overlap(addr, size, __start_rodata, __end_rodata)) - err_printk(dev, NULL, "DMA-API: device driver maps " - "memory from kernel text or rodata " - "[addr=%p] [size=%llu]\n", addr, size); + if (overlap(addr, len, _text, _etext) || + overlap(addr, len, __start_rodata, __end_rodata)) + err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len); } static void check_sync(struct device *dev, @@ -969,7 +968,8 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, entry->type = dma_debug_single; if (!PageHighMem(page)) { - void *addr = ((char *)page_address(page)) + offset; + void *addr = page_address(page) + offset; + check_for_stack(dev, addr); check_for_illegal_area(dev, addr, size); } From 901782b21ecb2af4dde1598b3142bf0e80b20853 Mon Sep 17 00:00:00 2001 From: Chris 
Wilson Date: Fri, 10 Jul 2009 08:18:50 +0100 Subject: [PATCH 721/741] drm/i915: Refactor calls to unmap_mapping_range As we call unmap_mapping_range() twice in identical fashion, refactor and attempt to explain why we need to call unmap_mapping_range(). Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_gem.c | 36 ++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 876b65cb7629..08d8e5d85955 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1252,6 +1252,31 @@ out_free_list: return ret; } +/** + * i915_gem_release_mmap - remove physical page mappings + * @obj: obj in question + * + * Preserve the reservation of the mmaping with the DRM core code, but + * relinquish ownership of the pages back to the system. + * + * It is vital that we remove the page mapping if we have mapped a tiled + * object through the GTT and then lose the fence register due to + * resource pressure. Similarly if the object has been moved out of the + * aperture, than pages mapped into userspace must be revoked. Removing the + * mapping will then trigger a page fault on the next user access, allowing + * fixup by i915_gem_fault(). 
+ */ +static void +i915_gem_release_mmap(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + + if (dev->dev_mapping) + unmap_mapping_range(dev->dev_mapping, + obj_priv->mmap_offset, obj->size, 1); +} + static void i915_gem_free_mmap_offset(struct drm_gem_object *obj) { @@ -1861,7 +1886,6 @@ i915_gem_object_unbind(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; struct drm_i915_gem_object *obj_priv = obj->driver_private; - loff_t offset; int ret = 0; #if WATCH_BUF @@ -1898,9 +1922,7 @@ i915_gem_object_unbind(struct drm_gem_object *obj) BUG_ON(obj_priv->active); /* blow away mappings if mapped through GTT */ - offset = ((loff_t) obj->map_list.hash.key) << PAGE_SHIFT; - if (dev->dev_mapping) - unmap_mapping_range(dev->dev_mapping, offset, obj->size, 1); + i915_gem_release_mmap(obj); if (obj_priv->fence_reg != I915_FENCE_REG_NONE) i915_gem_clear_fence_reg(obj); @@ -2222,7 +2244,6 @@ try_again: /* None available, try to steal one or wait for a user to finish */ if (i == dev_priv->num_fence_regs) { uint32_t seqno = dev_priv->mm.next_gem_seqno; - loff_t offset; if (avail == 0) return -ENOSPC; @@ -2274,10 +2295,7 @@ try_again: * Zap this virtual mapping so we can set up a fence again * for this object next time we need it. */ - offset = ((loff_t) reg->obj->map_list.hash.key) << PAGE_SHIFT; - if (dev->dev_mapping) - unmap_mapping_range(dev->dev_mapping, offset, - reg->obj->size, 1); + i915_gem_release_mmap(reg->obj); old_obj_priv->fence_reg = I915_FENCE_REG_NONE; } From d05ca301997c94c2ef3c112b15319d13fa8cddab Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 10 Jul 2009 13:02:26 -0700 Subject: [PATCH 722/741] drm/i915: Zap the GTT mapping when transitioning from untiled to tiled. 
As of 52dc7d32b88156248167864f77a9026abe27b432, we could leave an old linear GTT mapping in place, so that apps trying to GTT-mapped write in tiled data wouldn't get the fence added, and garbage would get displayed. Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_tiling.c | 6 ++++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9d6889799d01..d08752875885 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -673,6 +673,7 @@ void i915_gem_free_object(struct drm_gem_object *obj); int i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment); void i915_gem_object_unpin(struct drm_gem_object *obj); int i915_gem_object_unbind(struct drm_gem_object *obj); +void i915_gem_release_mmap(struct drm_gem_object *obj); void i915_gem_lastclose(struct drm_device *dev); uint32_t i915_get_gem_seqno(struct drm_device *dev); int i915_gem_object_get_fence_reg(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 08d8e5d85955..5bf420378b6d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1266,7 +1266,7 @@ out_free_list: * mapping will then trigger a page fault on the next user access, allowing * fixup by i915_gem_fault(). */ -static void +void i915_gem_release_mmap(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index daeae62e1c28..a2d527b22ec4 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -521,6 +521,12 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, goto err; } + /* If we've changed tiling, GTT-mappings of the object + * need to re-fault to ensure that the correct fence register + * setup is in place. 
+ */ + i915_gem_release_mmap(obj); + obj_priv->tiling_mode = args->tiling_mode; obj_priv->stride = args->stride; } From 5019914ca3b7517b2d82db2e92e3f683ac48e535 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 10 Jul 2009 14:39:59 +0800 Subject: [PATCH 723/741] drm/i915: Fix for LVDS VBT change on IGDNG IGDNG mobile chip's LVDS data block removes panel fitting register definition. So this fixes the offset for LVDS timing block parsing. Thanks to Michael Fu for catching this. Signed-off-by: Zhenyu Wang Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_bios.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index da22863c05c0..7cc447191028 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -97,6 +97,7 @@ static void parse_lfp_panel_data(struct drm_i915_private *dev_priv, struct bdb_header *bdb) { + struct drm_device *dev = dev_priv->dev; struct bdb_lvds_options *lvds_options; struct bdb_lvds_lfp_data *lvds_lfp_data; struct bdb_lvds_lfp_data_ptrs *lvds_lfp_data_ptrs; @@ -132,7 +133,14 @@ parse_lfp_panel_data(struct drm_i915_private *dev_priv, entry = (struct bdb_lvds_lfp_data_entry *) ((uint8_t *)lvds_lfp_data->data + (lfp_data_size * lvds_options->panel_type)); - dvo_timing = &entry->dvo_timing; + + /* On IGDNG mobile, LVDS data block removes panel fitting registers. + So dec 2 dword from dvo_timing offset */ + if (IS_IGDNG(dev)) + dvo_timing = (struct lvds_dvo_timing *) + ((u8 *)&entry->dvo_timing - 8); + else + dvo_timing = &entry->dvo_timing; panel_fixed_mode = kzalloc(sizeof(*panel_fixed_mode), GFP_KERNEL); From c99e6efe1ba04561e7d93a81f0be07e37427e835 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 10 Jul 2009 14:57:56 +0200 Subject: [PATCH 724/741] sched: INIT_PREEMPT_COUNT Pull the initial preempt_count value into a single definition site.
Maintainers for: alpha, ia64 and m68k, please have a look, your arch code is funny. The header magic is a bit odd, but similar to the KERNEL_DS one, CPP waits with expanding these macros until the INIT_THREAD_INFO macro itself is expanded, which is in arch/*/kernel/init_task.c where we've already included sched.h so we're good. Cc: tony.luck@intel.com Cc: rth@twiddle.net Cc: geert@linux-m68k.org Signed-off-by: Peter Zijlstra Acked-by: Matt Mackall Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/thread_info.h | 1 + arch/arm/include/asm/thread_info.h | 2 +- arch/avr32/include/asm/thread_info.h | 2 +- arch/blackfin/include/asm/thread_info.h | 2 +- arch/cris/include/asm/thread_info.h | 4 +--- arch/frv/include/asm/thread_info.h | 4 +--- arch/h8300/include/asm/thread_info.h | 2 +- arch/ia64/include/asm/thread_info.h | 2 +- arch/m32r/include/asm/thread_info.h | 4 +--- arch/m68k/include/asm/thread_info_mm.h | 1 + arch/m68k/include/asm/thread_info_no.h | 1 + arch/microblaze/include/asm/thread_info.h | 4 +--- arch/mips/include/asm/thread_info.h | 4 +--- arch/mn10300/include/asm/thread_info.h | 4 +--- arch/parisc/include/asm/thread_info.h | 2 +- arch/powerpc/include/asm/thread_info.h | 4 +--- arch/s390/include/asm/thread_info.h | 2 +- arch/sh/include/asm/thread_info.h | 2 +- arch/sparc/include/asm/thread_info_32.h | 4 +--- arch/sparc/include/asm/thread_info_64.h | 4 +--- arch/um/include/asm/thread_info.h | 2 +- arch/x86/include/asm/thread_info.h | 2 +- arch/xtensa/include/asm/thread_info.h | 4 +--- include/linux/sched.h | 6 ++++++ 24 files changed, 29 insertions(+), 40 deletions(-) diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index d069526bd767..60c83abfde70 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -37,6 +37,7 @@ struct thread_info { .task = &tsk, \ .exec_domain = &default_exec_domain, \ .addr_limit = KERNEL_DS, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block 
= { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 4f8848260ee2..73394e50cbca 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -73,7 +73,7 @@ struct thread_info { .task = &tsk, \ .exec_domain = &default_exec_domain, \ .flags = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .cpu_domain = domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h index 4442f8d2d423..fc42de5ca209 100644 --- a/arch/avr32/include/asm/thread_info.h +++ b/arch/avr32/include/asm/thread_info.h @@ -40,7 +40,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall \ } \ diff --git a/arch/blackfin/include/asm/thread_info.h b/arch/blackfin/include/asm/thread_info.h index 2920087516f2..2bbfdd950afc 100644 --- a/arch/blackfin/include/asm/thread_info.h +++ b/arch/blackfin/include/asm/thread_info.h @@ -77,7 +77,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h index bc5b2935ca53..c3aade36c330 100644 --- a/arch/cris/include/asm/thread_info.h +++ b/arch/cris/include/asm/thread_info.h @@ -50,8 +50,6 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. 
*/ #ifndef __ASSEMBLY__ #define INIT_THREAD_INFO(tsk) \ @@ -60,7 +58,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h index e8a5ed7be021..e608e056bb53 100644 --- a/arch/frv/include/asm/thread_info.h +++ b/arch/frv/include/asm/thread_info.h @@ -56,8 +56,6 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. */ #ifndef __ASSEMBLY__ @@ -67,7 +65,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h index 700014d2155f..8bbc8b0ee45d 100644 --- a/arch/h8300/include/asm/thread_info.h +++ b/arch/h8300/include/asm/thread_info.h @@ -36,7 +36,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index ae6922626bf4..8ce2e388e37c 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -48,7 +48,7 @@ struct thread_info { .flags = 0, \ .cpu = 0, \ .addr_limit = KERNEL_DS, \ - .preempt_count = 0, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h index 8589d462df27..07bb5bd00e2a 100644 --- 
a/arch/m32r/include/asm/thread_info.h +++ b/arch/m32r/include/asm/thread_info.h @@ -57,8 +57,6 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. */ #ifndef __ASSEMBLY__ @@ -68,7 +66,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/m68k/include/asm/thread_info_mm.h b/arch/m68k/include/asm/thread_info_mm.h index af0fda46e94b..6ea5c33b3c56 100644 --- a/arch/m68k/include/asm/thread_info_mm.h +++ b/arch/m68k/include/asm/thread_info_mm.h @@ -19,6 +19,7 @@ struct thread_info { { \ .task = &tsk, \ .exec_domain = &default_exec_domain, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/m68k/include/asm/thread_info_no.h b/arch/m68k/include/asm/thread_info_no.h index 82529f424ea3..c2bde5e24b0b 100644 --- a/arch/m68k/include/asm/thread_info_no.h +++ b/arch/m68k/include/asm/thread_info_no.h @@ -49,6 +49,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h index 7fac44498445..6e92885d381a 100644 --- a/arch/microblaze/include/asm/thread_info.h +++ b/arch/microblaze/include/asm/thread_info.h @@ -75,8 +75,6 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. 
*/ #define INIT_THREAD_INFO(tsk) \ { \ @@ -84,7 +82,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 143a48136a4b..f9df720d2e40 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -39,8 +39,6 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. */ #define INIT_THREAD_INFO(tsk) \ { \ @@ -48,7 +46,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = _TIF_FIXADE, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h index 78a3881f3c12..58d64f8b2cc3 100644 --- a/arch/mn10300/include/asm/thread_info.h +++ b/arch/mn10300/include/asm/thread_info.h @@ -65,8 +65,6 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. 
*/ #ifndef __ASSEMBLY__ @@ -76,7 +74,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index 0407959da489..4ce0edfbe969 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -23,7 +23,7 @@ struct thread_info { .flags = 0, \ .cpu = 0, \ .addr_limit = KERNEL_DS, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall \ } \ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 9aba5a38a7c4..c8b329255678 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -46,15 +46,13 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. 
*/ #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ .exec_domain = &default_exec_domain, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 925bcc649035..ba1cab9fc1f9 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -61,7 +61,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index f09ac4806294..d570ac2e5cb9 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -51,7 +51,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h index 0f7b0e5fb1c7..844d73a0340c 100644 --- a/arch/sparc/include/asm/thread_info_32.h +++ b/arch/sparc/include/asm/thread_info_32.h @@ -54,8 +54,6 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. 
*/ #define INIT_THREAD_INFO(tsk) \ { \ @@ -64,7 +62,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index 65865726b283..1b45a7bbe407 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -125,8 +125,6 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. */ #ifndef __ASSEMBLY__ @@ -135,7 +133,7 @@ struct thread_info { .task = &tsk, \ .flags = ((unsigned long)ASI_P) << TI_FLAG_CURRENT_DS_SHIFT, \ .exec_domain = &default_exec_domain, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h index 62274ab9471f..fd911f855367 100644 --- a/arch/um/include/asm/thread_info.h +++ b/arch/um/include/asm/thread_info.h @@ -32,7 +32,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index b0783520988b..fad7d40b75f8 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -49,7 +49,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h index 
0f4fe1faf9ba..13165641cc51 100644 --- a/arch/xtensa/include/asm/thread_info.h +++ b/arch/xtensa/include/asm/thread_info.h @@ -80,8 +80,6 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. */ #ifndef __ASSEMBLY__ @@ -92,7 +90,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 0085d758d645..2a99f1c15cf8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -498,6 +498,12 @@ struct task_cputime { .sum_exec_runtime = 0, \ } +/* + * Disable preemption until the scheduler is running. + * Reset by start_kernel()->sched_init()->init_idle(). + */ +#define INIT_PREEMPT_COUNT (1) + /** * struct thread_group_cputimer - thread group interval timer counts * @cputime: thread group interval timers. From d86ee4809d0329d4aa0d0f2c76c2295a16862799 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 10 Jul 2009 14:57:57 +0200 Subject: [PATCH 725/741] sched: optimize cond_resched() Optimize cond_resched() by removing one conditional. Currently cond_resched() checks system_state == SYSTEM_RUNNING in order to avoid scheduling before the scheduler is running. We can however, as per suggestion of Matt, use PREEMPT_ACTIVE to accomplish that very same. 
Suggested-by: Matt Mackall Signed-off-by: Peter Zijlstra Acked-by: Matt Mackall Signed-off-by: Linus Torvalds --- include/linux/sched.h | 5 ++++- kernel/sched.c | 14 +++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 2a99f1c15cf8..16a982e389fb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -501,8 +501,11 @@ struct task_cputime { /* * Disable preemption until the scheduler is running. * Reset by start_kernel()->sched_init()->init_idle(). + * + * We include PREEMPT_ACTIVE to avoid cond_resched() from working + * before the scheduler is active -- see should_resched(). */ -#define INIT_PREEMPT_COUNT (1) +#define INIT_PREEMPT_COUNT (1 + PREEMPT_ACTIVE) /** * struct thread_group_cputimer - thread group interval timer counts diff --git a/kernel/sched.c b/kernel/sched.c index 7c9098d186e6..01f55ada3598 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6541,6 +6541,11 @@ SYSCALL_DEFINE0(sched_yield) return 0; } +static inline int should_resched(void) +{ + return need_resched() && !(preempt_count() & PREEMPT_ACTIVE); +} + static void __cond_resched(void) { #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP @@ -6560,8 +6565,7 @@ static void __cond_resched(void) int __sched _cond_resched(void) { - if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) && - system_state == SYSTEM_RUNNING) { + if (should_resched()) { __cond_resched(); return 1; } @@ -6579,12 +6583,12 @@ EXPORT_SYMBOL(_cond_resched); */ int cond_resched_lock(spinlock_t *lock) { - int resched = need_resched() && system_state == SYSTEM_RUNNING; + int resched = should_resched(); int ret = 0; if (spin_needbreak(lock) || resched) { spin_unlock(lock); - if (resched && need_resched()) + if (resched) __cond_resched(); else cpu_relax(); @@ -6599,7 +6603,7 @@ int __sched cond_resched_softirq(void) { BUG_ON(!in_softirq()); - if (need_resched() && system_state == SYSTEM_RUNNING) { + if (should_resched()) { local_bh_enable(); 
__cond_resched(); local_bh_disable(); From 097041e576ee3a50d92dd643ee8ca65bf6a62e21 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Fri, 10 Jul 2009 20:06:42 -0500 Subject: [PATCH 726/741] fuse: Fix build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building v2.6.31-rc2-344-g69ca06c, the following build errors are found due to missing includes: CC [M] fs/fuse/dev.o fs/fuse/dev.c: In function ‘request_end’: fs/fuse/dev.c:289: error: ‘BLK_RW_SYNC’ undeclared (first use in this function) ... fs/nfs/write.c: In function ‘nfs_set_page_writeback’: fs/nfs/write.c:207: error: ‘BLK_RW_ASYNC’ undeclared (first use in this function) Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net> Signed-off-by: Linus Torvalds --- fs/fuse/dev.c | 1 + fs/nfs/write.c | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 6484eb75acd6..cbceacbc0bf9 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -16,6 +16,7 @@ #include #include #include +#include MODULE_ALIAS_MISCDEV(FUSE_MINOR); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0a0a2ff767c3..35d81316163f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -19,6 +19,7 @@ #include #include #include +#include #include From 3697cd9aa80125f7717c3c7e7253cfa49a39a388 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Jul 2009 15:02:41 -0700 Subject: [PATCH 727/741] Doc: update Documentation/exception.txt Update Documentation/exception.txt. Remove trailing whitespaces in it.
Signed-off-by: WANG Cong Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- Documentation/exception.txt | 202 ++++++++++++++++++------------------ 1 file changed, 101 insertions(+), 101 deletions(-) diff --git a/Documentation/exception.txt b/Documentation/exception.txt index 2d5aded64247..32901aa36f0a 100644 --- a/Documentation/exception.txt +++ b/Documentation/exception.txt @@ -1,123 +1,123 @@ - Kernel level exception handling in Linux 2.1.8 + Kernel level exception handling in Linux Commentary by Joerg Pommnitz -When a process runs in kernel mode, it often has to access user -mode memory whose address has been passed by an untrusted program. +When a process runs in kernel mode, it often has to access user +mode memory whose address has been passed by an untrusted program. To protect itself the kernel has to verify this address. -In older versions of Linux this was done with the -int verify_area(int type, const void * addr, unsigned long size) +In older versions of Linux this was done with the +int verify_area(int type, const void * addr, unsigned long size) function (which has since been replaced by access_ok()). -This function verified that the memory area starting at address +This function verified that the memory area starting at address 'addr' and of size 'size' was accessible for the operation specified -in type (read or write). To do this, verify_read had to look up the -virtual memory area (vma) that contained the address addr. In the -normal case (correctly working program), this test was successful. +in type (read or write). To do this, verify_read had to look up the +virtual memory area (vma) that contained the address addr. In the +normal case (correctly working program), this test was successful. It only failed for a few buggy programs. In some kernel profiling tests, this normally unneeded verification used up a considerable amount of time. 
-To overcome this situation, Linus decided to let the virtual memory +To overcome this situation, Linus decided to let the virtual memory hardware present in every Linux-capable CPU handle this test. How does this work? -Whenever the kernel tries to access an address that is currently not -accessible, the CPU generates a page fault exception and calls the -page fault handler +Whenever the kernel tries to access an address that is currently not +accessible, the CPU generates a page fault exception and calls the +page fault handler void do_page_fault(struct pt_regs *regs, unsigned long error_code) -in arch/i386/mm/fault.c. The parameters on the stack are set up by -the low level assembly glue in arch/i386/kernel/entry.S. The parameter -regs is a pointer to the saved registers on the stack, error_code +in arch/x86/mm/fault.c. The parameters on the stack are set up by +the low level assembly glue in arch/x86/kernel/entry_32.S. The parameter +regs is a pointer to the saved registers on the stack, error_code contains a reason code for the exception. -do_page_fault first obtains the unaccessible address from the CPU -control register CR2. If the address is within the virtual address -space of the process, the fault probably occurred, because the page -was not swapped in, write protected or something similar. However, -we are interested in the other case: the address is not valid, there -is no vma that contains this address. In this case, the kernel jumps -to the bad_area label. +do_page_fault first obtains the unaccessible address from the CPU +control register CR2. If the address is within the virtual address +space of the process, the fault probably occurred, because the page +was not swapped in, write protected or something similar. However, +we are interested in the other case: the address is not valid, there +is no vma that contains this address. In this case, the kernel jumps +to the bad_area label. 
-There it uses the address of the instruction that caused the exception -(i.e. regs->eip) to find an address where the execution can continue -(fixup). If this search is successful, the fault handler modifies the -return address (again regs->eip) and returns. The execution will +There it uses the address of the instruction that caused the exception +(i.e. regs->eip) to find an address where the execution can continue +(fixup). If this search is successful, the fault handler modifies the +return address (again regs->eip) and returns. The execution will continue at the address in fixup. Where does fixup point to? -Since we jump to the contents of fixup, fixup obviously points -to executable code. This code is hidden inside the user access macros. -I have picked the get_user macro defined in include/asm/uaccess.h as an -example. The definition is somewhat hard to follow, so let's peek at +Since we jump to the contents of fixup, fixup obviously points +to executable code. This code is hidden inside the user access macros. +I have picked the get_user macro defined in arch/x86/include/asm/uaccess.h +as an example. The definition is somewhat hard to follow, so let's peek at the code generated by the preprocessor and the compiler. I selected -the get_user call in drivers/char/console.c for a detailed examination. +the get_user call in drivers/char/sysrq.c for a detailed examination. 
-The original code in console.c line 1405: +The original code in sysrq.c line 587: get_user(c, buf); The preprocessor output (edited to become somewhat readable): ( - { - long __gu_err = - 14 , __gu_val = 0; - const __typeof__(*( ( buf ) )) *__gu_addr = ((buf)); - if (((((0 + current_set[0])->tss.segment) == 0x18 ) || - (((sizeof(*(buf))) <= 0xC0000000UL) && - ((unsigned long)(__gu_addr ) <= 0xC0000000UL - (sizeof(*(buf))))))) + { + long __gu_err = - 14 , __gu_val = 0; + const __typeof__(*( ( buf ) )) *__gu_addr = ((buf)); + if (((((0 + current_set[0])->tss.segment) == 0x18 ) || + (((sizeof(*(buf))) <= 0xC0000000UL) && + ((unsigned long)(__gu_addr ) <= 0xC0000000UL - (sizeof(*(buf))))))) do { - __gu_err = 0; - switch ((sizeof(*(buf)))) { - case 1: - __asm__ __volatile__( - "1: mov" "b" " %2,%" "b" "1\n" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl %3,%0\n" - " xor" "b" " %" "b" "1,%" "b" "1\n" - " jmp 2b\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b,3b\n" - ".text" : "=r"(__gu_err), "=q" (__gu_val): "m"((*(struct __large_struct *) - ( __gu_addr )) ), "i"(- 14 ), "0"( __gu_err )) ; - break; - case 2: + __gu_err = 0; + switch ((sizeof(*(buf)))) { + case 1: __asm__ __volatile__( - "1: mov" "w" " %2,%" "w" "1\n" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl %3,%0\n" - " xor" "w" " %" "w" "1,%" "w" "1\n" - " jmp 2b\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b,3b\n" + "1: mov" "b" " %2,%" "b" "1\n" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl %3,%0\n" + " xor" "b" " %" "b" "1,%" "b" "1\n" + " jmp 2b\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b,3b\n" + ".text" : "=r"(__gu_err), "=q" (__gu_val): "m"((*(struct __large_struct *) + ( __gu_addr )) ), "i"(- 14 ), "0"( __gu_err )) ; + break; + case 2: + __asm__ __volatile__( + "1: mov" "w" " %2,%" "w" "1\n" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl %3,%0\n" + " xor" "w" " %" "w" "1,%" "w" "1\n" + " jmp 2b\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" 
+ " .long 1b,3b\n" ".text" : "=r"(__gu_err), "=r" (__gu_val) : "m"((*(struct __large_struct *) - ( __gu_addr )) ), "i"(- 14 ), "0"( __gu_err )); - break; - case 4: - __asm__ __volatile__( - "1: mov" "l" " %2,%" "" "1\n" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl %3,%0\n" - " xor" "l" " %" "" "1,%" "" "1\n" - " jmp 2b\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" " .long 1b,3b\n" + ( __gu_addr )) ), "i"(- 14 ), "0"( __gu_err )); + break; + case 4: + __asm__ __volatile__( + "1: mov" "l" " %2,%" "" "1\n" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl %3,%0\n" + " xor" "l" " %" "" "1,%" "" "1\n" + " jmp 2b\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" " .long 1b,3b\n" ".text" : "=r"(__gu_err), "=r" (__gu_val) : "m"((*(struct __large_struct *) - ( __gu_addr )) ), "i"(- 14 ), "0"(__gu_err)); - break; - default: - (__gu_val) = __get_user_bad(); - } - } while (0) ; - ((c)) = (__typeof__(*((buf))))__gu_val; + ( __gu_addr )) ), "i"(- 14 ), "0"(__gu_err)); + break; + default: + (__gu_val) = __get_user_bad(); + } + } while (0) ; + ((c)) = (__typeof__(*((buf))))__gu_val; __gu_err; } ); @@ -127,12 +127,12 @@ see what code gcc generates: > xorl %edx,%edx > movl current_set,%eax - > cmpl $24,788(%eax) - > je .L1424 + > cmpl $24,788(%eax) + > je .L1424 > cmpl $-1073741825,64(%esp) - > ja .L1423 + > ja .L1423 > .L1424: - > movl %edx,%eax + > movl %edx,%eax > movl 64(%esp),%ebx > #APP > 1: movb (%ebx),%dl /* this is the actual user access */ @@ -149,17 +149,17 @@ see what code gcc generates: > .L1423: > movzbl %dl,%esi -The optimizer does a good job and gives us something we can actually -understand. Can we? The actual user access is quite obvious. Thanks -to the unified address space we can just access the address in user +The optimizer does a good job and gives us something we can actually +understand. Can we? The actual user access is quite obvious. Thanks +to the unified address space we can just access the address in user memory. 
But what does the .section stuff do????? To understand this we have to look at the final kernel: > objdump --section-headers vmlinux - > + > > vmlinux: file format elf32-i386 - > + > > Sections: > Idx Name Size VMA LMA File off Algn > 0 .text 00098f40 c0100000 c0100000 00001000 2**4 @@ -198,18 +198,18 @@ final kernel executable: The whole user memory access is reduced to 10 x86 machine instructions. The instructions bracketed in the .section directives are no longer -in the normal execution path. They are located in a different section +in the normal execution path. They are located in a different section of the executable file: > objdump --disassemble --section=.fixup vmlinux - > + > > c0199ff5 <.fixup+10b5> movl $0xfffffff2,%eax > c0199ffa <.fixup+10ba> xorb %dl,%dl > c0199ffc <.fixup+10bc> jmp c017e7a7 And finally: > objdump --full-contents --section=__ex_table vmlinux - > + > > c01aa7c4 93c017c0 e09f19c0 97c017c0 99c017c0 ................ > c01aa7d4 f6c217c0 e99f19c0 a5e717c0 f59f19c0 ................ > c01aa7e4 080a18c0 01a019c0 0a0a18c0 04a019c0 ................ @@ -235,8 +235,8 @@ sections in the ELF object file. So the instructions ended up in the .fixup section of the object file and the addresses .long 1b,3b ended up in the __ex_table section of the object file. 1b and 3b -are local labels. The local label 1b (1b stands for next label 1 -backward) is the address of the instruction that might fault, i.e. +are local labels. The local label 1b (1b stands for next label 1 +backward) is the address of the instruction that might fault, i.e. in our case the address of the label 1 is c017e7a5: the original assembly code: > 1: movb (%ebx),%dl and linked in vmlinux : > c017e7a5 movb (%ebx),%dl @@ -254,7 +254,7 @@ The assembly code becomes the value pair > c01aa7d4 c017c2f6 c0199fe9 c017e7a5 c0199ff5 ................ ^this is ^this is - 1b 3b + 1b 3b c017e7a5,c0199ff5 in the exception table of the kernel. 
So, what actually happens if a fault from kernel mode with no suitable @@ -266,9 +266,9 @@ vma occurs? 3.) CPU calls do_page_fault 4.) do page fault calls search_exception_table (regs->eip == c017e7a5); 5.) search_exception_table looks up the address c017e7a5 in the - exception table (i.e. the contents of the ELF section __ex_table) + exception table (i.e. the contents of the ELF section __ex_table) and returns the address of the associated fault handle code c0199ff5. -6.) do_page_fault modifies its own return address to point to the fault +6.) do_page_fault modifies its own return address to point to the fault handle code and returns. 7.) execution continues in the fault handling code. 8.) 8a) EAX becomes -EFAULT (== -14) From c368b4921bc6e309aba2fbee0efcbbc965008d9f Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 10 Jul 2009 15:02:44 -0700 Subject: [PATCH 728/741] Doc: move Documentation/exception.txt into x86 subdir exception.txt only explains the code on x86, so it's better to move it into Documentation/x86 directory. And also rename it to exception-tables.txt which looks much more reasonable. This patch is on top of the previous one. 
Signed-off-by: WANG Cong Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- Documentation/x86/00-INDEX | 2 ++ Documentation/{exception.txt => x86/exception-tables.txt} | 0 2 files changed, 2 insertions(+) rename Documentation/{exception.txt => x86/exception-tables.txt} (100%) diff --git a/Documentation/x86/00-INDEX b/Documentation/x86/00-INDEX index dbe3377754af..f37b46d34861 100644 --- a/Documentation/x86/00-INDEX +++ b/Documentation/x86/00-INDEX @@ -2,3 +2,5 @@ - this file mtrr.txt - how to use x86 Memory Type Range Registers to increase performance +exception-tables.txt + - why and how Linux kernel uses exception tables on x86 diff --git a/Documentation/exception.txt b/Documentation/x86/exception-tables.txt similarity index 100% rename from Documentation/exception.txt rename to Documentation/x86/exception-tables.txt From 4cd1de0afaaa45309f34d7282ea4c07d9b56a3ff Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 9 Jul 2009 13:35:52 +0100 Subject: [PATCH 729/741] tty: Sort out the USB sysrq changes that wrecked performance We can't go around calling all sorts of magic per character functions at full rate 3G data speed. 
Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/usb/serial/generic.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 932d6241b787..3d8dc5671bea 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -424,10 +424,17 @@ static void flush_and_resubmit_read_urb(struct usb_serial_port *port) if (!tty) goto done; - /* Push data to tty */ - for (i = 0; i < urb->actual_length; i++, ch++) { - if (!usb_serial_handle_sysrq_char(port, *ch)) - tty_insert_flip_char(tty, *ch, TTY_NORMAL); + /* The per character mucking around with sysrq path it too slow for + stuff like 3G modems, so shortcircuit it in the 99.9999999% of cases + where the USB serial is not a console anyway */ + if (!port->console || !port->sysrq) + tty_insert_flip_string(tty, ch, urb->actual_length); + else { + /* Push data to tty */ + for (i = 0; i < urb->actual_length; i++, ch++) { + if (!usb_serial_handle_sysrq_char(port, *ch)) + tty_insert_flip_char(tty, *ch, TTY_NORMAL); + } } tty_flip_buffer_push(tty); tty_kref_put(tty); From 24a15a62dcb1968bf4ffdae55c88fa934d971180 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 9 Jul 2009 13:36:22 +0100 Subject: [PATCH 730/741] tty: Fix USB kref leak The sysrq code acquired a kref leak. 
Fix it by passing the tty separately from the caller (thus effectively using the callers kref which all the callers hold anyway) Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/usb/serial/ftdi_sio.c | 2 +- drivers/usb/serial/generic.c | 7 ++++--- drivers/usb/serial/pl2303.c | 2 +- include/linux/usb/serial.h | 3 ++- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 3dc3768ca71c..5f08702f672f 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -2121,7 +2121,7 @@ static void ftdi_process_read(struct work_struct *work) /* Note that the error flag is duplicated for every character received since we don't know which character it applied to */ - if (!usb_serial_handle_sysrq_char(port, + if (!usb_serial_handle_sysrq_char(tty, port, data[packet_offset + i])) tty_insert_flip_char(tty, data[packet_offset + i], diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 3d8dc5671bea..ce57f6a32bdf 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -432,7 +432,7 @@ static void flush_and_resubmit_read_urb(struct usb_serial_port *port) else { /* Push data to tty */ for (i = 0; i < urb->actual_length; i++, ch++) { - if (!usb_serial_handle_sysrq_char(port, *ch)) + if (!usb_serial_handle_sysrq_char(tty, port, *ch)) tty_insert_flip_char(tty, *ch, TTY_NORMAL); } } @@ -534,11 +534,12 @@ void usb_serial_generic_unthrottle(struct tty_struct *tty) } } -int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch) +int usb_serial_handle_sysrq_char(struct tty_struct *tty, + struct usb_serial_port *port, unsigned int ch) { if (port->sysrq && port->console) { if (ch && time_before(jiffies, port->sysrq)) { - handle_sysrq(ch, tty_port_tty_get(&port->port)); + handle_sysrq(ch, tty); port->sysrq = 0; return 1; } diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 
ec6c132a25b5..8835802067f7 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -1038,7 +1038,7 @@ static void pl2303_read_bulk_callback(struct urb *urb) if (line_status & UART_OVERRUN_ERROR) tty_insert_flip_char(tty, 0, TTY_OVERRUN); for (i = 0; i < urb->actual_length; ++i) - if (!usb_serial_handle_sysrq_char(port, data[i])) + if (!usb_serial_handle_sysrq_char(tty, port, data[i])) tty_insert_flip_char(tty, data[i], tty_flag); tty_flip_buffer_push(tty); } diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 44801d26a37a..0ec50ba62139 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -317,7 +317,8 @@ extern int usb_serial_generic_register(int debug); extern void usb_serial_generic_deregister(void); extern void usb_serial_generic_resubmit_read_urb(struct usb_serial_port *port, gfp_t mem_flags); -extern int usb_serial_handle_sysrq_char(struct usb_serial_port *port, +extern int usb_serial_handle_sysrq_char(struct tty_struct *tty, + struct usb_serial_port *port, unsigned int ch); extern int usb_serial_handle_break(struct usb_serial_port *port); From d4fc4a7bfc2dee626f4fec1e209e58eaa4312de6 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 9 Jul 2009 13:36:58 +0100 Subject: [PATCH 731/741] tty: Fix the PL2303 private methods for sysrq PL2303 has private data shovelling methods that also have no fast path. Fix them to work the same way as the default handler. 
Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/usb/serial/pl2303.c | 58 +++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 8835802067f7..efaf59c4f5d0 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -971,18 +971,46 @@ exit: __func__, retval); } +static void pl2303_push_data(struct tty_struct *tty, + struct usb_serial_port *port, struct urb *urb, + u8 line_status) +{ + unsigned char *data = urb->transfer_buffer; + /* get tty_flag from status */ + char tty_flag = TTY_NORMAL; + /* break takes precedence over parity, */ + /* which takes precedence over framing errors */ + if (line_status & UART_BREAK_ERROR) + tty_flag = TTY_BREAK; + else if (line_status & UART_PARITY_ERROR) + tty_flag = TTY_PARITY; + else if (line_status & UART_FRAME_ERROR) + tty_flag = TTY_FRAME; + dbg("%s - tty_flag = %d", __func__, tty_flag); + + tty_buffer_request_room(tty, urb->actual_length + 1); + /* overrun is special, not associated with a char */ + if (line_status & UART_OVERRUN_ERROR) + tty_insert_flip_char(tty, 0, TTY_OVERRUN); + if (port->console && port->sysrq) { + int i; + for (i = 0; i < urb->actual_length; ++i) + if (!usb_serial_handle_sysrq_char(tty, port, data[i])) + tty_insert_flip_char(tty, data[i], tty_flag); + } else + tty_insert_flip_string(tty, data, urb->actual_length); + tty_flip_buffer_push(tty); +} + static void pl2303_read_bulk_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; struct pl2303_private *priv = usb_get_serial_port_data(port); struct tty_struct *tty; - unsigned char *data = urb->transfer_buffer; unsigned long flags; - int i; int result; int status = urb->status; u8 line_status; - char tty_flag; dbg("%s - port %d", __func__, port->number); @@ -1010,10 +1038,7 @@ static void pl2303_read_bulk_callback(struct urb *urb) } usb_serial_debug_data(debug, &port->dev, __func__, - 
urb->actual_length, data); - - /* get tty_flag from status */ - tty_flag = TTY_NORMAL; + urb->actual_length, urb->transfer_buffer); spin_lock_irqsave(&priv->lock, flags); line_status = priv->line_status; @@ -1021,26 +1046,9 @@ static void pl2303_read_bulk_callback(struct urb *urb) spin_unlock_irqrestore(&priv->lock, flags); wake_up_interruptible(&priv->delta_msr_wait); - /* break takes precedence over parity, */ - /* which takes precedence over framing errors */ - if (line_status & UART_BREAK_ERROR) - tty_flag = TTY_BREAK; - else if (line_status & UART_PARITY_ERROR) - tty_flag = TTY_PARITY; - else if (line_status & UART_FRAME_ERROR) - tty_flag = TTY_FRAME; - dbg("%s - tty_flag = %d", __func__, tty_flag); - tty = tty_port_tty_get(&port->port); if (tty && urb->actual_length) { - tty_buffer_request_room(tty, urb->actual_length + 1); - /* overrun is special, not associated with a char */ - if (line_status & UART_OVERRUN_ERROR) - tty_insert_flip_char(tty, 0, TTY_OVERRUN); - for (i = 0; i < urb->actual_length; ++i) - if (!usb_serial_handle_sysrq_char(tty, port, data[i])) - tty_insert_flip_char(tty, data[i], tty_flag); - tty_flip_buffer_push(tty); + pl2303_push_data(tty, port, urb, line_status); } tty_kref_put(tty); /* Schedule the next read _if_ we are still open */ From 8711c67bee675b4f7a378c71ad5a59c981ec3df0 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 10 Jul 2009 12:34:27 +0200 Subject: [PATCH 732/741] isofs: fix Joliet regression commit 5404ac8e4418ab3d254950ee4f9bcafc1da20b4a ("isofs: cleanup mount option processing") missed conversion of joliet option flag resulting in non-working Joliet support. 
CC: walt Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jan Kara Signed-off-by: Linus Torvalds --- fs/isofs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 58a7963e168a..85f96bc651c7 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -142,6 +142,7 @@ static const struct dentry_operations isofs_dentry_ops[] = { struct iso9660_options{ unsigned int rock:1; + unsigned int joliet:1; unsigned int cruft:1; unsigned int hide:1; unsigned int showassoc:1; @@ -151,7 +152,6 @@ struct iso9660_options{ unsigned int gid_set:1; unsigned int utf8:1; unsigned char map; - char joliet; unsigned char check; unsigned int blocksize; mode_t fmode; @@ -632,7 +632,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) else if (isonum_711(vdp->type) == ISO_VD_SUPPLEMENTARY) { sec = (struct iso_supplementary_descriptor *)vdp; if (sec->escape[0] == 0x25 && sec->escape[1] == 0x2f) { - if (opt.joliet == 'y') { + if (opt.joliet) { if (sec->escape[2] == 0x40) joliet_level = 1; else if (sec->escape[2] == 0x43) From 025dc740d01f99ccba945df1f9ef9e06b1c15d96 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Sat, 11 Jul 2009 13:42:37 +0200 Subject: [PATCH 733/741] hwmon: (max6650) Fix lock imbalance Add omitted update_lock to one switch/case in set_div. Signed-off-by: Jiri Slaby Acked-by: Hans J. 
Koch Signed-off-by: Jean Delvare --- drivers/hwmon/max6650.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/max6650.c b/drivers/hwmon/max6650.c index 86142a858238..58f66be61b1f 100644 --- a/drivers/hwmon/max6650.c +++ b/drivers/hwmon/max6650.c @@ -418,6 +418,7 @@ static ssize_t set_div(struct device *dev, struct device_attribute *devattr, data->count = 3; break; default: + mutex_unlock(&data->update_lock); dev_err(&client->dev, "illegal value for fan divider (%d)\n", div); return -EINVAL; From 24205e0850dd8a79c597e366daafdd5f31a24656 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 11 Jul 2009 13:42:37 +0200 Subject: [PATCH 734/741] hwmon: (sht15) Remove unnecessary semicolon Signed-off-by: Joe Perches Signed-off-by: Jean Delvare --- drivers/hwmon/sht15.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c index 56cd6004da36..6290a259456e 100644 --- a/drivers/hwmon/sht15.c +++ b/drivers/hwmon/sht15.c @@ -257,7 +257,7 @@ static inline int sht15_update_single_val(struct sht15_data *data, (data->flag == SHT15_READING_NOTHING), msecs_to_jiffies(timeout_msecs)); if (ret == 0) {/* timeout occurred */ - disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data));; + disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data)); sht15_connection_reset(data); return -ETIME; } From ec05a8d75d0777cd221f61a88437a31e4cfb83d8 Mon Sep 17 00:00:00 2001 From: Alistair John Strachan Date: Sat, 11 Jul 2009 13:42:38 +0200 Subject: [PATCH 735/741] hwmon: (abituguru3) DMI probing for AB9, AB9 QuadGT and IX38 QuadGT Switch the AB9, AB9 QuadGT and IX38 QuadGT over from port probing to the preferred DMI probe method.
Signed-off-by: Alistair John Strachan Tested-by: dan Tested-by: Nygel Lyndley Tested-by: Dmitriy Fedchenko Acked-by: Hans de Goede Signed-off-by: Jean Delvare --- drivers/hwmon/abituguru3.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/abituguru3.c b/drivers/hwmon/abituguru3.c index ad2b3431b725..7d3f15d32fdf 100644 --- a/drivers/hwmon/abituguru3.c +++ b/drivers/hwmon/abituguru3.c @@ -357,7 +357,7 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = { { "AUX5 Fan", 39, 2, 60, 1, 0 }, { NULL, 0, 0, 0, 0, 0 } } }, - { 0x0014, NULL /* Abit AB9 Pro, need DMI string */, { + { 0x0014, "AB9", /* + AB9 Pro */ { { "CPU Core", 0, 0, 10, 1, 0 }, { "DDR", 1, 0, 10, 1, 0 }, { "DDR VTT", 2, 0, 10, 1, 0 }, @@ -455,7 +455,7 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = { { "AUX3 FAN", 37, 2, 60, 1, 0 }, { NULL, 0, 0, 0, 0, 0 } } }, - { 0x0018, NULL /* Unknown, need DMI string */, { + { 0x0018, "AB9 QuadGT", { { "CPU Core", 0, 0, 10, 1, 0 }, { "DDR2", 1, 0, 20, 1, 0 }, { "DDR2 VTT", 2, 0, 10, 1, 0 }, @@ -564,7 +564,7 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = { { "AUX3 Fan", 36, 2, 60, 1, 0 }, { NULL, 0, 0, 0, 0, 0 } } }, - { 0x001C, NULL /* Unknown, need DMI string */, { + { 0x001C, "IX38 QuadGT", { { "CPU Core", 0, 0, 10, 1, 0 }, { "DDR2", 1, 0, 20, 1, 0 }, { "DDR2 VTT", 2, 0, 10, 1, 0 }, From 373c0a7ed3ea3b34efedb7c83ffb521adff7c894 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 11 Jul 2009 10:06:54 -0400 Subject: [PATCH 736/741] Fix compile error due to congestion_wait() changes Move the definition of BLK_RW_ASYNC/BLK_RW_SYNC into linux/backing-dev.h so that it is available to all callers of set/clear_bdi_congested(). This replaces commit 097041e576ee3a50d92dd643ee8ca65bf6a62e21 ("fuse: Fix build error"), which will be reverted. 
Signed-off-by: Trond Myklebust Acked-by: Larry Finger Cc: Jens Axboe Cc: Miklos Szeredi Signed-off-by: Linus Torvalds --- include/linux/backing-dev.h | 5 +++++ include/linux/blkdev.h | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 3a52a63c1351..1d52425a6118 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -229,6 +229,11 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) (1 << BDI_async_congested)); } +enum { + BLK_RW_ASYNC = 0, + BLK_RW_SYNC = 1, +}; + void clear_bdi_congested(struct backing_dev_info *bdi, int sync); void set_bdi_congested(struct backing_dev_info *bdi, int sync); long congestion_wait(int sync, long timeout); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0146e0fecf1a..e7cb5dbf6c26 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -70,11 +70,6 @@ enum rq_cmd_type_bits { REQ_TYPE_ATA_PC, }; -enum { - BLK_RW_ASYNC = 0, - BLK_RW_SYNC = 1, -}; - /* * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a From 81e4e1ba7ed4a1fdcf0e2ee944f1575010471464 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 11 Jul 2009 11:22:34 -0700 Subject: [PATCH 737/741] Revert "fuse: Fix build error" as unnecessary This reverts commit 097041e576ee3a50d92dd643ee8ca65bf6a62e21. 
Trond had a better fix, which is the parent of this one ("Fix compile error due to congestion_wait() changes") Requested-by: Trond Myklebust Acked-by: Larry Finger Signed-off-by: Linus Torvalds --- fs/fuse/dev.c | 1 - fs/nfs/write.c | 1 - 2 files changed, 2 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index cbceacbc0bf9..6484eb75acd6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -16,7 +16,6 @@ #include #include #include -#include MODULE_ALIAS_MISCDEV(FUSE_MINOR); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 35d81316163f..0a0a2ff767c3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -19,7 +19,6 @@ #include #include #include -#include #include From a137802ee839ace40079bebde24cfb416f73208a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Jul 2009 11:25:04 -0700 Subject: [PATCH 738/741] Don't use '-fwrapv' compiler option: it's buggy in gcc-4.1.x This causes kernel images that don't run init to completion with certain broken gcc versions. This fixes kernel bugzilla entry: http://bugzilla.kernel.org/show_bug.cgi?id=13012 I suspect the gcc problem is this: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=28230 Fix the problem by using the -fno-strict-overflow flag instead, which not only does not exist in the known-to-be-broken versions of gcc (it was introduced later than fwrapv), but seems to be much less disturbing to gcc too: the difference in the generated code by -fno-strict-overflow are smaller (compared to using neither flag) than when using -fwrapv. Reported-by: Barry K. 
Nathan Pushed-by: Frans Pop Cc: Andrew Morton Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0aeec59c1f0a..bbe8453baa74 100644 --- a/Makefile +++ b/Makefile @@ -565,7 +565,7 @@ KBUILD_CFLAGS += $(call cc-option,-Wdeclaration-after-statement,) KBUILD_CFLAGS += $(call cc-option,-Wno-pointer-sign,) # disable invalid "can't wrap" optimizations for signed / pointers -KBUILD_CFLAGS += $(call cc-option,-fwrapv) +KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) # revert to pre-gcc-4.4 behaviour of .eh_frame KBUILD_CFLAGS += $(call cc-option,-fno-dwarf2-cfi-asm) From f9fabcb58a6d26d6efde842d1703ac7cfa9427b6 Mon Sep 17 00:00:00 2001 From: Julien Tinnes Date: Fri, 26 Jun 2009 20:27:40 +0200 Subject: [PATCH 739/741] personality: fix PER_CLEAR_ON_SETID We have found that the current PER_CLEAR_ON_SETID mask on Linux includes neither ADDR_COMPAT_LAYOUT nor MMAP_PAGE_ZERO. The current mask is READ_IMPLIES_EXEC|ADDR_NO_RANDOMIZE. We believe it is important to add MMAP_PAGE_ZERO, because by using this personality it is possible to have the first page mapped inside a process running as setuid root. This could be used in these scenarios: - Exploiting a NULL pointer dereference issue in a setuid root binary - Bypassing the mmap_min_addr restrictions of the Linux kernel: by running a setuid binary that would drop privileges before giving us control back (for instance by loading a user-supplied library), we could get the first page mapped in a process we control. By further using mremap and mprotect on this mapping, we can then completely bypass the mmap_min_addr restrictions. Less importantly, we believe ADDR_COMPAT_LAYOUT should also be added since on 32-bit x86 it will in practice disable most of the address space layout randomization (only the stack will remain randomized).
Signed-off-by: Julien Tinnes Signed-off-by: Tavis Ormandy Cc: stable@kernel.org Acked-by: Christoph Hellwig Acked-by: Kees Cook Acked-by: Eugene Teo [ Shortened lines and fixed whitespace as per Christoph's suggestion ] Signed-off-by: Linus Torvalds --- include/linux/personality.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/personality.h b/include/linux/personality.h index a84e9ff9b27e..126120819a0d 100644 --- a/include/linux/personality.h +++ b/include/linux/personality.h @@ -40,7 +40,10 @@ enum { * Security-relevant compatibility flags that must be * cleared upon setuid or setgid exec: */ -#define PER_CLEAR_ON_SETID (READ_IMPLIES_EXEC|ADDR_NO_RANDOMIZE) +#define PER_CLEAR_ON_SETID (READ_IMPLIES_EXEC | \ + ADDR_NO_RANDOMIZE | \ + ADDR_COMPAT_LAYOUT | \ + MMAP_PAGE_ZERO) /* * Personality types. From 405f55712dfe464b3240d7816cc4fe4174831be2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 11 Jul 2009 22:08:37 +0400 Subject: [PATCH 740/741] headers: smp_lock.h redux * Remove smp_lock.h from files which don't need it (including some headers!)
* Add smp_lock.h to files which do need it * Make smp_lock.h include conditional in hardirq.h It's needed only for one kernel_locked() usage which is under CONFIG_PREEMPT This will make hardirq.h inclusion cheaper for every PREEMPT=n config (which includes allmodconfig/allyesconfig, BTW) Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- arch/alpha/kernel/ptrace.c | 1 - arch/blackfin/kernel/ptrace.c | 1 - arch/blackfin/kernel/sys_bfin.c | 1 - arch/cris/kernel/sys_cris.c | 1 - arch/ia64/kernel/ptrace.c | 1 - arch/m32r/kernel/ptrace.c | 1 - arch/microblaze/kernel/ptrace.c | 1 - arch/microblaze/kernel/signal.c | 1 - arch/microblaze/kernel/sys_microblaze.c | 1 - arch/mips/kernel/ptrace32.c | 1 - arch/mips/mm/hugetlbpage.c | 1 - arch/mn10300/kernel/ptrace.c | 1 - arch/mn10300/kernel/signal.c | 1 - arch/mn10300/kernel/sys_mn10300.c | 1 - arch/mn10300/kernel/traps.c | 1 - arch/mn10300/mm/fault.c | 1 - arch/mn10300/mm/misalignment.c | 1 - arch/powerpc/kernel/ptrace32.c | 1 - arch/s390/kernel/dis.c | 1 - arch/s390/kernel/ptrace.c | 1 - arch/s390/mm/fault.c | 1 - arch/sh/mm/tlb-sh3.c | 1 - arch/sparc/kernel/ptrace_32.c | 1 - arch/sparc/kernel/ptrace_64.c | 1 - arch/sparc/kernel/time_64.c | 1 - arch/sparc/kernel/traps_32.c | 1 - drivers/block/DAC960.c | 1 + drivers/block/cciss.c | 1 + drivers/block/loop.c | 1 - drivers/bluetooth/hci_vhci.c | 1 - drivers/char/amiserial.c | 1 + drivers/char/cyclades.c | 1 + drivers/char/epca.c | 1 + drivers/char/isicom.c | 1 + drivers/char/istallion.c | 1 + drivers/char/moxa.c | 1 + drivers/char/mxser.c | 1 + drivers/char/n_hdlc.c | 1 + drivers/char/n_r3964.c | 1 + drivers/char/pty.c | 1 + drivers/char/rio/rio_linux.c | 1 + drivers/char/riscom8.c | 1 + drivers/char/rocket.c | 1 + drivers/char/serial167.c | 1 + drivers/char/specialix.c | 1 + drivers/char/sx.c | 1 + drivers/char/synclink.c | 1 + drivers/char/synclink_gt.c | 1 + drivers/char/synclinkmp.c | 1 + drivers/char/tpm/tpm.c | 1 - drivers/char/tty_ioctl.c | 1 - 
drivers/char/tty_ldisc.c | 1 - drivers/char/vt.c | 1 + drivers/char/vt_ioctl.c | 1 + drivers/gpio/vr41xx_giu.c | 1 - drivers/hid/usbhid/hid-core.c | 1 - drivers/isdn/hisax/hfc_usb.c | 1 - drivers/isdn/i4l/isdn_tty.c | 1 + drivers/isdn/mISDN/stack.c | 1 + drivers/media/dvb/bt8xx/dst_ca.c | 1 + drivers/media/dvb/dvb-core/dvbdev.h | 1 - drivers/media/dvb/ttpci/av7110.c | 1 - drivers/media/radio/radio-mr800.c | 1 + drivers/media/radio/radio-si470x.c | 1 + drivers/media/video/bt8xx/bttv-driver.c | 1 + drivers/media/video/cx23885/cx23885-417.c | 1 + drivers/media/video/cx23885/cx23885-video.c | 1 + drivers/media/video/cx88/cx88-blackbird.c | 1 + drivers/media/video/cx88/cx88-video.c | 1 + drivers/media/video/dabusb.c | 1 + drivers/media/video/pwc/pwc-if.c | 1 + drivers/media/video/pwc/pwc.h | 1 - drivers/media/video/s2255drv.c | 1 + drivers/media/video/saa5246a.c | 1 - drivers/media/video/saa5249.c | 1 - drivers/media/video/saa7134/saa7134-empress.c | 1 + drivers/media/video/se401.c | 1 + drivers/media/video/stk-webcam.c | 1 + drivers/media/video/stradis.c | 1 + drivers/media/video/stv680.c | 1 + drivers/media/video/usbvideo/vicam.c | 1 + drivers/media/video/usbvision/usbvision-video.c | 1 + drivers/media/video/v4l2-dev.c | 1 - drivers/media/video/zoran/zoran_driver.c | 1 + drivers/misc/sgi-gru/grufile.c | 1 - drivers/misc/sgi-gru/grukservices.c | 1 - drivers/net/irda/irtty-sir.c | 1 - drivers/pci/hotplug/cpci_hotplug_core.c | 1 - drivers/pci/hotplug/cpqphp_ctrl.c | 1 - drivers/pci/hotplug/cpqphp_sysfs.c | 1 + drivers/pci/hotplug/pciehp_ctrl.c | 1 - drivers/pci/syscall.c | 1 - drivers/s390/block/dasd_ioctl.c | 1 + drivers/scsi/qla2xxx/qla_mid.c | 1 - drivers/telephony/ixj.c | 1 + drivers/telephony/phonedev.c | 1 - drivers/usb/class/cdc-wdm.c | 1 - drivers/usb/gadget/amd5536udc.c | 1 - drivers/usb/gadget/langwell_udc.c | 1 - drivers/usb/gadget/s3c2410_udc.c | 1 - drivers/usb/host/r8a66597-hcd.c | 1 - drivers/usb/misc/iowarrior.c | 1 + drivers/usb/misc/rio500.c | 1 + 
drivers/usb/misc/usblcd.c | 1 + drivers/usb/musb/cppi_dma.h | 1 - drivers/usb/musb/musb_core.h | 1 - drivers/usb/serial/ftdi_sio.c | 1 + drivers/usb/serial/mos7840.c | 1 + drivers/usb/serial/usb-serial.c | 1 + drivers/video/fbmem.c | 1 - fs/adfs/super.c | 1 + fs/afs/super.c | 1 + fs/autofs4/dev-ioctl.c | 1 - fs/bfs/dir.c | 1 - fs/bfs/file.c | 1 - fs/btrfs/compression.c | 1 - fs/btrfs/file.c | 1 - fs/btrfs/inode.c | 1 - fs/btrfs/ioctl.c | 1 - fs/btrfs/super.c | 1 - fs/char_dev.c | 1 - fs/compat.c | 1 - fs/compat_ioctl.c | 1 + fs/exofs/super.c | 1 + fs/ext2/ioctl.c | 1 - fs/ext4/ioctl.c | 1 - fs/fat/dir.c | 1 - fs/fat/namei_msdos.c | 1 - fs/fat/namei_vfat.c | 1 - fs/fcntl.c | 1 - fs/freevxfs/vxfs_super.c | 1 + fs/hfs/super.c | 1 + fs/hfsplus/super.c | 1 + fs/hpfs/dir.c | 1 + fs/hpfs/file.c | 1 + fs/hpfs/hpfs_fn.h | 1 - fs/hpfs/inode.c | 1 + fs/hpfs/namei.c | 1 + fs/jffs2/super.c | 1 + fs/lockd/clntproc.c | 1 + fs/lockd/svc4proc.c | 1 + fs/lockd/svcproc.c | 1 + fs/nfs/delegation.c | 1 + fs/nfs/dir.c | 1 - fs/nfs/file.c | 1 - fs/nfs/inode.c | 1 - fs/nfs/nfs4proc.c | 1 - fs/nfs/read.c | 1 - fs/nfsd/nfsctl.c | 1 - fs/nfsd/nfssvc.c | 1 - fs/nilfs2/dir.c | 1 - fs/ocfs2/ioctl.c | 1 - fs/reiserfs/xattr.c | 1 - fs/squashfs/super.c | 1 + fs/ubifs/ioctl.c | 1 - fs/xfs/linux-2.6/xfs_file.c | 1 - include/linux/crash_dump.h | 1 - include/linux/hardirq.h | 2 ++ include/linux/quotaops.h | 1 - include/linux/sunrpc/xdr.h | 1 - kernel/power/user.c | 1 - kernel/trace/blktrace.c | 1 + kernel/trace/trace.c | 1 + net/appletalk/ddp.c | 1 + net/ipx/af_ipx.c | 1 + net/irda/af_irda.c | 1 + net/irda/irnet/irnet.h | 1 - net/irda/irnet/irnet_ppp.c | 1 + net/sunrpc/clnt.c | 1 - net/sunrpc/sched.c | 1 - net/sunrpc/svc_xprt.c | 1 + net/wanrouter/wanmain.c | 1 + net/x25/af_x25.c | 1 + 173 files changed, 81 insertions(+), 93 deletions(-) diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index 1e9ad52c460e..e072041d19f8 100644 --- a/arch/alpha/kernel/ptrace.c +++ 
b/arch/alpha/kernel/ptrace.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c index d76618db50df..6a387eec6b65 100644 --- a/arch/blackfin/kernel/ptrace.c +++ b/arch/blackfin/kernel/ptrace.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/blackfin/kernel/sys_bfin.c b/arch/blackfin/kernel/sys_bfin.c index a8f1329c15a4..3da60fb13ce4 100644 --- a/arch/blackfin/kernel/sys_bfin.c +++ b/arch/blackfin/kernel/sys_bfin.c @@ -29,7 +29,6 @@ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#include #include #include #include diff --git a/arch/cris/kernel/sys_cris.c b/arch/cris/kernel/sys_cris.c index a79fbd87021b..2ad962c7e88e 100644 --- a/arch/cris/kernel/sys_cris.c +++ b/arch/cris/kernel/sys_cris.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 92c9689b7d97..9daa87fdb018 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index bf0abe9e1f73..98b8feb12ed8 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c index b86aa623e36d..53ff39af6a5c 100644 --- a/arch/microblaze/kernel/ptrace.c +++ b/arch/microblaze/kernel/ptrace.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c index 493819c25fba..1c80e4fc40ce 100644 --- a/arch/microblaze/kernel/signal.c +++ b/arch/microblaze/kernel/signal.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include 
diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c index 8c9ebac5da10..e000bce09b2b 100644 --- a/arch/microblaze/kernel/sys_microblaze.c +++ b/arch/microblaze/kernel/sys_microblaze.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index c4f9ac17474a..32644b4a0714 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c index 471c09aa1614..8c2834f5919d 100644 --- a/arch/mips/mm/hugetlbpage.c +++ b/arch/mips/mm/hugetlbpage.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mn10300/kernel/ptrace.c b/arch/mn10300/kernel/ptrace.c index e143339ad28e..cf847dabc1bd 100644 --- a/arch/mn10300/kernel/ptrace.c +++ b/arch/mn10300/kernel/ptrace.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c index 9f7572a0f578..feb2f2e810db 100644 --- a/arch/mn10300/kernel/signal.c +++ b/arch/mn10300/kernel/signal.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mn10300/kernel/sys_mn10300.c b/arch/mn10300/kernel/sys_mn10300.c index bca5a84dc72c..29d196b83d25 100644 --- a/arch/mn10300/kernel/sys_mn10300.c +++ b/arch/mn10300/kernel/sys_mn10300.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mn10300/kernel/traps.c b/arch/mn10300/kernel/traps.c index 0dfdc5001124..91365adba4f5 100644 --- a/arch/mn10300/kernel/traps.c +++ b/arch/mn10300/kernel/traps.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c index a62e1e138bc1..53bb17d0f068 100644 
--- a/arch/mn10300/mm/fault.c +++ b/arch/mn10300/mm/fault.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include /* For unblank_screen() */ diff --git a/arch/mn10300/mm/misalignment.c b/arch/mn10300/mm/misalignment.c index 94c4a4358065..30016251f658 100644 --- a/arch/mn10300/mm/misalignment.c +++ b/arch/mn10300/mm/misalignment.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 297632cba047..8a6daf4129f6 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index d2f270c995d9..db943a7ec513 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 490b39934d65..43acd73105b7 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 74eb26bf1970..e5e119fe03b2 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sh/mm/tlb-sh3.c b/arch/sh/mm/tlb-sh3.c index 7fbfd5a11ffa..17cb7c3adf22 100644 --- a/arch/sh/mm/tlb-sh3.c +++ b/arch/sh/mm/tlb-sh3.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include diff --git a/arch/sparc/kernel/ptrace_32.c b/arch/sparc/kernel/ptrace_32.c index 8ce6285a06d5..7e3dfd9bb97e 100644 --- a/arch/sparc/kernel/ptrace_32.c +++ b/arch/sparc/kernel/ptrace_32.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/ptrace_64.c 
b/arch/sparc/kernel/ptrace_64.c index a941c610e7ce..4ae91dc2feb9 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 5c12e79b4bdf..da1218e8ee87 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index 358283341b47..c0490c7bbde0 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 668dc234b8e2..1e6b7c14f697 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 65a0655e7fc8..a52cc7fe45ea 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 801f4ab83302..5757188cd1fb 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -61,7 +61,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index 1df9dda2e377..d5cde6d86f89 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/char/amiserial.c b/drivers/char/amiserial.c index 72429b6b2fa8..6c32fbf07164 100644 --- a/drivers/char/amiserial.c +++ b/drivers/char/amiserial.c @@ -81,6 +81,7 @@ static char *serial_version = "4.30"; #include #include #include +#include #include #include 
diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c index f3366d3f06cf..2dafc2da0648 100644 --- a/drivers/char/cyclades.c +++ b/drivers/char/cyclades.c @@ -633,6 +633,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/epca.c b/drivers/char/epca.c index abef1f7d84fe..ff647ca1c489 100644 --- a/drivers/char/epca.c +++ b/drivers/char/epca.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c index 621d1184673c..4f1f4cd670da 100644 --- a/drivers/char/isicom.c +++ b/drivers/char/isicom.c @@ -122,6 +122,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c index 0c999f5bb3db..ab2f3349c5c4 100644 --- a/drivers/char/istallion.c +++ b/drivers/char/istallion.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c index 65b6ff2442c6..dd0083bbb64a 100644 --- a/drivers/char/moxa.c +++ b/drivers/char/moxa.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c index 52d953eb30c3..dbf8d52f31d0 100644 --- a/drivers/char/mxser.c +++ b/drivers/char/mxser.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/n_hdlc.c b/drivers/char/n_hdlc.c index 1c43c8cdee25..c68118efad84 100644 --- a/drivers/char/n_hdlc.c +++ b/drivers/char/n_hdlc.c @@ -97,6 +97,7 @@ #include #include #include +#include #include /* used in new tty drivers */ #include /* used in new tty drivers */ #include diff --git a/drivers/char/n_r3964.c b/drivers/char/n_r3964.c index 2e99158ebb8a..6934025a1ac1 100644 --- a/drivers/char/n_r3964.c +++ b/drivers/char/n_r3964.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include /* used in new tty drivers */ diff --git 
a/drivers/char/pty.c b/drivers/char/pty.c index 9d1b4f548f67..6e6942c45f5b 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c index ce81da5b2da9..d58c2eb07f07 100644 --- a/drivers/char/rio/rio_linux.c +++ b/drivers/char/rio/rio_linux.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include diff --git a/drivers/char/riscom8.c b/drivers/char/riscom8.c index 217660451237..171711acf5cd 100644 --- a/drivers/char/riscom8.c +++ b/drivers/char/riscom8.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c index 63d5b628477a..0e29a23ec4c5 100644 --- a/drivers/char/rocket.c +++ b/drivers/char/rocket.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/serial167.c b/drivers/char/serial167.c index f1f24f0ee26f..51e7a46787be 100644 --- a/drivers/char/serial167.c +++ b/drivers/char/serial167.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/specialix.c b/drivers/char/specialix.c index e72be4190a44..bfe4cdb2febb 100644 --- a/drivers/char/specialix.c +++ b/drivers/char/specialix.c @@ -87,6 +87,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/sx.c b/drivers/char/sx.c index 518f2a25d91e..a81ec4fcf6ff 100644 --- a/drivers/char/sx.c +++ b/drivers/char/sx.c @@ -216,6 +216,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c index afded3a2379c..813552f14884 100644 --- a/drivers/char/synclink.c +++ b/drivers/char/synclink.c @@ -81,6 +81,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index 
a2e67e6df3a1..91f20a92fddf 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c index 6f727e3c53ad..8d4a2a8a0a70 100644 --- a/drivers/char/synclinkmp.c +++ b/drivers/char/synclinkmp.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index ccdd828adcef..b0603b2e5684 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -26,7 +26,6 @@ #include #include #include -#include #include "tpm.h" diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c index b24f6c6a1ea3..ad6ba4ed2808 100644 --- a/drivers/char/tty_ioctl.c +++ b/drivers/char/tty_ioctl.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c index 913aa8d3f1c5..0ef0dc97ba20 100644 --- a/drivers/char/tty_ldisc.c +++ b/drivers/char/tty_ldisc.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/char/vt.c b/drivers/char/vt.c index d9113b4c76e3..7947bd1b4cf7 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c index 7539bed0f7e0..95189f288f8c 100644 --- a/drivers/char/vt_ioctl.c +++ b/drivers/char/vt_ioctl.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpio/vr41xx_giu.c b/drivers/gpio/vr41xx_giu.c index b70e06133e78..b16c9a8c03f5 100644 --- a/drivers/gpio/vr41xx_giu.c +++ b/drivers/gpio/vr41xx_giu.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 76c4bbe9dccb..3c1fcb7640ab 100644 --- 
a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/isdn/hisax/hfc_usb.c b/drivers/isdn/hisax/hfc_usb.c index 8df889b0c1a9..9de54202c90c 100644 --- a/drivers/isdn/hisax/hfc_usb.c +++ b/drivers/isdn/hisax/hfc_usb.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include "hisax.h" diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c index b4d4522e5071..2881a66c1aa9 100644 --- a/drivers/isdn/i4l/isdn_tty.c +++ b/drivers/isdn/i4l/isdn_tty.c @@ -13,6 +13,7 @@ #include #include +#include #include "isdn_common.h" #include "isdn_tty.h" #ifdef CONFIG_ISDN_AUDIO diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index e2f45019ebf0..3e1532a180ff 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -17,6 +17,7 @@ #include #include +#include #include "core.h" static u_int *debug; diff --git a/drivers/media/dvb/bt8xx/dst_ca.c b/drivers/media/dvb/bt8xx/dst_ca.c index 4601b059b2b2..0e246eaad05a 100644 --- a/drivers/media/dvb/bt8xx/dst_ca.c +++ b/drivers/media/dvb/bt8xx/dst_ca.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include "dvbdev.h" diff --git a/drivers/media/dvb/dvb-core/dvbdev.h b/drivers/media/dvb/dvb-core/dvbdev.h index 79927305e84d..487919bea7ae 100644 --- a/drivers/media/dvb/dvb-core/dvbdev.h +++ b/drivers/media/dvb/dvb-core/dvbdev.h @@ -27,7 +27,6 @@ #include #include #include -#include #define DVB_MAJOR 212 diff --git a/drivers/media/dvb/ttpci/av7110.c b/drivers/media/dvb/ttpci/av7110.c index d1d959ed37b7..8d65c652ba50 100644 --- a/drivers/media/dvb/ttpci/av7110.c +++ b/drivers/media/dvb/ttpci/av7110.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include diff --git a/drivers/media/radio/radio-mr800.c b/drivers/media/radio/radio-mr800.c index 837467f93805..575bf9d89419 100644 --- a/drivers/media/radio/radio-mr800.c 
+++ b/drivers/media/radio/radio-mr800.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/radio/radio-si470x.c b/drivers/media/radio/radio-si470x.c index 46d216329611..e85f318b4d2b 100644 --- a/drivers/media/radio/radio-si470x.c +++ b/drivers/media/radio/radio-si470x.c @@ -127,6 +127,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c index 5eb1464af670..d147d29bb0d3 100644 --- a/drivers/media/video/bt8xx/bttv-driver.c +++ b/drivers/media/video/bt8xx/bttv-driver.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include "bttvp.h" diff --git a/drivers/media/video/cx23885/cx23885-417.c b/drivers/media/video/cx23885/cx23885-417.c index 2943bfd32a94..428f0c45e6b7 100644 --- a/drivers/media/video/cx23885/cx23885-417.c +++ b/drivers/media/video/cx23885/cx23885-417.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/cx23885/cx23885-video.c b/drivers/media/video/cx23885/cx23885-video.c index 70836af3ab48..5d6093336300 100644 --- a/drivers/media/video/cx23885/cx23885-video.c +++ b/drivers/media/video/cx23885/cx23885-video.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/cx88/cx88-blackbird.c b/drivers/media/video/cx88/cx88-blackbird.c index 44eacfb0d0d6..356d6896da3f 100644 --- a/drivers/media/video/cx88/cx88-blackbird.c +++ b/drivers/media/video/cx88/cx88-blackbird.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c index b12770848c00..2bb54c3ef5cd 100644 --- a/drivers/media/video/cx88/cx88-video.c +++ b/drivers/media/video/cx88/cx88-video.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff 
--git a/drivers/media/video/dabusb.c b/drivers/media/video/dabusb.c index ec2f45dde164..0664d111085f 100644 --- a/drivers/media/video/dabusb.c +++ b/drivers/media/video/dabusb.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/pwc/pwc-if.c b/drivers/media/video/pwc/pwc-if.c index db25c3034c11..8d17cf613306 100644 --- a/drivers/media/video/pwc/pwc-if.c +++ b/drivers/media/video/pwc/pwc-if.c @@ -62,6 +62,7 @@ #include #include #include +#include #ifdef CONFIG_USB_PWC_INPUT_EVDEV #include #endif diff --git a/drivers/media/video/pwc/pwc.h b/drivers/media/video/pwc/pwc.h index 0be6f814f539..0b658dee05a4 100644 --- a/drivers/media/video/pwc/pwc.h +++ b/drivers/media/video/pwc/pwc.h @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/media/video/s2255drv.c b/drivers/media/video/s2255drv.c index 6be845ccc7d7..9e3262c0ba37 100644 --- a/drivers/media/video/s2255drv.c +++ b/drivers/media/video/s2255drv.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/saa5246a.c b/drivers/media/video/saa5246a.c index 155804b061e9..b624a4c01fdc 100644 --- a/drivers/media/video/saa5246a.c +++ b/drivers/media/video/saa5246a.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/media/video/saa5249.c b/drivers/media/video/saa5249.c index 271d6e931b75..12835fb82c95 100644 --- a/drivers/media/video/saa5249.c +++ b/drivers/media/video/saa5249.c @@ -46,7 +46,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/media/video/saa7134/saa7134-empress.c b/drivers/media/video/saa7134/saa7134-empress.c index add1757f8930..296788c3bf0e 100644 --- a/drivers/media/video/saa7134/saa7134-empress.c +++ b/drivers/media/video/saa7134/saa7134-empress.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "saa7134-reg.h" diff --git 
a/drivers/media/video/se401.c b/drivers/media/video/se401.c index c8f05297d0f0..85ffc2cba039 100644 --- a/drivers/media/video/se401.c +++ b/drivers/media/video/se401.c @@ -31,6 +31,7 @@ static const char version[] = "0.24"; #include #include #include +#include #include #include #include "se401.h" diff --git a/drivers/media/video/stk-webcam.c b/drivers/media/video/stk-webcam.c index 2e5937047278..4d6785e63455 100644 --- a/drivers/media/video/stk-webcam.c +++ b/drivers/media/video/stk-webcam.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/drivers/media/video/stradis.c b/drivers/media/video/stradis.c index 0eb313082c97..eaada39c76fd 100644 --- a/drivers/media/video/stradis.c +++ b/drivers/media/video/stradis.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/stv680.c b/drivers/media/video/stv680.c index 75f286f7a2e9..8b4e7dafce7b 100644 --- a/drivers/media/video/stv680.c +++ b/drivers/media/video/stv680.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/usbvideo/vicam.c b/drivers/media/video/usbvideo/vicam.c index 8d73979596f9..45fce39ec9ad 100644 --- a/drivers/media/video/usbvideo/vicam.c +++ b/drivers/media/video/usbvideo/vicam.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/usbvision/usbvision-video.c b/drivers/media/video/usbvision/usbvision-video.c index 90b58914f984..90d9b5c0e9a7 100644 --- a/drivers/media/video/usbvision/usbvision-video.c +++ b/drivers/media/video/usbvision/usbvision-video.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/v4l2-dev.c b/drivers/media/video/v4l2-dev.c index 31eac66411d7..a7f1b69a7dab 100644 --- a/drivers/media/video/v4l2-dev.c +++ b/drivers/media/video/v4l2-dev.c @@ -25,7 +25,6 @@ #include #include #include -#include #include 
#include diff --git a/drivers/media/video/zoran/zoran_driver.c b/drivers/media/video/zoran/zoran_driver.c index 3d7df32a3d87..bcdefb1bcb3d 100644 --- a/drivers/media/video/zoran/zoran_driver.c +++ b/drivers/media/video/zoran/zoran_driver.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index fa2d93a9fb8d..aed609832bc2 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c index eedbf9c32760..79689b10f937 100644 --- a/drivers/misc/sgi-gru/grukservices.c +++ b/drivers/misc/sgi-gru/grukservices.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index d53aa9582137..20f9bc626688 100644 --- a/drivers/net/irda/irtty-sir.c +++ b/drivers/net/irda/irtty-sir.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c index a5b9f6ae507b..d703e73fffa7 100644 --- a/drivers/pci/hotplug/cpci_hotplug_core.c +++ b/drivers/pci/hotplug/cpci_hotplug_core.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c b/drivers/pci/hotplug/cpqphp_ctrl.c index 2fa47af992a8..0ff689afa757 100644 --- a/drivers/pci/hotplug/cpqphp_ctrl.c +++ b/drivers/pci/hotplug/cpqphp_ctrl.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/hotplug/cpqphp_sysfs.c b/drivers/pci/hotplug/cpqphp_sysfs.c index 8450f4a6568a..e6089bdb6e5b 100644 --- a/drivers/pci/hotplug/cpqphp_sysfs.c +++ b/drivers/pci/hotplug/cpqphp_sysfs.c @@ -33,6 +33,7 @@ #include #include #include +#include 
#include #include "cpqphp.h" diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index ff4034502d24..8aab8edf123e 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include "../pci.h" diff --git a/drivers/pci/syscall.c b/drivers/pci/syscall.c index ec22284eed30..e1c1ec540893 100644 --- a/drivers/pci/syscall.c +++ b/drivers/pci/syscall.c @@ -9,7 +9,6 @@ #include #include -#include #include #include #include "pci.h" diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 4ce3f72ee1c1..df918ef27965 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 650bcef08f2a..cd78c501803a 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -9,7 +9,6 @@ #include #include -#include #include #include diff --git a/drivers/telephony/ixj.c b/drivers/telephony/ixj.c index a913efc69669..40de151f2789 100644 --- a/drivers/telephony/ixj.c +++ b/drivers/telephony/ixj.c @@ -257,6 +257,7 @@ #include /* everything... 
*/ #include /* error codes */ #include +#include #include #include #include diff --git a/drivers/telephony/phonedev.c b/drivers/telephony/phonedev.c index b52cc830c0b4..f3873f650bb4 100644 --- a/drivers/telephony/phonedev.c +++ b/drivers/telephony/phonedev.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 0fe434505ac4..ba589d4ca8bc 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/usb/gadget/amd5536udc.c b/drivers/usb/gadget/amd5536udc.c index 826f3adde5d8..77352ccc245e 100644 --- a/drivers/usb/gadget/amd5536udc.c +++ b/drivers/usb/gadget/amd5536udc.c @@ -48,7 +48,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/usb/gadget/langwell_udc.c b/drivers/usb/gadget/langwell_udc.c index 6829d5961359..a3913519fd58 100644 --- a/drivers/usb/gadget/langwell_udc.c +++ b/drivers/usb/gadget/langwell_udc.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/usb/gadget/s3c2410_udc.c b/drivers/usb/gadget/s3c2410_udc.c index 9a2b8920532d..a9b452fe6221 100644 --- a/drivers/usb/gadget/s3c2410_udc.c +++ b/drivers/usb/gadget/s3c2410_udc.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index 56976cc0352a..e18f74946e68 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 3c5fe5cee05a..90e1a8dedfa9 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git 
a/drivers/usb/misc/rio500.c b/drivers/usb/misc/rio500.c index deb95bb49fd1..d645f3899fe1 100644 --- a/drivers/usb/misc/rio500.c +++ b/drivers/usb/misc/rio500.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/misc/usblcd.c b/drivers/usb/misc/usblcd.c index e0ff9ccd866b..29092b8e59ce 100644 --- a/drivers/usb/misc/usblcd.c +++ b/drivers/usb/misc/usblcd.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/musb/cppi_dma.h b/drivers/usb/musb/cppi_dma.h index 8a39de3e6e47..59bf949e589b 100644 --- a/drivers/usb/musb/cppi_dma.h +++ b/drivers/usb/musb/cppi_dma.h @@ -5,7 +5,6 @@ #include #include -#include #include #include diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index f3772ca3b2cf..381d648a36b8 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 5f08702f672f..5a8ae274d258 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index c40f95c1951c..c31940a307f8 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index a84216464ca0..0c39b55aeef4 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 53ea05645ff8..a85c818be945 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -16,7 +16,6 @@ #include 
#include #include -#include #include #include #include diff --git a/fs/adfs/super.c b/fs/adfs/super.c index aad92f0a1048..6910a98bd73c 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "adfs.h" #include "dir_f.h" diff --git a/fs/afs/super.c b/fs/afs/super.c index ad0514d0115f..e1ea1c240b6a 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index f3da2eb51f56..00bf8fcb245f 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 54bd07d44e68..1e41aadb1068 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include "bfs.h" diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 6a021265f018..88b9a3ff44e4 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -11,7 +11,6 @@ #include #include -#include #include "bfs.h" #undef DEBUG diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index de1e2fd32080..9d8ba4d54a37 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7c3cd248d8d6..4b833972273a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7ffa3d34ea19..791eab19e330 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9f4db848db10..bd88f25889f7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -27,7 +27,6 @@ #include #include #include 
-#include #include #include #include diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9f179d4832d5..6d6d06cb6dfc 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/char_dev.c b/fs/char_dev.c index b7c9d5187a75..a173551e19d7 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include diff --git a/fs/compat.c b/fs/compat.c index fbadb947727b..94502dab972a 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 626c7483b4de..f28f070a60fc 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/exofs/super.c b/fs/exofs/super.c index a343b4ea62f6..5ab10c3bbebe 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -31,6 +31,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include #include #include #include diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 7cb4badef927..e7431309bdca 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index bb415408fdb6..24a6abb2aef5 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 38ff75a0fe22..530b4ca01510 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 82f88733b681..bbc94ae4fd77 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -9,7 +9,6 @@ #include #include #include -#include #include "fat.h" /* Characters that are 
undesirable in an MS-DOS file name */ diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 73471b7ecc8c..cb6e83557112 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include "fat.h" diff --git a/fs/fcntl.c b/fs/fcntl.c index a040b764f8e3..ae413086db97 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index cdbd1654e4cd..1e8af939b3e4 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 6f833dc8e910..f7fcbe49da72 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "hfs_fs.h" diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 9fc3af0c0dab..c0759fe0855b 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 6916c41d7017..8865c94f55f6 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -6,6 +6,7 @@ * directory VFS functions */ +#include #include "hpfs_fn.h" static int hpfs_dir_release(struct inode *inode, struct file *filp) diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 64ab52259204..3efabff00367 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -6,6 +6,7 @@ * file VFS functions */ +#include #include "hpfs_fn.h" #define BLOCKS(size) (((size) + 511) >> 9) diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index c2ea31bae313..701ca54c0867 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -13,7 +13,6 @@ #include #include #include -#include #include "hpfs.h" diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 39a1bfbea312..fe703ae46bc7 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -6,6 +6,7 
@@ * inode VFS functions */ +#include #include "hpfs_fn.h" void hpfs_init_inode(struct inode *i) diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index b649232dde97..82b9c4ba9ed0 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -6,6 +6,7 @@ * adding & removing files & directories */ #include +#include #include "hpfs_fn.h" static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 07a22caf2687..0035c021395a 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index f2fdcbce143e..4336adba952a 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 1725037374c5..bd173a6ca3b1 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 3688e55901fc..e1d28ddd2169 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index af05b918cb5b..6dd48a4405b4 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 89f98e9a024b..38d42c29fb92 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 0055b813ec2c..05062329b678 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 64f87194d390..bd7938eda6a8 100644 --- 
a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 92ce43517814..ff0c080db59b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -45,7 +45,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 96c4ebfa46f4..73ea5e8d66ce 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -18,7 +18,6 @@ #include #include #include -#include #include diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 1250fb978ac1..6d0847562d87 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index d4c9884cd54b..492c79b7800b 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 54100acc1102..1a4fa04cf071 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -43,7 +43,6 @@ */ #include -#include #include "nilfs.h" #include "page.h" diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 9fcd36dcc9a0..467b413bec21 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -7,7 +7,6 @@ #include #include -#include #define MLOG_MASK_PREFIX ML_INODE #include diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index f3d47d856848..6925b835a43b 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -46,7 +46,6 @@ #include #include #include -#include #include #include diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 3b52770f46ff..cb5fc57e370b 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 6db7a6be6c97..8aacd64957a2 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -25,7 +25,6 @@ /* This file 
implements EXT2-compatible extended attribute ioctl() calls */ #include -#include #include #include "ubifs.h" diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index f4e255441574..0542fd507649 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -41,7 +41,6 @@ #include "xfs_ioctl.h" #include -#include static struct vm_operations_struct xfs_file_vm_ops; diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 2dac064d8359..0026f267da20 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -3,7 +3,6 @@ #ifdef CONFIG_CRASH_DUMP #include -#include #include #include diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 45257475623c..8246c697863d 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -2,7 +2,9 @@ #define LINUX_HARDIRQ_H #include +#ifdef CONFIG_PREEMPT #include +#endif #include #include #include diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 7bc457593684..26361c4c037a 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -7,7 +7,6 @@ #ifndef _LINUX_QUOTAOPS_ #define _LINUX_QUOTAOPS_ -#include #include static inline struct quota_info *sb_dqopt(struct super_block *sb) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index d8910b68e1bd..b99c625fddfe 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -12,7 +12,6 @@ #include #include #include -#include /* * Buffer adjustment diff --git a/kernel/power/user.c b/kernel/power/user.c index ed97375daae9..bf0014d6a5f0 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 39af8af6fc30..1090b0aed9ba 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/kernel/trace/trace.c 
b/kernel/trace/trace.c index 3aa0a0dfdfa8..8bc8d8afea6a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 590b83963622..bfbe13786bb4 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -54,6 +54,7 @@ #include #include #include +#include #include /* For TIOCOUTQ/INQ */ #include #include diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 417b0e309495..f1118d92a191 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index cb762c8723ea..80cf29aae096 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index bccf4d0059f0..b001c361ad30 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -241,7 +241,6 @@ #include #include -#include #include #include #include diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 6d8ae03c14f5..68cbcb19cbd8 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -13,6 +13,7 @@ * 2) as a control channel (write commands, read events) */ +#include #include "irnet_ppp.h" /* Private header */ /* Please put other headers in irnet.h - Thanks */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5bc2f45bddf0..ebfcf9b89909 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 1102ce1251f7..8f459abe97cf 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 
6f33d33cc064..27d44332f017 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -5,6 +5,7 @@ */ #include +#include #include #include #include diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 466e2d22d256..258daa80ad92 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -48,6 +48,7 @@ #include #include /* support for loadable modules */ #include /* kmalloc(), kfree() */ +#include #include #include /* inline mem*, str* functions */ diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 21cdc872004e..5e6c072c64d3 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include From dd0d9a46f573b086a67522f819566427dba9c4c7 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 9 Jul 2009 10:44:30 +0100 Subject: [PATCH 741/741] AFS: Fix compilation warning Fix the following warning: fs/afs/dir.c: In function 'afs_d_revalidate': fs/afs/dir.c:567: warning: 'fid.vnode' may be used uninitialized in this function fs/afs/dir.c:567: warning: 'fid.unique' may be used uninitialized in this function by marking the 'fid' variable as an uninitialized_var. The problem is that gcc doesn't always manage to work out that fid is always set on the path through the function that uses it. Cc: linux-afs@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Artem Bityutskiy Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/afs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 9bd757774c9e..88067f36e5e7 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -564,7 +564,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd) { struct afs_vnode *vnode, *dir; - struct afs_fid fid; + struct afs_fid uninitialized_var(fid); struct dentry *parent; struct key *key; void *dir_version;