From fe8d52614bd419cedef85ef55850fd090373f481 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 22 Mar 2013 15:04:37 -0700 Subject: [PATCH 01/12] irq_work.h: fix warning when CONFIG_IRQ_WORK=n A randconfig caught repeated compiler warnings when CONFIG_IRQ_WORK=n due to the definition of a non-inline static function in : include/linux/irq_work.h +40 : warning: 'irq_work_needs_cpu' defined but not used Make it inline to supress the warning. This is caused commit 00b42959106a ("irq_work: Don't stop the tick with pending works") merged in v3.9-rc1. Signed-off-by: James Hogan Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Paul Gortmaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/irq_work.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index f5dbce50466e..66017028dcb3 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -37,7 +37,7 @@ void irq_work_sync(struct irq_work *work); #ifdef CONFIG_IRQ_WORK bool irq_work_needs_cpu(void); #else -static bool irq_work_needs_cpu(void) { return false; } +static inline bool irq_work_needs_cpu(void) { return false; } #endif #endif /* _LINUX_IRQ_WORK_H */ From dc72c32e1fd872a9a4fdfe645283c9dcd68e556d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 22 Mar 2013 15:04:39 -0700 Subject: [PATCH 02/12] printk: Provide a wake_up_klogd() off-case wake_up_klogd() is useless when CONFIG_PRINTK=n because neither printk() nor printk_sched() are in use and there are actually no waiter on log_wait waitqueue. It should be a stub in this case for users like bust_spinlocks(). Otherwise this results in this warning when CONFIG_PRINTK=n and CONFIG_IRQ_WORK=n: kernel/built-in.o In function `wake_up_klogd': (.text.wake_up_klogd+0xb4): undefined reference to `irq_work_queue' To fix this, provide an off-case for wake_up_klogd() when CONFIG_PRINTK=n. There is much more from console_unlock() and other console related code in printk.c that should be moved under CONFIG_PRINTK. But for now, focus on a minimal fix as we passed the merged window already. [akpm@linux-foundation.org: include printk.h in bust_spinlocks.c] Signed-off-by: Frederic Weisbecker Reported-by: James Hogan Cc: James Hogan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 1 - include/linux/printk.h | 6 ++++ kernel/printk.c | 80 ++++++++++++++++++++---------------------- lib/bust_spinlocks.c | 3 +- 4 files changed, 46 insertions(+), 44 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 80d36874689b..79fdd80a42d4 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -390,7 +390,6 @@ extern struct pid *session_of_pgrp(struct pid *pgrp); unsigned long int_sqrt(unsigned long); extern void bust_spinlocks(int yes); -extern void wake_up_klogd(void); extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ extern int panic_timeout; extern int panic_on_oops; diff --git a/include/linux/printk.h b/include/linux/printk.h index 1249a54d17e0..822171fcb1c8 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -134,6 +134,8 @@ extern int printk_delay_msec; extern int dmesg_restrict; extern int kptr_restrict; +extern void wake_up_klogd(void); + void log_buf_kexec_setup(void); void __init setup_log_buf(int early); #else @@ -162,6 +164,10 @@ static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, return false; } +static inline void wake_up_klogd(void) +{ +} + static inline void log_buf_kexec_setup(void) { } diff --git a/kernel/printk.c b/kernel/printk.c index 0b31715f335a..abbdd9e2ac82 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -63,8 +63,6 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...) #define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */ #define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */ -DECLARE_WAIT_QUEUE_HEAD(log_wait); - int console_printk[4] = { DEFAULT_CONSOLE_LOGLEVEL, /* console_loglevel */ DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */ @@ -224,6 +222,7 @@ struct log { static DEFINE_RAW_SPINLOCK(logbuf_lock); #ifdef CONFIG_PRINTK +DECLARE_WAIT_QUEUE_HEAD(log_wait); /* the next printk record to read by syslog(READ) or /proc/kmsg */ static u64 syslog_seq; static u32 syslog_idx; @@ -1957,45 +1956,6 @@ int is_console_locked(void) return console_locked; } -/* - * Delayed printk version, for scheduler-internal messages: - */ -#define PRINTK_BUF_SIZE 512 - -#define PRINTK_PENDING_WAKEUP 0x01 -#define PRINTK_PENDING_SCHED 0x02 - -static DEFINE_PER_CPU(int, printk_pending); -static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf); - -static void wake_up_klogd_work_func(struct irq_work *irq_work) -{ - int pending = __this_cpu_xchg(printk_pending, 0); - - if (pending & PRINTK_PENDING_SCHED) { - char *buf = __get_cpu_var(printk_sched_buf); - printk(KERN_WARNING "[sched_delayed] %s", buf); - } - - if (pending & PRINTK_PENDING_WAKEUP) - wake_up_interruptible(&log_wait); -} - -static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { - .func = wake_up_klogd_work_func, - .flags = IRQ_WORK_LAZY, -}; - -void wake_up_klogd(void) -{ - preempt_disable(); - if (waitqueue_active(&log_wait)) { - this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); - irq_work_queue(&__get_cpu_var(wake_up_klogd_work)); - } - preempt_enable(); -} - static void console_cont_flush(char *text, size_t size) { unsigned long flags; @@ -2458,6 +2418,44 @@ static int __init printk_late_init(void) late_initcall(printk_late_init); #if defined CONFIG_PRINTK +/* + * Delayed printk version, for scheduler-internal messages: + */ +#define PRINTK_BUF_SIZE 512 + +#define PRINTK_PENDING_WAKEUP 0x01 +#define PRINTK_PENDING_SCHED 0x02 + +static DEFINE_PER_CPU(int, printk_pending); +static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf); + +static void wake_up_klogd_work_func(struct irq_work *irq_work) +{ + int pending = __this_cpu_xchg(printk_pending, 0); + + if (pending & PRINTK_PENDING_SCHED) { + char *buf = __get_cpu_var(printk_sched_buf); + printk(KERN_WARNING "[sched_delayed] %s", buf); + } + + if (pending & PRINTK_PENDING_WAKEUP) + wake_up_interruptible(&log_wait); +} + +static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { + .func = wake_up_klogd_work_func, + .flags = IRQ_WORK_LAZY, +}; + +void wake_up_klogd(void) +{ + preempt_disable(); + if (waitqueue_active(&log_wait)) { + this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); + irq_work_queue(&__get_cpu_var(wake_up_klogd_work)); + } + preempt_enable(); +} int printk_sched(const char *fmt, ...) { diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c index 9681d54b95d1..f8e0e5367398 100644 --- a/lib/bust_spinlocks.c +++ b/lib/bust_spinlocks.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -28,5 +29,3 @@ void __attribute__((weak)) bust_spinlocks(int yes) wake_up_klogd(); } } - - From d00285884c0892bb1310df96bce6056e9ce9b9d9 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 22 Mar 2013 15:04:40 -0700 Subject: [PATCH 03/12] mm/hugetlb: fix total hugetlbfs pages count when using memory overcommit accouting hugetlb_total_pages is used for overcommit calculations but the current implementation considers only the default hugetlb page size (which is either the first defined hugepage size or the one specified by default_hugepagesz kernel boot parameter). If the system is configured for more than one hugepage size, which is possible since commit a137e1cc6d6e ("hugetlbfs: per mount huge page sizes") then the overcommit estimation done by __vm_enough_memory() (resp. shown by meminfo_proc_show) is not precise - there is an impression of more available/allowed memory. This can lead to an unexpected ENOMEM/EFAULT resp. SIGSEGV when memory is accounted. Testcase: boot: hugepagesz=1G hugepages=1 the default overcommit ratio is 50 before patch: egrep 'CommitLimit' /proc/meminfo CommitLimit: 55434168 kB after patch: egrep 'CommitLimit' /proc/meminfo CommitLimit: 54909880 kB [akpm@linux-foundation.org: coding-style tweak] Signed-off-by: Wanpeng Li Acked-by: Michal Hocko Cc: "Aneesh Kumar K.V" Cc: Hillf Danton Cc: KAMEZAWA Hiroyuki Cc: [3.0+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 0a0be33bb199..ca9a7c6d7e97 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2124,8 +2124,12 @@ int hugetlb_report_node_meminfo(int nid, char *buf) /* Return the number pages of memory we physically have, in PAGE_SIZE units. */ unsigned long hugetlb_total_pages(void) { - struct hstate *h = &default_hstate; - return h->nr_huge_pages * pages_per_huge_page(h); + struct hstate *h; + unsigned long nr_total_pages = 0; + + for_each_hstate(h) + nr_total_pages += h->nr_huge_pages * pages_per_huge_page(h); + return nr_total_pages; } static int hugetlb_acct_memory(struct hstate *h, long delta) From 2ca067efd82939dfd87827d29d36a265823a4c2f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 22 Mar 2013 15:04:41 -0700 Subject: [PATCH 04/12] poweroff: change orderly_poweroff() to use schedule_work() David said: Commit 6c0c0d4d1080 ("poweroff: fix bug in orderly_poweroff()") apparently fixes one bug in orderly_poweroff(), but introduces another. The comments on orderly_poweroff() claim it can be called from any context - and indeed we call it from interrupt context in arch/powerpc/platforms/pseries/ras.c for example. But since that commit this is no longer safe, since call_usermodehelper_fns() is not safe in interrupt context without the UMH_NO_WAIT option. orderly_poweroff() can be used from any context but UMH_WAIT_EXEC is sleepable. Move the "force" logic into __orderly_poweroff() and change orderly_poweroff() to use the global poweroff_work which simply calls __orderly_poweroff(). While at it, remove the unneeded "int argc" and change argv_split() to use GFP_KERNEL. We use the global "bool poweroff_force" to pass the argument, this can obviously affect the previous request if it is pending/running. So we only allow the "false => true" transition assuming that the pending "true" should succeed anyway. If schedule_work() fails after that we know that work->func() was not called yet, it must see the new value. This means that orderly_poweroff() becomes async even if we do not run the command and always succeeds, schedule_work() can only fail if the work is already pending. We can export __orderly_poweroff() and change the non-atomic callers which want the old semantics. Signed-off-by: Oleg Nesterov Reported-by: Benjamin Herrenschmidt Reported-by: David Gibson Cc: Lucas De Marchi Cc: Feng Hong Cc: Kees Cook Cc: Serge Hallyn Cc: "Eric W. Biederman" Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 59 +++++++++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index 81f56445fba9..39c9c4a2949f 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2185,9 +2185,8 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; -static int __orderly_poweroff(void) +static int __orderly_poweroff(bool force) { - int argc; char **argv; static char *envp[] = { "HOME=/", @@ -2196,35 +2195,19 @@ static int __orderly_poweroff(void) }; int ret; - argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); - if (argv == NULL) { + argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL); + if (argv) { + ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); + argv_free(argv); + } else { printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", - __func__, poweroff_cmd); - return -ENOMEM; + __func__, poweroff_cmd); + ret = -ENOMEM; } - ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_EXEC, - NULL, NULL, NULL); - argv_free(argv); - - return ret; -} - -/** - * orderly_poweroff - Trigger an orderly system poweroff - * @force: force poweroff if command execution fails - * - * This may be called from any context to trigger a system shutdown. - * If the orderly shutdown fails, it will force an immediate shutdown. - */ -int orderly_poweroff(bool force) -{ - int ret = __orderly_poweroff(); - if (ret && force) { printk(KERN_WARNING "Failed to start orderly shutdown: " - "forcing the issue\n"); - + "forcing the issue\n"); /* * I guess this should try to kick off some daemon to sync and * poweroff asap. Or not even bother syncing if we're doing an @@ -2236,4 +2219,28 @@ int orderly_poweroff(bool force) return ret; } + +static bool poweroff_force; + +static void poweroff_work_func(struct work_struct *work) +{ + __orderly_poweroff(poweroff_force); +} + +static DECLARE_WORK(poweroff_work, poweroff_work_func); + +/** + * orderly_poweroff - Trigger an orderly system poweroff + * @force: force poweroff if command execution fails + * + * This may be called from any context to trigger a system shutdown. + * If the orderly shutdown fails, it will force an immediate shutdown. + */ +int orderly_poweroff(bool force) +{ + if (force) /* do not override the pending "true" */ + poweroff_force = true; + schedule_work(&poweroff_work); + return 0; +} EXPORT_SYMBOL_GPL(orderly_poweroff); From f9228b204f789493117e458d2fefae937edb7272 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Fri, 22 Mar 2013 15:04:43 -0700 Subject: [PATCH 05/12] mm: zone_end_pfn is too small Booting with 32 TBytes memory hits BUG at mm/page_alloc.c:552! (output below). The key hint is "page 4294967296 outside zone". 4294967296 = 0x100000000 (bit 32 is set). The problem is in include/linux/mmzone.h: 530 static inline unsigned zone_end_pfn(const struct zone *zone) 531 { 532 return zone->zone_start_pfn + zone->spanned_pages; 533 } zone_end_pfn is "unsigned" (32 bits). Changing it to "unsigned long" (64 bits) fixes the problem. zone_end_pfn() was added recently in commit 108bcc96ef70 ("mm: add & use zone_end_pfn() and zone_spans_pfn()") Output from the failure. No AGP bridge found page 4294967296 outside zone [ 4294967296 - 4327469056 ] ------------[ cut here ]------------ kernel BUG at mm/page_alloc.c:552! invalid opcode: 0000 [#1] SMP Modules linked in: CPU 0 Pid: 0, comm: swapper Not tainted 3.9.0-rc2.dtp+ #10 RIP: free_one_page+0x382/0x430 Process swapper (pid: 0, threadinfo ffffffff81942000, task ffffffff81955420) Call Trace: __free_pages_ok+0x96/0xb0 __free_pages+0x25/0x50 __free_pages_bootmem+0x8a/0x8c __free_memory_core+0xea/0x131 free_low_memory_core_early+0x4a/0x98 free_all_bootmem+0x45/0x47 mem_init+0x7b/0x14c start_kernel+0x216/0x433 x86_64_start_reservations+0x2a/0x2c x86_64_start_kernel+0x144/0x153 Code: 89 f1 ba 01 00 00 00 31 f6 d3 e2 4c 89 ef e8 66 a4 01 00 e9 2c fe ff ff 0f 0b eb fe 0f 0b 66 66 2e 0f 1f 84 00 00 00 00 00 eb f3 <0f> 0b eb fe 0f 0b 0f 1f 84 00 00 00 00 00 eb f6 0f 0b eb fe 49 Signed-off-by: Russ Anderson Reported-by: George Beshers Acked-by: Hedi Berriche Cc: Cody P Schafer Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ede274957e05..c74092eebf5c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -527,7 +527,7 @@ static inline int zone_is_oom_locked(const struct zone *zone) return test_bit(ZONE_OOM_LOCKED, &zone->flags); } -static inline unsigned zone_end_pfn(const struct zone *zone) +static inline unsigned long zone_end_pfn(const struct zone *zone) { return zone->zone_start_pfn + zone->spanned_pages; } From 925e8ea6bca2c9a590565634b27768d7042e089f Mon Sep 17 00:00:00 2001 From: Ashish Jangam Date: Fri, 22 Mar 2013 15:04:44 -0700 Subject: [PATCH 06/12] drivers/rtc/rtc-da9052.c: fix for rtc device registration Add support for the virtual irq since now MFD only handles virtual irq Without this patch rtc device will fail in registration. (akpm: Ashish has a different version whcih will be needed for 3.8.x and earlier kernels) Signed-off-by: Ashish Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-da9052.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-da9052.c b/drivers/rtc/rtc-da9052.c index 0dde688ca09b..969abbad7fe3 100644 --- a/drivers/rtc/rtc-da9052.c +++ b/drivers/rtc/rtc-da9052.c @@ -239,11 +239,9 @@ static int da9052_rtc_probe(struct platform_device *pdev) rtc->da9052 = dev_get_drvdata(pdev->dev.parent); platform_set_drvdata(pdev, rtc); - rtc->irq = platform_get_irq_byname(pdev, "ALM"); - ret = devm_request_threaded_irq(&pdev->dev, rtc->irq, NULL, - da9052_rtc_irq, - IRQF_TRIGGER_LOW | IRQF_ONESHOT, - "ALM", rtc); + rtc->irq = DA9052_IRQ_ALARM; + ret = da9052_request_irq(rtc->da9052, rtc->irq, "ALM", + da9052_rtc_irq, rtc); if (ret != 0) { rtc_err(rtc->da9052, "irq registration failed: %d\n", ret); return ret; From e66b05873a7a76afc569da6382509471cba8d5ff Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Fri, 22 Mar 2013 15:04:45 -0700 Subject: [PATCH 07/12] drivers/video/ep93xx-fb.c: include for devm_ioremap() Commit be8678149701 ("drivers/video/ep93xx-fb.c: use devm_ functions") introduced a build error: drivers/video/ep93xx-fb.c: In function 'ep93xxfb_probe': drivers/video/ep93xx-fb.c:532: error: implicit declaration of function 'devm_ioremap' drivers/video/ep93xx-fb.c:533: warning: assignment makes pointer from integer without a cast Include to pickup the declaration of 'devm_ioremap'. Signed-off-by: H Hartley Sweeten Cc: Florian Tobias Schandinat Acked-by: Ryan Mallon Cc: Damien Cassou Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/ep93xx-fb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/video/ep93xx-fb.c b/drivers/video/ep93xx-fb.c index 3f2519d30715..e06cd5d90c97 100644 --- a/drivers/video/ep93xx-fb.c +++ b/drivers/video/ep93xx-fb.c @@ -23,6 +23,7 @@ #include #include #include +#include #include From 0ef1594c017521ea89278e80fe3f80dafb17abde Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 22 Mar 2013 15:04:47 -0700 Subject: [PATCH 08/12] drivers/rtc/rtc-at91rm9200.c: use a variable for storing IMR On some revisions of AT91 SoCs, the RTC IMR register is not working. Instead of elaborating a workaround for that specific SoC or IP version, we simply use a software variable to store the Interrupt Mask Register and modify it for each enabling/disabling of an interrupt. The overhead of this is negligible anyway. The interrupt mask register (IMR) for the RTC is broken on the AT91SAM9x5 sub-family of SoCs (good overview of the members here: http://www.eewiki.net/display/linuxonarm/AT91SAM9x5 ). The "user visible effect" is the RTC doesn't work. That sub-family is less than two years old and only has devicetree (DT) support and came online circa lk 3.7 . The dust is yet to settle on the DT stuff at least for AT91 SoCs (translation: lots of stuff is still broken, so much that it is hard to know where to start). The fix in the patch is pretty simple: just shadow the silicon IMR register with a variable in the driver. Some older SoCs (pre-DT) use the the rtc-at91rm9200 driver (e.g. obviously the AT91RM9200) and they should not be impacted by the change. There shouldn't be a large volume of interrupts associated with a RTC. Signed-off-by: Nicolas Ferre Reported-by: Douglas Gilbert Cc: Jean-Christophe PLAGNIOL-VILLARD Cc: Ludovic Desroches Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-at91rm9200.c | 50 ++++++++++++++++++++++-------------- drivers/rtc/rtc-at91rm9200.h | 1 - 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index 434ebc3a99dc..0a9f27e094ea 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -44,6 +44,7 @@ static DECLARE_COMPLETION(at91_rtc_updated); static unsigned int at91_alarm_year = AT91_RTC_EPOCH; static void __iomem *at91_rtc_regs; static int irq; +static u32 at91_rtc_imr; /* * Decode time/date into rtc_time structure @@ -108,9 +109,11 @@ static int at91_rtc_settime(struct device *dev, struct rtc_time *tm) cr = at91_rtc_read(AT91_RTC_CR); at91_rtc_write(AT91_RTC_CR, cr | AT91_RTC_UPDCAL | AT91_RTC_UPDTIM); + at91_rtc_imr |= AT91_RTC_ACKUPD; at91_rtc_write(AT91_RTC_IER, AT91_RTC_ACKUPD); wait_for_completion(&at91_rtc_updated); /* wait for ACKUPD interrupt */ at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ACKUPD); + at91_rtc_imr &= ~AT91_RTC_ACKUPD; at91_rtc_write(AT91_RTC_TIMR, bin2bcd(tm->tm_sec) << 0 @@ -142,7 +145,7 @@ static int at91_rtc_readalarm(struct device *dev, struct rtc_wkalrm *alrm) tm->tm_yday = rtc_year_days(tm->tm_mday, tm->tm_mon, tm->tm_year); tm->tm_year = at91_alarm_year - 1900; - alrm->enabled = (at91_rtc_read(AT91_RTC_IMR) & AT91_RTC_ALARM) + alrm->enabled = (at91_rtc_imr & AT91_RTC_ALARM) ? 1 : 0; dev_dbg(dev, "%s(): %4d-%02d-%02d %02d:%02d:%02d\n", __func__, @@ -168,6 +171,7 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) tm.tm_sec = alrm->time.tm_sec; at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ALARM); + at91_rtc_imr &= ~AT91_RTC_ALARM; at91_rtc_write(AT91_RTC_TIMALR, bin2bcd(tm.tm_sec) << 0 | bin2bcd(tm.tm_min) << 8 @@ -180,6 +184,7 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) if (alrm->enabled) { at91_rtc_write(AT91_RTC_SCCR, AT91_RTC_ALARM); + at91_rtc_imr |= AT91_RTC_ALARM; at91_rtc_write(AT91_RTC_IER, AT91_RTC_ALARM); } @@ -196,9 +201,12 @@ static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) if (enabled) { at91_rtc_write(AT91_RTC_SCCR, AT91_RTC_ALARM); + at91_rtc_imr |= AT91_RTC_ALARM; at91_rtc_write(AT91_RTC_IER, AT91_RTC_ALARM); - } else + } else { at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ALARM); + at91_rtc_imr &= ~AT91_RTC_ALARM; + } return 0; } @@ -207,12 +215,10 @@ static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) */ static int at91_rtc_proc(struct device *dev, struct seq_file *seq) { - unsigned long imr = at91_rtc_read(AT91_RTC_IMR); - seq_printf(seq, "update_IRQ\t: %s\n", - (imr & AT91_RTC_ACKUPD) ? "yes" : "no"); + (at91_rtc_imr & AT91_RTC_ACKUPD) ? "yes" : "no"); seq_printf(seq, "periodic_IRQ\t: %s\n", - (imr & AT91_RTC_SECEV) ? "yes" : "no"); + (at91_rtc_imr & AT91_RTC_SECEV) ? "yes" : "no"); return 0; } @@ -227,7 +233,7 @@ static irqreturn_t at91_rtc_interrupt(int irq, void *dev_id) unsigned int rtsr; unsigned long events = 0; - rtsr = at91_rtc_read(AT91_RTC_SR) & at91_rtc_read(AT91_RTC_IMR); + rtsr = at91_rtc_read(AT91_RTC_SR) & at91_rtc_imr; if (rtsr) { /* this interrupt is shared! Is it ours? */ if (rtsr & AT91_RTC_ALARM) events |= (RTC_AF | RTC_IRQF); @@ -291,6 +297,7 @@ static int __init at91_rtc_probe(struct platform_device *pdev) at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ACKUPD | AT91_RTC_ALARM | AT91_RTC_SECEV | AT91_RTC_TIMEV | AT91_RTC_CALEV); + at91_rtc_imr = 0; ret = request_irq(irq, at91_rtc_interrupt, IRQF_SHARED, @@ -329,6 +336,7 @@ static int __exit at91_rtc_remove(struct platform_device *pdev) at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ACKUPD | AT91_RTC_ALARM | AT91_RTC_SECEV | AT91_RTC_TIMEV | AT91_RTC_CALEV); + at91_rtc_imr = 0; free_irq(irq, pdev); rtc_device_unregister(rtc); @@ -341,31 +349,35 @@ static int __exit at91_rtc_remove(struct platform_device *pdev) /* AT91RM9200 RTC Power management control */ -static u32 at91_rtc_imr; +static u32 at91_rtc_bkpimr; + static int at91_rtc_suspend(struct device *dev) { /* this IRQ is shared with DBGU and other hardware which isn't * necessarily doing PM like we are... */ - at91_rtc_imr = at91_rtc_read(AT91_RTC_IMR) - & (AT91_RTC_ALARM|AT91_RTC_SECEV); - if (at91_rtc_imr) { - if (device_may_wakeup(dev)) + at91_rtc_bkpimr = at91_rtc_imr & (AT91_RTC_ALARM|AT91_RTC_SECEV); + if (at91_rtc_bkpimr) { + if (device_may_wakeup(dev)) { enable_irq_wake(irq); - else - at91_rtc_write(AT91_RTC_IDR, at91_rtc_imr); - } + } else { + at91_rtc_write(AT91_RTC_IDR, at91_rtc_bkpimr); + at91_rtc_imr &= ~at91_rtc_bkpimr; + } +} return 0; } static int at91_rtc_resume(struct device *dev) { - if (at91_rtc_imr) { - if (device_may_wakeup(dev)) + if (at91_rtc_bkpimr) { + if (device_may_wakeup(dev)) { disable_irq_wake(irq); - else - at91_rtc_write(AT91_RTC_IER, at91_rtc_imr); + } else { + at91_rtc_imr |= at91_rtc_bkpimr; + at91_rtc_write(AT91_RTC_IER, at91_rtc_bkpimr); + } } return 0; } diff --git a/drivers/rtc/rtc-at91rm9200.h b/drivers/rtc/rtc-at91rm9200.h index da1945e5f714..5f940b6844cb 100644 --- a/drivers/rtc/rtc-at91rm9200.h +++ b/drivers/rtc/rtc-at91rm9200.h @@ -64,7 +64,6 @@ #define AT91_RTC_SCCR 0x1c /* Status Clear Command Register */ #define AT91_RTC_IER 0x20 /* Interrupt Enable Register */ #define AT91_RTC_IDR 0x24 /* Interrupt Disable Register */ -#define AT91_RTC_IMR 0x28 /* Interrupt Mask Register */ #define AT91_RTC_VER 0x2c /* Valid Entry Register */ #define AT91_RTC_NVTIM (1 << 0) /* Non valid Time */ From 8d640a51ec9e9cdefa680b67ad55f933eefc5923 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 22 Mar 2013 15:04:48 -0700 Subject: [PATCH 09/12] dma-debug: fix locking bug in check_unmap() In check_unmap() it is possible to get into a dead-locked state if dma_mapping_error is called. The problem is that the bucket is locked in check_unmap, and locked again by debug_dma_mapping_error which is called by dma_mapping_error. To resolve that we must release the lock on the bucket before making the call to dma_mapping_error. [akpm@linux-foundation.org: restore 80-col trickery to be consistent with the rest of the file] Signed-off-by: Alexander Duyck Cc: Joerg Roedel Reviewed-by: Shuah Khan Tested-by: Shuah Khan Cc: Jakub Kicinski Cc: Konrad Rzeszutek Wilk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dma-debug.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 5e396accd3d0..d3e06a5e981e 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -862,17 +862,21 @@ static void check_unmap(struct dma_debug_entry *ref) entry = bucket_find_exact(bucket, ref); if (!entry) { + /* must drop lock before calling dma_mapping_error */ + put_hash_bucket(bucket, &flags); + if (dma_mapping_error(ref->dev, ref->dev_addr)) { err_printk(ref->dev, NULL, - "DMA-API: device driver tries " - "to free an invalid DMA memory address\n"); - return; + "DMA-API: device driver tries to free an " + "invalid DMA memory address\n"); + } else { + err_printk(ref->dev, NULL, + "DMA-API: device driver tries to free DMA " + "memory it has not allocated [device " + "address=0x%016llx] [size=%llu bytes]\n", + ref->dev_addr, ref->size); } - err_printk(ref->dev, NULL, "DMA-API: device driver tries " - "to free DMA memory it has not allocated " - "[device address=0x%016llx] [size=%llu bytes]\n", - ref->dev_addr, ref->size); - goto out; + return; } if (ref->size != entry->size) { @@ -936,7 +940,6 @@ static void check_unmap(struct dma_debug_entry *ref) hash_bucket_del(entry); dma_entry_free(entry); -out: put_hash_bucket(bucket, &flags); } From 96e7d7a1e0fc7780b4c1981c787e42473aa91a95 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 22 Mar 2013 15:04:49 -0700 Subject: [PATCH 10/12] dma-debug: update DMA debug API to better handle multiple mappings of a buffer There were reports of the igb driver unmapping buffers without calling dma_mapping_error. On closer inspection issues were found in the DMA debug API and how it handled multiple mappings of the same buffer. The issue I found is the fact that the debug_dma_mapping_error would only set the map_err_type to MAP_ERR_CHECKED in the case that the was only one match for device and device address. However in the case of non-IOMMU, multiple addresses existed and as a result it was not setting this field once a second mapping was instantiated. I have resolved this by changing the search so that it instead will now set MAP_ERR_CHECKED on the first buffer that matches the device and DMA address that is currently in the state MAP_ERR_NOT_CHECKED. A secondary side effect of this patch is that in the case of multiple buffers using the same address only the last mapping will have a valid map_err_type. The previous mappings will all end up with map_err_type set to MAP_ERR_CHECKED because of the dma_mapping_error call in debug_dma_map_page. However this behavior may be preferable as it means you will likely only see one real error per multi-mapped buffer, versus the current behavior of multiple false errors mer multi-mapped buffer. Signed-off-by: Alexander Duyck Cc: Joerg Roedel Reviewed-by: Shuah Khan Tested-by: Shuah Khan Cc: Jakub Kicinski Cc: Konrad Rzeszutek Wilk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dma-debug.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/lib/dma-debug.c b/lib/dma-debug.c index d3e06a5e981e..d87a17a819d0 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -1085,13 +1085,27 @@ void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) ref.dev = dev; ref.dev_addr = dma_addr; bucket = get_hash_bucket(&ref, &flags); - entry = bucket_find_exact(bucket, &ref); - if (!entry) - goto out; + list_for_each_entry(entry, &bucket->list, list) { + if (!exact_match(&ref, entry)) + continue; + + /* + * The same physical address can be mapped multiple + * times. Without a hardware IOMMU this results in the + * same device addresses being put into the dma-debug + * hash multiple times too. This can result in false + * positives being reported. Therefore we implement a + * best-fit algorithm here which updates the first entry + * from the hash which fits the reference value and is + * not currently listed as being checked. + */ + if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { + entry->map_err_type = MAP_ERR_CHECKED; + break; + } + } - entry->map_err_type = MAP_ERR_CHECKED; -out: put_hash_bucket(bucket, &flags); } EXPORT_SYMBOL(debug_dma_mapping_error); From ca4b3f302c90de5e516296e581c31c80125cd24b Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Fri, 22 Mar 2013 15:04:50 -0700 Subject: [PATCH 11/12] mm/hotplug: only free wait_table if it's allocated by vmalloc zone->wait_table may be allocated from bootmem, it can not be freed. Signed-off-by: Jianguo Wu Reviewed-by: Tang Chen Cc: Tang Chen Cc: Jiang Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9597eec8239d..ee3765760818 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1779,7 +1779,11 @@ void try_offline_node(int nid) for (i = 0; i < MAX_NR_ZONES; i++) { struct zone *zone = pgdat->node_zones + i; - if (zone->wait_table) + /* + * wait_table may be allocated from boot memory, + * here only free if it's allocated by vmalloc. + */ + if (is_vmalloc_addr(zone->wait_table)) vfree(zone->wait_table); } From 38d78e587d4960d0db94add518d27ee74bad2301 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Fri, 22 Mar 2013 15:04:51 -0700 Subject: [PATCH 12/12] mqueue: sys_mq_open: do not call mnt_drop_write() if read-only mnt_drop_write() must be called only if mnt_want_write() succeeded, otherwise the mnt_writers counter will diverge. mnt_writers counters are used to check if remounting FS as read-only is OK, so after an extra mnt_drop_write() call, it would be impossible to remount mqueue FS as read-only. Besides, on umount a warning would be printed like this one: ===================================== [ BUG: bad unlock balance detected! ] 3.9.0-rc3 #5 Not tainted ------------------------------------- a.out/12486 is trying to release lock (sb_writers) at: mnt_drop_write+0x1f/0x30 but there are no more locks to release! Signed-off-by: Vladimir Davydov Cc: Doug Ledford Cc: KOSAKI Motohiro Cc: "Eric W. Biederman" Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/mqueue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index e5c4f609f22c..3953fda2e8bd 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -840,7 +840,8 @@ out_putfd: fd = error; } mutex_unlock(&root->d_inode->i_mutex); - mnt_drop_write(mnt); + if (!ro) + mnt_drop_write(mnt); out_putname: putname(name); return fd;