From 5a21d489fd9541a4a66b9a500659abaca1b19a51 Mon Sep 17 00:00:00 2001 From: Bojan Smojver Date: Sun, 29 Apr 2012 22:42:06 +0200 Subject: [PATCH 01/13] PM / Hibernate: Hibernate/thaw fixes/improvements 1. Do not allocate memory for buffers from emergency pools, unless absolutely required. Do not warn about and do not retry non-essential failed allocations. 2. Do not check the amount of free pages left on every single page write, but wait until one map is completely populated and then check. 3. Set maximum number of pages for read buffering consistently, instead of inadvertently depending on the size of the sector type. 4. Fix copyright line, which I missed when I submitted the hibernation threading patch. 5. Dispense with bit shifting arithmetic to improve readability. 6. Really recalculate the number of pages required to be free after all allocations have been done. 7. Fix calculation of pages required for read buffering. Only count in pages that do not belong to high memory. Signed-off-by: Bojan Smojver Signed-off-by: Rafael J. Wysocki --- kernel/power/swap.c | 62 ++++++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index eef311a58a64..11e22c068e8b 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -6,7 +6,7 @@ * * Copyright (C) 1998,2001-2005 Pavel Machek * Copyright (C) 2006 Rafael J. Wysocki - * Copyright (C) 2010 Bojan Smojver + * Copyright (C) 2010-2012 Bojan Smojver * * This file is released under the GPLv2. * @@ -282,14 +282,17 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain) return -ENOSPC; if (bio_chain) { - src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN | + __GFP_NORETRY); if (src) { copy_page(src, buf); } else { ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */ if (ret) return ret; - src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + src = (void *)__get_free_page(__GFP_WAIT | + __GFP_NOWARN | + __GFP_NORETRY); if (src) { copy_page(src, buf); } else { @@ -367,12 +370,17 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, clear_page(handle->cur); handle->cur_swap = offset; handle->k = 0; - } - if (bio_chain && low_free_pages() <= handle->reqd_free_pages) { - error = hib_wait_on_bio_chain(bio_chain); - if (error) - goto out; - handle->reqd_free_pages = reqd_free_pages(); + + if (bio_chain && low_free_pages() <= handle->reqd_free_pages) { + error = hib_wait_on_bio_chain(bio_chain); + if (error) + goto out; + /* + * Recalculate the number of required free pages, to + * make sure we never take more than half. + */ + handle->reqd_free_pages = reqd_free_pages(); + } } out: return error; @@ -419,8 +427,9 @@ static int swap_writer_finish(struct swap_map_handle *handle, /* Maximum number of threads for compression/decompression. */ #define LZO_THREADS 3 -/* Maximum number of pages for read buffering. */ -#define LZO_READ_PAGES (MAP_PAGE_ENTRIES * 8) +/* Minimum/maximum number of pages for read buffering. */ +#define LZO_MIN_RD_PAGES 1024 +#define LZO_MAX_RD_PAGES 8192 /** @@ -630,12 +639,6 @@ static int save_image_lzo(struct swap_map_handle *handle, } } - /* - * Adjust number of free pages after all allocations have been done. - * We don't want to run out of pages when writing. - */ - handle->reqd_free_pages = reqd_free_pages(); - /* * Start the CRC32 thread. */ @@ -657,6 +660,12 @@ static int save_image_lzo(struct swap_map_handle *handle, goto out_clean; } + /* + * Adjust the number of required free pages after all allocations have + * been done. We don't want to run out of pages when writing. + */ + handle->reqd_free_pages = reqd_free_pages(); + printk(KERN_INFO "PM: Using %u thread(s) for compression.\n" "PM: Compressing and saving image data (%u pages) ... ", @@ -1067,7 +1076,7 @@ static int load_image_lzo(struct swap_map_handle *handle, unsigned i, thr, run_threads, nr_threads; unsigned ring = 0, pg = 0, ring_size = 0, have = 0, want, need, asked = 0; - unsigned long read_pages; + unsigned long read_pages = 0; unsigned char **page = NULL; struct dec_data *data = NULL; struct crc_data *crc = NULL; @@ -1079,7 +1088,7 @@ static int load_image_lzo(struct swap_map_handle *handle, nr_threads = num_online_cpus() - 1; nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); - page = vmalloc(sizeof(*page) * LZO_READ_PAGES); + page = vmalloc(sizeof(*page) * LZO_MAX_RD_PAGES); if (!page) { printk(KERN_ERR "PM: Failed to allocate LZO page\n"); ret = -ENOMEM; @@ -1144,15 +1153,22 @@ static int load_image_lzo(struct swap_map_handle *handle, } /* - * Adjust number of pages for read buffering, in case we are short. + * Set the number of pages for read buffering. + * This is complete guesswork, because we'll only know the real + * picture once prepare_image() is called, which is much later on + * during the image load phase. We'll assume the worst case and + * say that none of the image pages are from high memory. */ - read_pages = (nr_free_pages() - snapshot_get_image_size()) >> 1; - read_pages = clamp_val(read_pages, LZO_CMP_PAGES, LZO_READ_PAGES); + if (low_free_pages() > snapshot_get_image_size()) + read_pages = (low_free_pages() - snapshot_get_image_size()) / 2; + read_pages = clamp_val(read_pages, LZO_MIN_RD_PAGES, LZO_MAX_RD_PAGES); for (i = 0; i < read_pages; i++) { page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ? __GFP_WAIT | __GFP_HIGH : - __GFP_WAIT); + __GFP_WAIT | __GFP_NOWARN | + __GFP_NORETRY); + if (!page[i]) { if (i < LZO_CMP_PAGES) { ring_size = i; From 52d136cc2cf6659ee247dbcc88c9e7bd7428ad06 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 29 Apr 2012 22:52:19 +0200 Subject: [PATCH 02/13] PM / Sleep: Look for wakeup events in later stages of device suspend Currently, the device suspend code in drivers/base/power/main.c only checks if there have been any wakeup events, and therefore the ongoing system transition to a sleep state should be aborted, during the first (i.e. "suspend") device suspend phase. However, wakeup events may be reported later as well, so it's reasonable to look for them in the in the subsequent (i.e. "late suspend" and "suspend noirq") phases. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- drivers/base/power/main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index b462c0e341cb..e0fb5b0435a3 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -889,6 +889,11 @@ static int dpm_suspend_noirq(pm_message_t state) if (!list_empty(&dev->power.entry)) list_move(&dev->power.entry, &dpm_noirq_list); put_device(dev); + + if (pm_wakeup_pending()) { + error = -EBUSY; + break; + } } mutex_unlock(&dpm_list_mtx); if (error) @@ -962,6 +967,11 @@ static int dpm_suspend_late(pm_message_t state) if (!list_empty(&dev->power.entry)) list_move(&dev->power.entry, &dpm_late_early_list); put_device(dev); + + if (pm_wakeup_pending()) { + error = -EBUSY; + break; + } } mutex_unlock(&dpm_list_mtx); if (error) From 60af1066913162c5dd13fad3b872a67b1eb7da0f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 29 Apr 2012 22:52:34 +0200 Subject: [PATCH 03/13] PM / Sleep: Use wait queue to signal "no wakeup events in progress" The current wakeup source deactivation code doesn't do anything when the counter of wakeup events in progress goes down to zero, which requires pm_get_wakeup_count() to poll that counter periodically. Although this reduces the average time it takes to deactivate a wakeup source, it also may lead to a substantial amount of unnecessary polling if there are extended periods of wakeup activity. Thus it seems reasonable to use a wait queue for signaling the "no wakeup events in progress" condition and remove the polling. Signed-off-by: Rafael J. Wysocki Acked-by: mark gross Acked-by: Greg Kroah-Hartman --- drivers/base/power/wakeup.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 2a3e581b8dcd..92f220d89d35 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -17,8 +17,6 @@ #include "power.h" -#define TIMEOUT 100 - /* * If set, the suspend/hibernate code will abort transitions to a sleep state * if wakeup events are registered during or immediately before the transition. @@ -52,6 +50,8 @@ static void pm_wakeup_timer_fn(unsigned long data); static LIST_HEAD(wakeup_sources); +static DECLARE_WAIT_QUEUE_HEAD(wakeup_count_wait_queue); + /** * wakeup_source_prepare - Prepare a new wakeup source for initialization. * @ws: Wakeup source to prepare. @@ -442,6 +442,7 @@ EXPORT_SYMBOL_GPL(pm_stay_awake); */ static void wakeup_source_deactivate(struct wakeup_source *ws) { + unsigned int cnt, inpr; ktime_t duration; ktime_t now; @@ -476,6 +477,10 @@ static void wakeup_source_deactivate(struct wakeup_source *ws) * couter of wakeup events in progress simultaneously. */ atomic_add(MAX_IN_PROGRESS, &combined_event_count); + + split_counters(&cnt, &inpr); + if (!inpr && waitqueue_active(&wakeup_count_wait_queue)) + wake_up(&wakeup_count_wait_queue); } /** @@ -667,14 +672,19 @@ bool pm_wakeup_pending(void) bool pm_get_wakeup_count(unsigned int *count) { unsigned int cnt, inpr; + DEFINE_WAIT(wait); for (;;) { + prepare_to_wait(&wakeup_count_wait_queue, &wait, + TASK_INTERRUPTIBLE); split_counters(&cnt, &inpr); if (inpr == 0 || signal_pending(current)) break; pm_wakeup_update_hit_counts(); - schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT)); + + schedule(); } + finish_wait(&wakeup_count_wait_queue, &wait); split_counters(&cnt, &inpr); *count = cnt; From 30e3ce6dcbe3fc29c343b17e768b07d4a795de21 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 29 Apr 2012 22:52:52 +0200 Subject: [PATCH 04/13] PM / Sleep: Change wakeup source statistics to follow Android Wakeup statistics used by Android are slightly different from what we have in wakeup sources at the moment and there aren't any known users of those statistics other than Android, so modify them to make it easier for Android to switch to wakeup sources. This removes the struct wakeup_source's hit_cout field, which is very rough and therefore not very useful, and adds two new fields, wakeup_count and expire_count. The first one tracks how many times the wakeup source is activated with events_check_enabled set (which roughly corresponds to the situations when a system power transition to a sleep state is in progress and would be aborted by this wakeup source if it were the only active one at that time) and the second one is the number of times the wakeup source has been activated with a timeout that expired. Additionally, the last_time field is now updated when the wakeup source is deactivated too (previously it was only updated during the wakeup source's activation), which seems to be what Android does with the analogous counter for wakelocks. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-devices-power | 24 +++++-- drivers/base/power/sysfs.c | 30 +++++++-- drivers/base/power/wakeup.c | 64 ++++++++----------- include/linux/pm_wakeup.h | 11 ++-- 4 files changed, 77 insertions(+), 52 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index 840f7d64d483..b0a5d9a6135e 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -96,16 +96,26 @@ Description: is read-only. If the device is not enabled to wake up the system from sleep states, this attribute is not present. -What: /sys/devices/.../power/wakeup_hit_count -Date: September 2010 +What: /sys/devices/.../power/wakeup_abort_count +Date: February 2012 Contact: Rafael J. Wysocki Description: - The /sys/devices/.../wakeup_hit_count attribute contains the + The /sys/devices/.../wakeup_abort_count attribute contains the number of times the processing of a wakeup event associated with - the device might prevent the system from entering a sleep state. - This attribute is read-only. If the device is not enabled to - wake up the system from sleep states, this attribute is not - present. + the device might have aborted system transition into a sleep + state in progress. This attribute is read-only. If the device + is not enabled to wake up the system from sleep states, this + attribute is not present. + +What: /sys/devices/.../power/wakeup_expire_count +Date: February 2012 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../wakeup_expire_count attribute contains the + number of times a wakeup event associated with the device has + been reported with a timeout that expired. This attribute is + read-only. If the device is not enabled to wake up the system + from sleep states, this attribute is not present. What: /sys/devices/.../power/wakeup_active Date: September 2010 diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 95c12f6cb5b9..13e40b9021b9 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -314,22 +314,41 @@ static ssize_t wakeup_active_count_show(struct device *dev, static DEVICE_ATTR(wakeup_active_count, 0444, wakeup_active_count_show, NULL); -static ssize_t wakeup_hit_count_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t wakeup_abort_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) { unsigned long count = 0; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { - count = dev->power.wakeup->hit_count; + count = dev->power.wakeup->wakeup_count; enabled = true; } spin_unlock_irq(&dev->power.lock); return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n"); } -static DEVICE_ATTR(wakeup_hit_count, 0444, wakeup_hit_count_show, NULL); +static DEVICE_ATTR(wakeup_abort_count, 0444, wakeup_abort_count_show, NULL); + +static ssize_t wakeup_expire_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long count = 0; + bool enabled = false; + + spin_lock_irq(&dev->power.lock); + if (dev->power.wakeup) { + count = dev->power.wakeup->expire_count; + enabled = true; + } + spin_unlock_irq(&dev->power.lock); + return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n"); +} + +static DEVICE_ATTR(wakeup_expire_count, 0444, wakeup_expire_count_show, NULL); static ssize_t wakeup_active_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -486,7 +505,8 @@ static struct attribute *wakeup_attrs[] = { &dev_attr_wakeup.attr, &dev_attr_wakeup_count.attr, &dev_attr_wakeup_active_count.attr, - &dev_attr_wakeup_hit_count.attr, + &dev_attr_wakeup_abort_count.attr, + &dev_attr_wakeup_expire_count.attr, &dev_attr_wakeup_active.attr, &dev_attr_wakeup_total_time_ms.attr, &dev_attr_wakeup_max_time_ms.attr, diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 92f220d89d35..7a6eada4534d 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -21,7 +21,7 @@ * If set, the suspend/hibernate code will abort transitions to a sleep state * if wakeup events are registered during or immediately before the transition. */ -bool events_check_enabled; +bool events_check_enabled __read_mostly; /* * Combined counters of registered wakeup events and wakeup events in progress. @@ -382,6 +382,21 @@ static void wakeup_source_activate(struct wakeup_source *ws) atomic_inc(&combined_event_count); } +/** + * wakeup_source_report_event - Report wakeup event using the given source. + * @ws: Wakeup source to report the event for. + */ +static void wakeup_source_report_event(struct wakeup_source *ws) +{ + ws->event_count++; + /* This is racy, but the counter is approximate anyway. */ + if (events_check_enabled) + ws->wakeup_count++; + + if (!ws->active) + wakeup_source_activate(ws); +} + /** * __pm_stay_awake - Notify the PM core of a wakeup event. * @ws: Wakeup source object associated with the source of the event. @@ -397,10 +412,7 @@ void __pm_stay_awake(struct wakeup_source *ws) spin_lock_irqsave(&ws->lock, flags); - ws->event_count++; - if (!ws->active) - wakeup_source_activate(ws); - + wakeup_source_report_event(ws); del_timer(&ws->timer); ws->timer_expires = 0; @@ -469,6 +481,7 @@ static void wakeup_source_deactivate(struct wakeup_source *ws) if (ktime_to_ns(duration) > ktime_to_ns(ws->max_time)) ws->max_time = duration; + ws->last_time = now; del_timer(&ws->timer); ws->timer_expires = 0; @@ -541,8 +554,10 @@ static void pm_wakeup_timer_fn(unsigned long data) spin_lock_irqsave(&ws->lock, flags); if (ws->active && ws->timer_expires - && time_after_eq(jiffies, ws->timer_expires)) + && time_after_eq(jiffies, ws->timer_expires)) { wakeup_source_deactivate(ws); + ws->expire_count++; + } spin_unlock_irqrestore(&ws->lock, flags); } @@ -569,9 +584,7 @@ void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) spin_lock_irqsave(&ws->lock, flags); - ws->event_count++; - if (!ws->active) - wakeup_source_activate(ws); + wakeup_source_report_event(ws); if (!msec) { wakeup_source_deactivate(ws); @@ -613,24 +626,6 @@ void pm_wakeup_event(struct device *dev, unsigned int msec) } EXPORT_SYMBOL_GPL(pm_wakeup_event); -/** - * pm_wakeup_update_hit_counts - Update hit counts of all active wakeup sources. - */ -static void pm_wakeup_update_hit_counts(void) -{ - unsigned long flags; - struct wakeup_source *ws; - - rcu_read_lock(); - list_for_each_entry_rcu(ws, &wakeup_sources, entry) { - spin_lock_irqsave(&ws->lock, flags); - if (ws->active) - ws->hit_count++; - spin_unlock_irqrestore(&ws->lock, flags); - } - rcu_read_unlock(); -} - /** * pm_wakeup_pending - Check if power transition in progress should be aborted. * @@ -653,8 +648,6 @@ bool pm_wakeup_pending(void) events_check_enabled = !ret; } spin_unlock_irqrestore(&events_lock, flags); - if (ret) - pm_wakeup_update_hit_counts(); return ret; } @@ -680,7 +673,6 @@ bool pm_get_wakeup_count(unsigned int *count) split_counters(&cnt, &inpr); if (inpr == 0 || signal_pending(current)) break; - pm_wakeup_update_hit_counts(); schedule(); } @@ -713,8 +705,6 @@ bool pm_save_wakeup_count(unsigned int count) events_check_enabled = true; } spin_unlock_irq(&events_lock); - if (!events_check_enabled) - pm_wakeup_update_hit_counts(); return events_check_enabled; } @@ -749,9 +739,10 @@ static int print_wakeup_source_stats(struct seq_file *m, active_time = ktime_set(0, 0); } - ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t" + ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t" "%lld\t\t%lld\t\t%lld\t\t%lld\n", - ws->name, active_count, ws->event_count, ws->hit_count, + ws->name, active_count, ws->event_count, + ws->wakeup_count, ws->expire_count, ktime_to_ms(active_time), ktime_to_ms(total_time), ktime_to_ms(max_time), ktime_to_ms(ws->last_time)); @@ -768,8 +759,9 @@ static int wakeup_sources_stats_show(struct seq_file *m, void *unused) { struct wakeup_source *ws; - seq_puts(m, "name\t\tactive_count\tevent_count\thit_count\t" - "active_since\ttotal_time\tmax_time\tlast_change\n"); + seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t" + "expire_count\tactive_since\ttotal_time\tmax_time\t" + "last_change\n"); rcu_read_lock(); list_for_each_entry_rcu(ws, &wakeup_sources, entry) diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index d9f05113e5fb..5285317a612a 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -33,12 +33,14 @@ * * @total_time: Total time this wakeup source has been active. * @max_time: Maximum time this wakeup source has been continuously active. - * @last_time: Monotonic clock when the wakeup source's was activated last time. + * @last_time: Monotonic clock when the wakeup source's was touched last time. * @event_count: Number of signaled wakeup events. * @active_count: Number of times the wakeup sorce was activated. * @relax_count: Number of times the wakeup sorce was deactivated. - * @hit_count: Number of times the wakeup sorce might abort system suspend. + * @expire_count: Number of times the wakeup source's timeout has expired. + * @wakeup_count: Number of times the wakeup source might abort suspend. * @active: Status of the wakeup source. + * @has_timeout: The wakeup source has been activated with a timeout. */ struct wakeup_source { const char *name; @@ -52,8 +54,9 @@ struct wakeup_source { unsigned long event_count; unsigned long active_count; unsigned long relax_count; - unsigned long hit_count; - unsigned int active:1; + unsigned long expire_count; + unsigned long wakeup_count; + bool active:1; }; #ifdef CONFIG_PM_SLEEP From 6791e36c4a40e8930e08669e60077eea6770c429 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Sun, 29 Apr 2012 22:53:02 +0200 Subject: [PATCH 05/13] PM / Sleep: Add wakeup_source_activate and wakeup_source_deactivate tracepoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tracepoints to wakeup_source_activate and wakeup_source_deactivate. Useful for checking that specific wakeup sources overlap as expected. Signed-off-by: Arve Hjønnevåg Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeup.c | 12 +++++++++--- include/trace/events/power.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 7a6eada4534d..1132799421cd 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "power.h" @@ -374,12 +375,16 @@ EXPORT_SYMBOL_GPL(device_set_wakeup_enable); */ static void wakeup_source_activate(struct wakeup_source *ws) { + unsigned int cec; + ws->active = true; ws->active_count++; ws->last_time = ktime_get(); /* Increment the counter of events in progress. */ - atomic_inc(&combined_event_count); + cec = atomic_inc_return(&combined_event_count); + + trace_wakeup_source_activate(ws->name, cec); } /** @@ -454,7 +459,7 @@ EXPORT_SYMBOL_GPL(pm_stay_awake); */ static void wakeup_source_deactivate(struct wakeup_source *ws) { - unsigned int cnt, inpr; + unsigned int cnt, inpr, cec; ktime_t duration; ktime_t now; @@ -489,7 +494,8 @@ static void wakeup_source_deactivate(struct wakeup_source *ws) * Increment the counter of registered wakeup events and decrement the * couter of wakeup events in progress simultaneously. */ - atomic_add(MAX_IN_PROGRESS, &combined_event_count); + cec = atomic_add_return(MAX_IN_PROGRESS, &combined_event_count); + trace_wakeup_source_deactivate(ws->name, cec); split_counters(&cnt, &inpr); if (!inpr && waitqueue_active(&wakeup_count_wait_queue)) diff --git a/include/trace/events/power.h b/include/trace/events/power.h index cae9a94f025d..0c9783841a30 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -65,6 +65,40 @@ TRACE_EVENT(machine_suspend, TP_printk("state=%lu", (unsigned long)__entry->state) ); +DECLARE_EVENT_CLASS(wakeup_source, + + TP_PROTO(const char *name, unsigned int state), + + TP_ARGS(name, state), + + TP_STRUCT__entry( + __string( name, name ) + __field( u64, state ) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->state = state; + ), + + TP_printk("%s state=0x%lx", __get_str(name), + (unsigned long)__entry->state) +); + +DEFINE_EVENT(wakeup_source, wakeup_source_activate, + + TP_PROTO(const char *name, unsigned int state), + + TP_ARGS(name, state) +); + +DEFINE_EVENT(wakeup_source, wakeup_source_deactivate, + + TP_PROTO(const char *name, unsigned int state), + + TP_ARGS(name, state) +); + #ifdef CONFIG_EVENT_POWER_TRACING_DEPRECATED /* From 7483b4a4d9abf9dcf1ffe6e805ead2847ec3264e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 29 Apr 2012 22:53:22 +0200 Subject: [PATCH 06/13] PM / Sleep: Implement opportunistic sleep, v2 Introduce a mechanism by which the kernel can trigger global transitions to a sleep state chosen by user space if there are no active wakeup sources. It consists of a new sysfs attribute, /sys/power/autosleep, that can be written one of the strings returned by reads from /sys/power/state, an ordered workqueue and a work item carrying out the "suspend" operations. If a string representing the system's sleep state is written to /sys/power/autosleep, the work item triggering transitions to that state is queued up and it requeues itself after every execution until user space writes "off" to /sys/power/autosleep. That work item enables the detection of wakeup events using the functions already defined in drivers/base/power/wakeup.c (with one small modification) and calls either pm_suspend(), or hibernate() to put the system into a sleep state. If a wakeup event is reported while the transition is in progress, it will abort the transition and the "system suspend" work item will be queued up again. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Reviewed-by: NeilBrown --- Documentation/ABI/testing/sysfs-power | 17 ++++ drivers/base/power/wakeup.c | 34 +++---- include/linux/suspend.h | 13 ++- kernel/power/Kconfig | 8 ++ kernel/power/Makefile | 1 + kernel/power/autosleep.c | 123 ++++++++++++++++++++++++++ kernel/power/main.c | 119 +++++++++++++++++++++---- kernel/power/power.h | 18 ++++ 8 files changed, 298 insertions(+), 35 deletions(-) create mode 100644 kernel/power/autosleep.c diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index b464d12761ba..237c735db6c9 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -172,3 +172,20 @@ Description: Reading from this file will display the current value, which is set to 1 MB by default. + +What: /sys/power/autosleep +Date: April 2012 +Contact: Rafael J. Wysocki +Description: + The /sys/power/autosleep file can be written one of the strings + returned by reads from /sys/power/state. If that happens, a + work item attempting to trigger a transition of the system to + the sleep state represented by that string is queued up. This + attempt will only succeed if there are no active wakeup sources + in the system at that time. After every execution, regardless + of whether or not the attempt to put the system to sleep has + succeeded, the work item requeues itself until user space + writes "off" to /sys/power/autosleep. + + Reading from this file causes the last string successfully + written to it to be returned. diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 1132799421cd..cf1706df7610 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -660,29 +660,33 @@ bool pm_wakeup_pending(void) /** * pm_get_wakeup_count - Read the number of registered wakeup events. * @count: Address to store the value at. + * @block: Whether or not to block. * - * Store the number of registered wakeup events at the address in @count. Block - * if the current number of wakeup events being processed is nonzero. + * Store the number of registered wakeup events at the address in @count. If + * @block is set, block until the current number of wakeup events being + * processed is zero. * - * Return 'false' if the wait for the number of wakeup events being processed to - * drop down to zero has been interrupted by a signal (and the current number - * of wakeup events being processed is still nonzero). Otherwise return 'true'. + * Return 'false' if the current number of wakeup events being processed is + * nonzero. Otherwise return 'true'. */ -bool pm_get_wakeup_count(unsigned int *count) +bool pm_get_wakeup_count(unsigned int *count, bool block) { unsigned int cnt, inpr; - DEFINE_WAIT(wait); - for (;;) { - prepare_to_wait(&wakeup_count_wait_queue, &wait, - TASK_INTERRUPTIBLE); - split_counters(&cnt, &inpr); - if (inpr == 0 || signal_pending(current)) - break; + if (block) { + DEFINE_WAIT(wait); - schedule(); + for (;;) { + prepare_to_wait(&wakeup_count_wait_queue, &wait, + TASK_INTERRUPTIBLE); + split_counters(&cnt, &inpr); + if (inpr == 0 || signal_pending(current)) + break; + + schedule(); + } + finish_wait(&wakeup_count_wait_queue, &wait); } - finish_wait(&wakeup_count_wait_queue, &wait); split_counters(&cnt, &inpr); *count = cnt; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index ac1c114c499d..76b7ec7d3a81 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -356,7 +356,7 @@ extern int unregister_pm_notifier(struct notifier_block *nb); extern bool events_check_enabled; extern bool pm_wakeup_pending(void); -extern bool pm_get_wakeup_count(unsigned int *count); +extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); static inline void lock_system_sleep(void) @@ -407,6 +407,17 @@ static inline void unlock_system_sleep(void) {} #endif /* !CONFIG_PM_SLEEP */ +#ifdef CONFIG_PM_AUTOSLEEP + +/* kernel/power/autosleep.c */ +void queue_up_suspend_work(void); + +#else /* !CONFIG_PM_AUTOSLEEP */ + +static inline void queue_up_suspend_work(void) {} + +#endif /* !CONFIG_PM_AUTOSLEEP */ + #ifdef CONFIG_ARCH_SAVE_PAGE_KEYS /* * The ARCH_SAVE_PAGE_KEYS functions can be used by an architecture diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index deb5461e3216..67947083f842 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -103,6 +103,14 @@ config PM_SLEEP_SMP select HOTPLUG select HOTPLUG_CPU +config PM_AUTOSLEEP + bool "Opportunistic sleep" + depends on PM_SLEEP + default n + ---help--- + Allow the kernel to trigger a system transition into a global sleep + state automatically whenever there are no active wakeup sources. + config PM_RUNTIME bool "Run-time PM core functionality" depends on !IA64_HP_SIM diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 66d808ec5252..010b2f7e148c 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -9,5 +9,6 @@ obj-$(CONFIG_SUSPEND) += suspend.o obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ block_io.o +obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c new file mode 100644 index 000000000000..42348e3589d3 --- /dev/null +++ b/kernel/power/autosleep.c @@ -0,0 +1,123 @@ +/* + * kernel/power/autosleep.c + * + * Opportunistic sleep support. + * + * Copyright (C) 2012 Rafael J. Wysocki + */ + +#include +#include +#include + +#include "power.h" + +static suspend_state_t autosleep_state; +static struct workqueue_struct *autosleep_wq; +/* + * Note: it is only safe to mutex_lock(&autosleep_lock) if a wakeup_source + * is active, otherwise a deadlock with try_to_suspend() is possible. + * Alternatively mutex_lock_interruptible() can be used. This will then fail + * if an auto_sleep cycle tries to freeze processes. + */ +static DEFINE_MUTEX(autosleep_lock); +static struct wakeup_source *autosleep_ws; + +static void try_to_suspend(struct work_struct *work) +{ + unsigned int initial_count, final_count; + + if (!pm_get_wakeup_count(&initial_count, true)) + goto out; + + mutex_lock(&autosleep_lock); + + if (!pm_save_wakeup_count(initial_count)) { + mutex_unlock(&autosleep_lock); + goto out; + } + + if (autosleep_state == PM_SUSPEND_ON) { + mutex_unlock(&autosleep_lock); + return; + } + if (autosleep_state >= PM_SUSPEND_MAX) + hibernate(); + else + pm_suspend(autosleep_state); + + mutex_unlock(&autosleep_lock); + + if (!pm_get_wakeup_count(&final_count, false)) + goto out; + + /* + * If the wakeup occured for an unknown reason, wait to prevent the + * system from trying to suspend and waking up in a tight loop. + */ + if (final_count == initial_count) + schedule_timeout_uninterruptible(HZ / 2); + + out: + queue_up_suspend_work(); +} + +static DECLARE_WORK(suspend_work, try_to_suspend); + +void queue_up_suspend_work(void) +{ + if (!work_pending(&suspend_work) && autosleep_state > PM_SUSPEND_ON) + queue_work(autosleep_wq, &suspend_work); +} + +suspend_state_t pm_autosleep_state(void) +{ + return autosleep_state; +} + +int pm_autosleep_lock(void) +{ + return mutex_lock_interruptible(&autosleep_lock); +} + +void pm_autosleep_unlock(void) +{ + mutex_unlock(&autosleep_lock); +} + +int pm_autosleep_set_state(suspend_state_t state) +{ + +#ifndef CONFIG_HIBERNATION + if (state >= PM_SUSPEND_MAX) + return -EINVAL; +#endif + + __pm_stay_awake(autosleep_ws); + + mutex_lock(&autosleep_lock); + + autosleep_state = state; + + __pm_relax(autosleep_ws); + + if (state > PM_SUSPEND_ON) + queue_up_suspend_work(); + + mutex_unlock(&autosleep_lock); + return 0; +} + +int __init pm_autosleep_init(void) +{ + autosleep_ws = wakeup_source_register("autosleep"); + if (!autosleep_ws) + return -ENOMEM; + + autosleep_wq = alloc_ordered_workqueue("autosleep", 0); + if (autosleep_wq) + return 0; + + wakeup_source_unregister(autosleep_ws); + return -ENOMEM; +} diff --git a/kernel/power/main.c b/kernel/power/main.c index 1c12581f1c62..ba6a5645952d 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -269,8 +269,7 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, return (s - buf); } -static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t n) +static suspend_state_t decode_state(const char *buf, size_t n) { #ifdef CONFIG_SUSPEND suspend_state_t state = PM_SUSPEND_STANDBY; @@ -278,27 +277,48 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, #endif char *p; int len; - int error = -EINVAL; p = memchr(buf, '\n', n); len = p ? p - buf : n; - /* First, check if we are requested to hibernate */ - if (len == 4 && !strncmp(buf, "disk", len)) { - error = hibernate(); - goto Exit; - } + /* Check hibernation first. */ + if (len == 4 && !strncmp(buf, "disk", len)) + return PM_SUSPEND_MAX; #ifdef CONFIG_SUSPEND - for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) { - if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) { - error = pm_suspend(state); - break; - } - } + for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) + if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) + return state; #endif - Exit: + return PM_SUSPEND_ON; +} + +static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + suspend_state_t state; + int error; + + error = pm_autosleep_lock(); + if (error) + return error; + + if (pm_autosleep_state() > PM_SUSPEND_ON) { + error = -EBUSY; + goto out; + } + + state = decode_state(buf, n); + if (state < PM_SUSPEND_MAX) + error = pm_suspend(state); + else if (state == PM_SUSPEND_MAX) + error = hibernate(); + else + error = -EINVAL; + + out: + pm_autosleep_unlock(); return error ? error : n; } @@ -339,7 +359,8 @@ static ssize_t wakeup_count_show(struct kobject *kobj, { unsigned int val; - return pm_get_wakeup_count(&val) ? sprintf(buf, "%u\n", val) : -EINTR; + return pm_get_wakeup_count(&val, true) ? + sprintf(buf, "%u\n", val) : -EINTR; } static ssize_t wakeup_count_store(struct kobject *kobj, @@ -347,15 +368,69 @@ static ssize_t wakeup_count_store(struct kobject *kobj, const char *buf, size_t n) { unsigned int val; + int error; + error = pm_autosleep_lock(); + if (error) + return error; + + if (pm_autosleep_state() > PM_SUSPEND_ON) { + error = -EBUSY; + goto out; + } + + error = -EINVAL; if (sscanf(buf, "%u", &val) == 1) { if (pm_save_wakeup_count(val)) - return n; + error = n; } - return -EINVAL; + + out: + pm_autosleep_unlock(); + return error; } power_attr(wakeup_count); + +#ifdef CONFIG_PM_AUTOSLEEP +static ssize_t autosleep_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + suspend_state_t state = pm_autosleep_state(); + + if (state == PM_SUSPEND_ON) + return sprintf(buf, "off\n"); + +#ifdef CONFIG_SUSPEND + if (state < PM_SUSPEND_MAX) + return sprintf(buf, "%s\n", valid_state(state) ? + pm_states[state] : "error"); +#endif +#ifdef CONFIG_HIBERNATION + return sprintf(buf, "disk\n"); +#else + return sprintf(buf, "error"); +#endif +} + +static ssize_t autosleep_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + suspend_state_t state = decode_state(buf, n); + int error; + + if (state == PM_SUSPEND_ON + && !(strncmp(buf, "off", 3) && strncmp(buf, "off\n", 4))) + return -EINVAL; + + error = pm_autosleep_set_state(state); + return error ? error : n; +} + +power_attr(autosleep); +#endif /* CONFIG_PM_AUTOSLEEP */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_TRACE @@ -409,6 +484,9 @@ static struct attribute * g[] = { #ifdef CONFIG_PM_SLEEP &pm_async_attr.attr, &wakeup_count_attr.attr, +#ifdef CONFIG_PM_AUTOSLEEP + &autosleep_attr.attr, +#endif #ifdef CONFIG_PM_DEBUG &pm_test_attr.attr, #endif @@ -444,7 +522,10 @@ static int __init pm_init(void) power_kobj = kobject_create_and_add("power", NULL); if (!power_kobj) return -ENOMEM; - return sysfs_create_group(power_kobj, &attr_group); + error = sysfs_create_group(power_kobj, &attr_group); + if (error) + return error; + return pm_autosleep_init(); } core_initcall(pm_init); diff --git a/kernel/power/power.h b/kernel/power/power.h index 98f3622d7407..4cf80fa115d9 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -264,3 +264,21 @@ static inline void suspend_thaw_processes(void) { } #endif + +#ifdef CONFIG_PM_AUTOSLEEP + +/* kernel/power/autosleep.c */ +extern int pm_autosleep_init(void); +extern int pm_autosleep_lock(void); +extern void pm_autosleep_unlock(void); +extern suspend_state_t pm_autosleep_state(void); +extern int pm_autosleep_set_state(suspend_state_t state); + +#else /* !CONFIG_PM_AUTOSLEEP */ + +static inline int pm_autosleep_init(void) { return 0; } +static inline int pm_autosleep_lock(void) { return 0; } +static inline void pm_autosleep_unlock(void) {} +static inline suspend_state_t pm_autosleep_state(void) { return PM_SUSPEND_ON; } + +#endif /* !CONFIG_PM_AUTOSLEEP */ From 55850945e872531644f31fefd217d61dd15dcab8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 29 Apr 2012 22:53:32 +0200 Subject: [PATCH 07/13] PM / Sleep: Add "prevent autosleep time" statistics to wakeup sources Android uses one wakelock statistics that is only necessary for opportunistic sleep. Namely, the prevent_suspend_time field accumulates the total time the given wakelock has been locked while "automatic suspend" was enabled. Add an analogous field, prevent_sleep_time, to wakeup sources and make it behave in a similar way. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-devices-power | 11 ++++ drivers/base/power/sysfs.c | 24 ++++++++ drivers/base/power/wakeup.c | 61 +++++++++++++++++-- include/linux/pm_wakeup.h | 4 ++ include/linux/suspend.h | 1 + kernel/power/autosleep.c | 6 +- 6 files changed, 102 insertions(+), 5 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index b0a5d9a6135e..45000f0db4d4 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -158,6 +158,17 @@ Description: not enabled to wake up the system from sleep states, this attribute is not present. +What: /sys/devices/.../power/wakeup_prevent_sleep_time_ms +Date: February 2012 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../wakeup_prevent_sleep_time_ms attribute + contains the total time the device has been preventing + opportunistic transitions to sleep states from occuring. + This attribute is read-only. If the device is not enabled to + wake up the system from sleep states, this attribute is not + present. + What: /sys/devices/.../power/autosuspend_delay_ms Date: September 2010 Contact: Alan Stern diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 13e40b9021b9..48be2ad4dd2c 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -417,6 +417,27 @@ static ssize_t wakeup_last_time_show(struct device *dev, } static DEVICE_ATTR(wakeup_last_time_ms, 0444, wakeup_last_time_show, NULL); + +#ifdef CONFIG_PM_AUTOSLEEP +static ssize_t wakeup_prevent_sleep_time_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + s64 msec = 0; + bool enabled = false; + + spin_lock_irq(&dev->power.lock); + if (dev->power.wakeup) { + msec = ktime_to_ms(dev->power.wakeup->prevent_sleep_time); + enabled = true; + } + spin_unlock_irq(&dev->power.lock); + return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n"); +} + +static DEVICE_ATTR(wakeup_prevent_sleep_time_ms, 0444, + wakeup_prevent_sleep_time_show, NULL); +#endif /* CONFIG_PM_AUTOSLEEP */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_ADVANCED_DEBUG @@ -511,6 +532,9 @@ static struct attribute *wakeup_attrs[] = { &dev_attr_wakeup_total_time_ms.attr, &dev_attr_wakeup_max_time_ms.attr, &dev_attr_wakeup_last_time_ms.attr, +#ifdef CONFIG_PM_AUTOSLEEP + &dev_attr_wakeup_prevent_sleep_time_ms.attr, +#endif #endif NULL, }; diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index cf1706df7610..2595b8d8fe1f 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -380,6 +380,8 @@ static void wakeup_source_activate(struct wakeup_source *ws) ws->active = true; ws->active_count++; ws->last_time = ktime_get(); + if (ws->autosleep_enabled) + ws->start_prevent_time = ws->last_time; /* Increment the counter of events in progress. */ cec = atomic_inc_return(&combined_event_count); @@ -449,6 +451,17 @@ void pm_stay_awake(struct device *dev) } EXPORT_SYMBOL_GPL(pm_stay_awake); +#ifdef CONFIG_PM_AUTOSLEEP +static void update_prevent_sleep_time(struct wakeup_source *ws, ktime_t now) +{ + ktime_t delta = ktime_sub(now, ws->start_prevent_time); + ws->prevent_sleep_time = ktime_add(ws->prevent_sleep_time, delta); +} +#else +static inline void update_prevent_sleep_time(struct wakeup_source *ws, + ktime_t now) {} +#endif + /** * wakup_source_deactivate - Mark given wakeup source as inactive. * @ws: Wakeup source to handle. @@ -490,6 +503,9 @@ static void wakeup_source_deactivate(struct wakeup_source *ws) del_timer(&ws->timer); ws->timer_expires = 0; + if (ws->autosleep_enabled) + update_prevent_sleep_time(ws, now); + /* * Increment the counter of registered wakeup events and decrement the * couter of wakeup events in progress simultaneously. @@ -718,6 +734,34 @@ bool pm_save_wakeup_count(unsigned int count) return events_check_enabled; } +#ifdef CONFIG_PM_AUTOSLEEP +/** + * pm_wakep_autosleep_enabled - Modify autosleep_enabled for all wakeup sources. + * @enabled: Whether to set or to clear the autosleep_enabled flags. + */ +void pm_wakep_autosleep_enabled(bool set) +{ + struct wakeup_source *ws; + ktime_t now = ktime_get(); + + rcu_read_lock(); + list_for_each_entry_rcu(ws, &wakeup_sources, entry) { + spin_lock_irq(&ws->lock); + if (ws->autosleep_enabled != set) { + ws->autosleep_enabled = set; + if (ws->active) { + if (set) + ws->start_prevent_time = now; + else + update_prevent_sleep_time(ws, now); + } + } + spin_unlock_irq(&ws->lock); + } + rcu_read_unlock(); +} +#endif /* CONFIG_PM_AUTOSLEEP */ + static struct dentry *wakeup_sources_stats_dentry; /** @@ -733,28 +777,37 @@ static int print_wakeup_source_stats(struct seq_file *m, ktime_t max_time; unsigned long active_count; ktime_t active_time; + ktime_t prevent_sleep_time; int ret; spin_lock_irqsave(&ws->lock, flags); total_time = ws->total_time; max_time = ws->max_time; + prevent_sleep_time = ws->prevent_sleep_time; active_count = ws->active_count; if (ws->active) { - active_time = ktime_sub(ktime_get(), ws->last_time); + ktime_t now = ktime_get(); + + active_time = ktime_sub(now, ws->last_time); total_time = ktime_add(total_time, active_time); if (active_time.tv64 > max_time.tv64) max_time = active_time; + + if (ws->autosleep_enabled) + prevent_sleep_time = ktime_add(prevent_sleep_time, + ktime_sub(now, ws->start_prevent_time)); } else { active_time = ktime_set(0, 0); } ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t" - "%lld\t\t%lld\t\t%lld\t\t%lld\n", + "%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n", ws->name, active_count, ws->event_count, ws->wakeup_count, ws->expire_count, ktime_to_ms(active_time), ktime_to_ms(total_time), - ktime_to_ms(max_time), ktime_to_ms(ws->last_time)); + ktime_to_ms(max_time), ktime_to_ms(ws->last_time), + ktime_to_ms(prevent_sleep_time)); spin_unlock_irqrestore(&ws->lock, flags); @@ -771,7 +824,7 @@ static int wakeup_sources_stats_show(struct seq_file *m, void *unused) seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t" "expire_count\tactive_since\ttotal_time\tmax_time\t" - "last_change\n"); + "last_change\tprevent_suspend_time\n"); rcu_read_lock(); list_for_each_entry_rcu(ws, &wakeup_sources, entry) diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 5285317a612a..569781faa504 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -34,6 +34,7 @@ * @total_time: Total time this wakeup source has been active. * @max_time: Maximum time this wakeup source has been continuously active. * @last_time: Monotonic clock when the wakeup source's was touched last time. + * @prevent_sleep_time: Total time this source has been preventing autosleep. * @event_count: Number of signaled wakeup events. * @active_count: Number of times the wakeup sorce was activated. * @relax_count: Number of times the wakeup sorce was deactivated. @@ -51,12 +52,15 @@ struct wakeup_source { ktime_t total_time; ktime_t max_time; ktime_t last_time; + ktime_t start_prevent_time; + ktime_t prevent_sleep_time; unsigned long event_count; unsigned long active_count; unsigned long relax_count; unsigned long expire_count; unsigned long wakeup_count; bool active:1; + bool autosleep_enabled:1; }; #ifdef CONFIG_PM_SLEEP diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 76b7ec7d3a81..cd83059fb592 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -358,6 +358,7 @@ extern bool events_check_enabled; extern bool pm_wakeup_pending(void); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); +extern void pm_wakep_autosleep_enabled(bool set); static inline void lock_system_sleep(void) { diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c index 42348e3589d3..ca304046d9e2 100644 --- a/kernel/power/autosleep.c +++ b/kernel/power/autosleep.c @@ -101,8 +101,12 @@ int pm_autosleep_set_state(suspend_state_t state) __pm_relax(autosleep_ws); - if (state > PM_SUSPEND_ON) + if (state > PM_SUSPEND_ON) { + pm_wakep_autosleep_enabled(true); queue_up_suspend_work(); + } else { + pm_wakep_autosleep_enabled(false); + } mutex_unlock(&autosleep_lock); return 0; From b86ff9820fd5df69295273b9aa68e58786ffc23f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 29 Apr 2012 22:53:42 +0200 Subject: [PATCH 08/13] PM / Sleep: Add user space interface for manipulating wakeup sources, v3 Android allows user space to manipulate wakelocks using two sysfs file located in /sys/power/, wake_lock and wake_unlock. Writing a wakelock name and optionally a timeout to the wake_lock file causes the wakelock whose name was written to be acquired (it is created before is necessary), optionally with the given timeout. Writing the name of a wakelock to wake_unlock causes that wakelock to be released. Implement an analogous interface for user space using wakeup sources. Add the /sys/power/wake_lock and /sys/power/wake_unlock files allowing user space to create, activate and deactivate wakeup sources, such that writing a name and optionally a timeout to wake_lock causes the wakeup source of that name to be activated, optionally with the given timeout. If that wakeup source doesn't exist, it will be created and then activated. Writing a name to wake_unlock causes the wakeup source of that name, if there is one, to be deactivated. Wakeup sources created with the help of wake_lock that haven't been used for more than 5 minutes are garbage collected and destroyed. Moreover, there can be only WL_NUMBER_LIMIT wakeup sources created with the help of wake_lock present at a time. The data type used to track wakeup sources created by user space is called "struct wakelock" to indicate the origins of this feature. This version of the patch includes an rbtree manipulation fix from John Stultz. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Reviewed-by: NeilBrown --- Documentation/ABI/testing/sysfs-power | 42 +++++ drivers/base/power/wakeup.c | 1 + kernel/power/Kconfig | 8 + kernel/power/Makefile | 1 + kernel/power/main.c | 41 +++++ kernel/power/power.h | 9 ++ kernel/power/wakelock.c | 215 ++++++++++++++++++++++++++ 7 files changed, 317 insertions(+) create mode 100644 kernel/power/wakelock.c diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index 237c735db6c9..31725ffeeb3a 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -189,3 +189,45 @@ Description: Reading from this file causes the last string successfully written to it to be returned. + +What: /sys/power/wake_lock +Date: February 2012 +Contact: Rafael J. Wysocki +Description: + The /sys/power/wake_lock file allows user space to create + wakeup source objects and activate them on demand (if one of + those wakeup sources is active, reads from the + /sys/power/wakeup_count file block or return false). When a + string without white space is written to /sys/power/wake_lock, + it will be assumed to represent a wakeup source name. If there + is a wakeup source object with that name, it will be activated + (unless active already). Otherwise, a new wakeup source object + will be registered, assigned the given name and activated. + If a string written to /sys/power/wake_lock contains white + space, the part of the string preceding the white space will be + regarded as a wakeup source name and handled as descrived above. + The other part of the string will be regarded as a timeout (in + nanoseconds) such that the wakeup source will be automatically + deactivated after it has expired. The timeout, if present, is + set regardless of the current state of the wakeup source object + in question. + + Reads from this file return a string consisting of the names of + wakeup sources created with the help of it that are active at + the moment, separated with spaces. + + +What: /sys/power/wake_unlock +Date: February 2012 +Contact: Rafael J. Wysocki +Description: + The /sys/power/wake_unlock file allows user space to deactivate + wakeup sources created with the help of /sys/power/wake_lock. + When a string is written to /sys/power/wake_unlock, it will be + assumed to represent the name of a wakeup source to deactivate. + If a wakeup source object of that name exists and is active at + the moment, it will be deactivated. + + Reads from this file return a string consisting of the names of + wakeup sources created with the help of /sys/power/wake_lock + that are inactive at the moment, separated with spaces. diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 2595b8d8fe1f..cbb463b3a750 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -133,6 +133,7 @@ void wakeup_source_add(struct wakeup_source *ws) spin_lock_init(&ws->lock); setup_timer(&ws->timer, pm_wakeup_timer_fn, (unsigned long)ws); ws->active = false; + ws->last_time = ktime_get(); spin_lock_irq(&events_lock); list_add_rcu(&ws->entry, &wakeup_sources); diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 67947083f842..1d534076d33a 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -111,6 +111,14 @@ config PM_AUTOSLEEP Allow the kernel to trigger a system transition into a global sleep state automatically whenever there are no active wakeup sources. +config PM_WAKELOCKS + bool "User space wakeup sources interface" + depends on PM_SLEEP + default n + ---help--- + Allow user space to create, activate and deactivate wakeup source + objects with the help of a sysfs-based interface. + config PM_RUNTIME bool "Run-time PM core functionality" depends on !IA64_HP_SIM diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 010b2f7e148c..29472bff11ef 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -10,5 +10,6 @@ obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ block_io.o obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o +obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/main.c b/kernel/power/main.c index ba6a5645952d..54ec071de337 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -431,6 +431,43 @@ static ssize_t autosleep_store(struct kobject *kobj, power_attr(autosleep); #endif /* CONFIG_PM_AUTOSLEEP */ + +#ifdef CONFIG_PM_WAKELOCKS +static ssize_t wake_lock_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return pm_show_wakelocks(buf, true); +} + +static ssize_t wake_lock_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int error = pm_wake_lock(buf); + return error ? error : n; +} + +power_attr(wake_lock); + +static ssize_t wake_unlock_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return pm_show_wakelocks(buf, false); +} + +static ssize_t wake_unlock_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int error = pm_wake_unlock(buf); + return error ? error : n; +} + +power_attr(wake_unlock); + +#endif /* CONFIG_PM_WAKELOCKS */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_TRACE @@ -487,6 +524,10 @@ static struct attribute * g[] = { #ifdef CONFIG_PM_AUTOSLEEP &autosleep_attr.attr, #endif +#ifdef CONFIG_PM_WAKELOCKS + &wake_lock_attr.attr, + &wake_unlock_attr.attr, +#endif #ifdef CONFIG_PM_DEBUG &pm_test_attr.attr, #endif diff --git a/kernel/power/power.h b/kernel/power/power.h index 4cf80fa115d9..b0bd4beaebfe 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -282,3 +282,12 @@ static inline void pm_autosleep_unlock(void) {} static inline suspend_state_t pm_autosleep_state(void) { return PM_SUSPEND_ON; } #endif /* !CONFIG_PM_AUTOSLEEP */ + +#ifdef CONFIG_PM_WAKELOCKS + +/* kernel/power/wakelock.c */ +extern ssize_t pm_show_wakelocks(char *buf, bool show_active); +extern int pm_wake_lock(const char *buf); +extern int pm_wake_unlock(const char *buf); + +#endif /* !CONFIG_PM_WAKELOCKS */ diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c new file mode 100644 index 000000000000..579700665e8c --- /dev/null +++ b/kernel/power/wakelock.c @@ -0,0 +1,215 @@ +/* + * kernel/power/wakelock.c + * + * User space wakeup sources support. + * + * Copyright (C) 2012 Rafael J. Wysocki + * + * This code is based on the analogous interface allowing user space to + * manipulate wakelocks on Android. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define WL_NUMBER_LIMIT 100 +#define WL_GC_COUNT_MAX 100 +#define WL_GC_TIME_SEC 300 + +static DEFINE_MUTEX(wakelocks_lock); + +struct wakelock { + char *name; + struct rb_node node; + struct wakeup_source ws; + struct list_head lru; +}; + +static struct rb_root wakelocks_tree = RB_ROOT; +static LIST_HEAD(wakelocks_lru_list); +static unsigned int number_of_wakelocks; +static unsigned int wakelocks_gc_count; + +ssize_t pm_show_wakelocks(char *buf, bool show_active) +{ + struct rb_node *node; + struct wakelock *wl; + char *str = buf; + char *end = buf + PAGE_SIZE; + + mutex_lock(&wakelocks_lock); + + for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) { + wl = rb_entry(node, struct wakelock, node); + if (wl->ws.active == show_active) + str += scnprintf(str, end - str, "%s ", wl->name); + } + if (str > buf) + str--; + + str += scnprintf(str, end - str, "\n"); + + mutex_unlock(&wakelocks_lock); + return (str - buf); +} + +static struct wakelock *wakelock_lookup_add(const char *name, size_t len, + bool add_if_not_found) +{ + struct rb_node **node = &wakelocks_tree.rb_node; + struct rb_node *parent = *node; + struct wakelock *wl; + + while (*node) { + int diff; + + parent = *node; + wl = rb_entry(*node, struct wakelock, node); + diff = strncmp(name, wl->name, len); + if (diff == 0) { + if (wl->name[len]) + diff = -1; + else + return wl; + } + if (diff < 0) + node = &(*node)->rb_left; + else + node = &(*node)->rb_right; + } + if (!add_if_not_found) + return ERR_PTR(-EINVAL); + + if (number_of_wakelocks > WL_NUMBER_LIMIT) + return ERR_PTR(-ENOSPC); + + /* Not found, we have to add a new one. */ + wl = kzalloc(sizeof(*wl), GFP_KERNEL); + if (!wl) + return ERR_PTR(-ENOMEM); + + wl->name = kstrndup(name, len, GFP_KERNEL); + if (!wl->name) { + kfree(wl); + return ERR_PTR(-ENOMEM); + } + wl->ws.name = wl->name; + wakeup_source_add(&wl->ws); + rb_link_node(&wl->node, parent, node); + rb_insert_color(&wl->node, &wakelocks_tree); + list_add(&wl->lru, &wakelocks_lru_list); + number_of_wakelocks++; + return wl; +} + +int pm_wake_lock(const char *buf) +{ + const char *str = buf; + struct wakelock *wl; + u64 timeout_ns = 0; + size_t len; + int ret = 0; + + while (*str && !isspace(*str)) + str++; + + len = str - buf; + if (!len) + return -EINVAL; + + if (*str && *str != '\n') { + /* Find out if there's a valid timeout string appended. */ + ret = kstrtou64(skip_spaces(str), 10, &timeout_ns); + if (ret) + return -EINVAL; + } + + mutex_lock(&wakelocks_lock); + + wl = wakelock_lookup_add(buf, len, true); + if (IS_ERR(wl)) { + ret = PTR_ERR(wl); + goto out; + } + if (timeout_ns) { + u64 timeout_ms = timeout_ns + NSEC_PER_MSEC - 1; + + do_div(timeout_ms, NSEC_PER_MSEC); + __pm_wakeup_event(&wl->ws, timeout_ms); + } else { + __pm_stay_awake(&wl->ws); + } + + list_move(&wl->lru, &wakelocks_lru_list); + + out: + mutex_unlock(&wakelocks_lock); + return ret; +} + +static void wakelocks_gc(void) +{ + struct wakelock *wl, *aux; + ktime_t now = ktime_get(); + + list_for_each_entry_safe_reverse(wl, aux, &wakelocks_lru_list, lru) { + u64 idle_time_ns; + bool active; + + spin_lock_irq(&wl->ws.lock); + idle_time_ns = ktime_to_ns(ktime_sub(now, wl->ws.last_time)); + active = wl->ws.active; + spin_unlock_irq(&wl->ws.lock); + + if (idle_time_ns < ((u64)WL_GC_TIME_SEC * NSEC_PER_SEC)) + break; + + if (!active) { + wakeup_source_remove(&wl->ws); + rb_erase(&wl->node, &wakelocks_tree); + list_del(&wl->lru); + kfree(wl->name); + kfree(wl); + number_of_wakelocks--; + } + } + wakelocks_gc_count = 0; +} + +int pm_wake_unlock(const char *buf) +{ + struct wakelock *wl; + size_t len; + int ret = 0; + + len = strlen(buf); + if (!len) + return -EINVAL; + + if (buf[len-1] == '\n') + len--; + + if (!len) + return -EINVAL; + + mutex_lock(&wakelocks_lock); + + wl = wakelock_lookup_add(buf, len, false); + if (IS_ERR(wl)) { + ret = PTR_ERR(wl); + goto out; + } + __pm_relax(&wl->ws); + list_move(&wl->lru, &wakelocks_lru_list); + if (++wakelocks_gc_count > WL_GC_COUNT_MAX) + wakelocks_gc(); + + out: + mutex_unlock(&wakelocks_lock); + return ret; +} From 4d7e30d98939a0340022ccd49325a3d70f7e0238 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Tue, 1 May 2012 21:33:34 +0200 Subject: [PATCH 09/13] epoll: Add a flag, EPOLLWAKEUP, to prevent suspend while epoll events are ready MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an epoll_event, that has the EPOLLWAKEUP flag set, is ready, a wakeup_source will be active to prevent suspend. This can be used to handle wakeup events from a driver that support poll, e.g. input, if that driver wakes up the waitqueue passed to epoll before allowing suspend. Signed-off-by: Arve Hjønnevåg Reviewed-by: NeilBrown Signed-off-by: Rafael J. Wysocki --- fs/eventpoll.c | 90 ++++++++++++++++++++++++++++++++++++-- include/linux/capability.h | 5 ++- include/linux/eventpoll.h | 12 +++++ 3 files changed, 103 insertions(+), 4 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index c0b3c70ee87a..2cf0f2153be5 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -87,7 +88,7 @@ */ /* Epoll private bits inside the event mask */ -#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET) +#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET) /* Maximum number of nesting allowed inside epoll sets */ #define EP_MAX_NESTS 4 @@ -154,6 +155,9 @@ struct epitem { /* List header used to link this item to the "struct file" items list */ struct list_head fllink; + /* wakeup_source used when EPOLLWAKEUP is set */ + struct wakeup_source *ws; + /* The structure that describe the interested events and the source fd */ struct epoll_event event; }; @@ -194,6 +198,9 @@ struct eventpoll { */ struct epitem *ovflist; + /* wakeup_source used when ep_scan_ready_list is running */ + struct wakeup_source *ws; + /* The user that created the eventpoll descriptor */ struct user_struct *user; @@ -588,8 +595,10 @@ static int ep_scan_ready_list(struct eventpoll *ep, * queued into ->ovflist but the "txlist" might already * contain them, and the list_splice() below takes care of them. */ - if (!ep_is_linked(&epi->rdllink)) + if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); + } } /* * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after @@ -602,6 +611,7 @@ static int ep_scan_ready_list(struct eventpoll *ep, * Quickly re-inject items left on "txlist". */ list_splice(&txlist, &ep->rdllist); + __pm_relax(ep->ws); if (!list_empty(&ep->rdllist)) { /* @@ -656,6 +666,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) list_del_init(&epi->rdllink); spin_unlock_irqrestore(&ep->lock, flags); + wakeup_source_unregister(epi->ws); + /* At this point it is safe to free the eventpoll item */ kmem_cache_free(epi_cache, epi); @@ -706,6 +718,7 @@ static void ep_free(struct eventpoll *ep) mutex_unlock(&epmutex); mutex_destroy(&ep->mtx); free_uid(ep->user); + wakeup_source_unregister(ep->ws); kfree(ep); } @@ -737,6 +750,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, * callback, but it's not actually ready, as far as * caller requested events goes. We can remove it here. */ + __pm_relax(epi->ws); list_del_init(&epi->rdllink); } } @@ -927,13 +941,23 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k if (epi->next == EP_UNACTIVE_PTR) { epi->next = ep->ovflist; ep->ovflist = epi; + if (epi->ws) { + /* + * Activate ep->ws since epi->ws may get + * deactivated at any time. + */ + __pm_stay_awake(ep->ws); + } + } goto out_unlock; } /* If this file is already in the ready list we exit soon */ - if (!ep_is_linked(&epi->rdllink)) + if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); + } /* * Wake up ( if active ) both the eventpoll wait list and the ->poll() @@ -1091,6 +1115,30 @@ static int reverse_path_check(void) return error; } +static int ep_create_wakeup_source(struct epitem *epi) +{ + const char *name; + + if (!epi->ep->ws) { + epi->ep->ws = wakeup_source_register("eventpoll"); + if (!epi->ep->ws) + return -ENOMEM; + } + + name = epi->ffd.file->f_path.dentry->d_name.name; + epi->ws = wakeup_source_register(name); + if (!epi->ws) + return -ENOMEM; + + return 0; +} + +static void ep_destroy_wakeup_source(struct epitem *epi) +{ + wakeup_source_unregister(epi->ws); + epi->ws = NULL; +} + /* * Must be called with "mtx" held. */ @@ -1118,6 +1166,13 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, epi->event = *event; epi->nwait = 0; epi->next = EP_UNACTIVE_PTR; + if (epi->event.events & EPOLLWAKEUP) { + error = ep_create_wakeup_source(epi); + if (error) + goto error_create_wakeup_source; + } else { + epi->ws = NULL; + } /* Initialize the poll table using the queue callback */ epq.epi = epi; @@ -1164,6 +1219,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* If the file is already "ready" we drop it inside the ready list */ if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) @@ -1204,6 +1260,9 @@ error_unregister: list_del_init(&epi->rdllink); spin_unlock_irqrestore(&ep->lock, flags); + wakeup_source_unregister(epi->ws); + +error_create_wakeup_source: kmem_cache_free(epi_cache, epi); return error; @@ -1229,6 +1288,12 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even epi->event.events = event->events; pt._key = event->events; epi->event.data = event->data; /* protected by mtx */ + if (epi->event.events & EPOLLWAKEUP) { + if (!epi->ws) + ep_create_wakeup_source(epi); + } else if (epi->ws) { + ep_destroy_wakeup_source(epi); + } /* * Get current event bits. We can safely use the file* here because @@ -1244,6 +1309,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even spin_lock_irq(&ep->lock); if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) @@ -1282,6 +1348,18 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, !list_empty(head) && eventcnt < esed->maxevents;) { epi = list_first_entry(head, struct epitem, rdllink); + /* + * Activate ep->ws before deactivating epi->ws to prevent + * triggering auto-suspend here (in case we reactive epi->ws + * below). + * + * This could be rearranged to delay the deactivation of epi->ws + * instead, but then epi->ws would temporarily be out of sync + * with ep_is_linked(). + */ + if (epi->ws && epi->ws->active) + __pm_stay_awake(ep->ws); + __pm_relax(epi->ws); list_del_init(&epi->rdllink); pt._key = epi->event.events; @@ -1298,6 +1376,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, if (__put_user(revents, &uevent->events) || __put_user(epi->event.data, &uevent->data)) { list_add(&epi->rdllink, head); + __pm_stay_awake(epi->ws); return eventcnt ? eventcnt : -EFAULT; } eventcnt++; @@ -1317,6 +1396,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, * poll callback will queue them in ep->ovflist. */ list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); } } } @@ -1629,6 +1709,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, if (!tfile->f_op || !tfile->f_op->poll) goto error_tgt_fput; + /* Check if EPOLLWAKEUP is allowed */ + if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP)) + goto error_tgt_fput; + /* * We have to check that the file structure underneath the file descriptor * the user passed to us _is_ an eventpoll file. And also we do not permit diff --git a/include/linux/capability.h b/include/linux/capability.h index 12d52dedb229..c398cff3dab7 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -360,8 +360,11 @@ struct cpu_vfs_cap_data { #define CAP_WAKE_ALARM 35 +/* Allow preventing system suspends while epoll events are pending */ -#define CAP_LAST_CAP CAP_WAKE_ALARM +#define CAP_EPOLLWAKEUP 36 + +#define CAP_LAST_CAP CAP_EPOLLWAKEUP #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 657ab55beda0..6f8be328770a 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -26,6 +26,18 @@ #define EPOLL_CTL_DEL 2 #define EPOLL_CTL_MOD 3 +/* + * Request the handling of system wakeup events so as to prevent system suspends + * from happening while those events are being processed. + * + * Assuming neither EPOLLET nor EPOLLONESHOT is set, system suspends will not be + * re-allowed until epoll_wait is called again after consuming the wakeup + * event(s). + * + * Requires CAP_EPOLLWAKEUP + */ +#define EPOLLWAKEUP (1 << 29) + /* Set the One Shot behaviour for the target file descriptor */ #define EPOLLONESHOT (1 << 30) From 040e5bf65e1ee66266bc314c5965518a7c21ff36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Fri, 4 May 2012 00:14:21 +0200 Subject: [PATCH 10/13] PM / Sleep: Fix a mistake in a conditional in autosleep_store() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The condition check in autosleep_store() is incorrect and prevents /sys/power/autosleep from working as advertised. Fix that. [rjw: Added the changelog.] Signed-off-by: Arve Hjønnevåg Signed-off-by: Rafael J. Wysocki --- kernel/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/main.c b/kernel/power/main.c index 54ec071de337..428f8a034e96 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -422,7 +422,7 @@ static ssize_t autosleep_store(struct kobject *kobj, int error; if (state == PM_SUSPEND_ON - && !(strncmp(buf, "off", 3) && strncmp(buf, "off\n", 4))) + && strcmp(buf, "off") && strcmp(buf, "off\n")) return -EINVAL; error = pm_autosleep_set_state(state); From 6237dd132d4eb408ffa80830fe395448e5657ab0 Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Wed, 2 May 2012 14:33:37 +0200 Subject: [PATCH 11/13] PM / Documentation: suspend-and-cpuhotplug.txt: Fix typo sysfs was expected in this context. Signed-off-by: Marcos Paulo de Souza Acked-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- Documentation/power/suspend-and-cpuhotplug.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/power/suspend-and-cpuhotplug.txt b/Documentation/power/suspend-and-cpuhotplug.txt index f28f9a6f0347..e13dafc8e8f1 100644 --- a/Documentation/power/suspend-and-cpuhotplug.txt +++ b/Documentation/power/suspend-and-cpuhotplug.txt @@ -29,7 +29,7 @@ More details follow: Write 'mem' to /sys/power/state - syfs file + sysfs file | v Acquire pm_mutex lock From c73893e2ca731b4a81ae59246ab57979aa188777 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 5 May 2012 21:57:20 +0200 Subject: [PATCH 12/13] PM / Sleep: Make the limit of user space wakeup sources configurable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make it possible to configure out the check against the limit of user space wakeup sources for debugging and default Android builds. Signed-off-by: Rafael J. Wysocki Acked-by: Arve Hjønnevåg --- kernel/power/Kconfig | 6 ++++++ kernel/power/wakelock.c | 31 ++++++++++++++++++++++++++----- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 1d534076d33a..08783eda9ce4 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -119,6 +119,12 @@ config PM_WAKELOCKS Allow user space to create, activate and deactivate wakeup source objects with the help of a sysfs-based interface. +config PM_WAKELOCKS_LIMIT + int "Maximum number of user space wakeup sources (0 = no limit)" + range 0 100000 + default 100 + depends on PM_WAKELOCKS + config PM_RUNTIME bool "Run-time PM core functionality" depends on !IA64_HP_SIM diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c index 579700665e8c..dc34b9d3b7d8 100644 --- a/kernel/power/wakelock.c +++ b/kernel/power/wakelock.c @@ -17,7 +17,6 @@ #include #include -#define WL_NUMBER_LIMIT 100 #define WL_GC_COUNT_MAX 100 #define WL_GC_TIME_SEC 300 @@ -32,7 +31,6 @@ struct wakelock { static struct rb_root wakelocks_tree = RB_ROOT; static LIST_HEAD(wakelocks_lru_list); -static unsigned int number_of_wakelocks; static unsigned int wakelocks_gc_count; ssize_t pm_show_wakelocks(char *buf, bool show_active) @@ -58,6 +56,29 @@ ssize_t pm_show_wakelocks(char *buf, bool show_active) return (str - buf); } +#if CONFIG_PM_WAKELOCKS_LIMIT > 0 +static unsigned int number_of_wakelocks; + +static inline bool wakelocks_limit_exceeded(void) +{ + return number_of_wakelocks > CONFIG_PM_WAKELOCKS_LIMIT; +} + +static inline void increment_wakelocks_number(void) +{ + number_of_wakelocks++; +} + +static inline void decrement_wakelocks_number(void) +{ + number_of_wakelocks--; +} +#else /* CONFIG_PM_WAKELOCKS_LIMIT = 0 */ +static inline bool wakelocks_limit_exceeded(void) { return false; } +static inline void increment_wakelocks_number(void) {} +static inline void decrement_wakelocks_number(void) {} +#endif /* CONFIG_PM_WAKELOCKS_LIMIT */ + static struct wakelock *wakelock_lookup_add(const char *name, size_t len, bool add_if_not_found) { @@ -85,7 +106,7 @@ static struct wakelock *wakelock_lookup_add(const char *name, size_t len, if (!add_if_not_found) return ERR_PTR(-EINVAL); - if (number_of_wakelocks > WL_NUMBER_LIMIT) + if (wakelocks_limit_exceeded()) return ERR_PTR(-ENOSPC); /* Not found, we have to add a new one. */ @@ -103,7 +124,7 @@ static struct wakelock *wakelock_lookup_add(const char *name, size_t len, rb_link_node(&wl->node, parent, node); rb_insert_color(&wl->node, &wakelocks_tree); list_add(&wl->lru, &wakelocks_lru_list); - number_of_wakelocks++; + increment_wakelocks_number(); return wl; } @@ -175,7 +196,7 @@ static void wakelocks_gc(void) list_del(&wl->lru); kfree(wl->name); kfree(wl); - number_of_wakelocks--; + decrement_wakelocks_number(); } } wakelocks_gc_count = 0; From 4e585d25e120f1eae0a3a8bf8f6ebc7692afec18 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 5 May 2012 21:57:28 +0200 Subject: [PATCH 13/13] PM / Sleep: User space wakeup sources garbage collector Kconfig option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make it possible to configure out the user space wakeup sources garbage collector for debugging and default Android builds. Signed-off-by: Rafael J. Wysocki Acked-by: Arve Hjønnevåg --- kernel/power/Kconfig | 5 ++ kernel/power/wakelock.c | 101 ++++++++++++++++++++++++---------------- 2 files changed, 67 insertions(+), 39 deletions(-) diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 08783eda9ce4..8f9b4eb974e0 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -125,6 +125,11 @@ config PM_WAKELOCKS_LIMIT default 100 depends on PM_WAKELOCKS +config PM_WAKELOCKS_GC + bool "Garbage collector for user space wakeup sources" + depends on PM_WAKELOCKS + default y + config PM_RUNTIME bool "Run-time PM core functionality" depends on !IA64_HP_SIM diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c index dc34b9d3b7d8..c8fba3380076 100644 --- a/kernel/power/wakelock.c +++ b/kernel/power/wakelock.c @@ -17,21 +17,18 @@ #include #include -#define WL_GC_COUNT_MAX 100 -#define WL_GC_TIME_SEC 300 - static DEFINE_MUTEX(wakelocks_lock); struct wakelock { char *name; struct rb_node node; struct wakeup_source ws; +#ifdef CONFIG_PM_WAKELOCKS_GC struct list_head lru; +#endif }; static struct rb_root wakelocks_tree = RB_ROOT; -static LIST_HEAD(wakelocks_lru_list); -static unsigned int wakelocks_gc_count; ssize_t pm_show_wakelocks(char *buf, bool show_active) { @@ -79,6 +76,61 @@ static inline void increment_wakelocks_number(void) {} static inline void decrement_wakelocks_number(void) {} #endif /* CONFIG_PM_WAKELOCKS_LIMIT */ +#ifdef CONFIG_PM_WAKELOCKS_GC +#define WL_GC_COUNT_MAX 100 +#define WL_GC_TIME_SEC 300 + +static LIST_HEAD(wakelocks_lru_list); +static unsigned int wakelocks_gc_count; + +static inline void wakelocks_lru_add(struct wakelock *wl) +{ + list_add(&wl->lru, &wakelocks_lru_list); +} + +static inline void wakelocks_lru_most_recent(struct wakelock *wl) +{ + list_move(&wl->lru, &wakelocks_lru_list); +} + +static void wakelocks_gc(void) +{ + struct wakelock *wl, *aux; + ktime_t now; + + if (++wakelocks_gc_count <= WL_GC_COUNT_MAX) + return; + + now = ktime_get(); + list_for_each_entry_safe_reverse(wl, aux, &wakelocks_lru_list, lru) { + u64 idle_time_ns; + bool active; + + spin_lock_irq(&wl->ws.lock); + idle_time_ns = ktime_to_ns(ktime_sub(now, wl->ws.last_time)); + active = wl->ws.active; + spin_unlock_irq(&wl->ws.lock); + + if (idle_time_ns < ((u64)WL_GC_TIME_SEC * NSEC_PER_SEC)) + break; + + if (!active) { + wakeup_source_remove(&wl->ws); + rb_erase(&wl->node, &wakelocks_tree); + list_del(&wl->lru); + kfree(wl->name); + kfree(wl); + decrement_wakelocks_number(); + } + } + wakelocks_gc_count = 0; +} +#else /* !CONFIG_PM_WAKELOCKS_GC */ +static inline void wakelocks_lru_add(struct wakelock *wl) {} +static inline void wakelocks_lru_most_recent(struct wakelock *wl) {} +static inline void wakelocks_gc(void) {} +#endif /* !CONFIG_PM_WAKELOCKS_GC */ + static struct wakelock *wakelock_lookup_add(const char *name, size_t len, bool add_if_not_found) { @@ -123,7 +175,7 @@ static struct wakelock *wakelock_lookup_add(const char *name, size_t len, wakeup_source_add(&wl->ws); rb_link_node(&wl->node, parent, node); rb_insert_color(&wl->node, &wakelocks_tree); - list_add(&wl->lru, &wakelocks_lru_list); + wakelocks_lru_add(wl); increment_wakelocks_number(); return wl; } @@ -166,42 +218,13 @@ int pm_wake_lock(const char *buf) __pm_stay_awake(&wl->ws); } - list_move(&wl->lru, &wakelocks_lru_list); + wakelocks_lru_most_recent(wl); out: mutex_unlock(&wakelocks_lock); return ret; } -static void wakelocks_gc(void) -{ - struct wakelock *wl, *aux; - ktime_t now = ktime_get(); - - list_for_each_entry_safe_reverse(wl, aux, &wakelocks_lru_list, lru) { - u64 idle_time_ns; - bool active; - - spin_lock_irq(&wl->ws.lock); - idle_time_ns = ktime_to_ns(ktime_sub(now, wl->ws.last_time)); - active = wl->ws.active; - spin_unlock_irq(&wl->ws.lock); - - if (idle_time_ns < ((u64)WL_GC_TIME_SEC * NSEC_PER_SEC)) - break; - - if (!active) { - wakeup_source_remove(&wl->ws); - rb_erase(&wl->node, &wakelocks_tree); - list_del(&wl->lru); - kfree(wl->name); - kfree(wl); - decrement_wakelocks_number(); - } - } - wakelocks_gc_count = 0; -} - int pm_wake_unlock(const char *buf) { struct wakelock *wl; @@ -226,9 +249,9 @@ int pm_wake_unlock(const char *buf) goto out; } __pm_relax(&wl->ws); - list_move(&wl->lru, &wakelocks_lru_list); - if (++wakelocks_gc_count > WL_GC_COUNT_MAX) - wakelocks_gc(); + + wakelocks_lru_most_recent(wl); + wakelocks_gc(); out: mutex_unlock(&wakelocks_lock);