OpenCloudOS-Kernel/include/linux/suspend.h

322 lines
12 KiB
C
Raw Normal View History

#ifndef _LINUX_SUSPEND_H
#define _LINUX_SUSPEND_H
#include <linux/swap.h>
#include <linux/notifier.h>
#include <linux/init.h>
#include <linux/pm.h>
#include <linux/mm.h>
#include <asm/errno.h>
#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
extern void pm_set_vt_switch(int);
extern int pm_prepare_console(void);
extern void pm_restore_console(void);
#else
static inline void pm_set_vt_switch(int do_switch)
{
}
static inline int pm_prepare_console(void)
{
return 0;
}
static inline void pm_restore_console(void)
{
}
#endif
typedef int __bitwise suspend_state_t;
#define PM_SUSPEND_ON ((__force suspend_state_t) 0)
#define PM_SUSPEND_STANDBY ((__force suspend_state_t) 1)
#define PM_SUSPEND_MEM ((__force suspend_state_t) 3)
#define PM_SUSPEND_MAX ((__force suspend_state_t) 4)
/**
* struct platform_suspend_ops - Callbacks for managing platform dependent
* system sleep states.
*
* @valid: Callback to determine if given system sleep state is supported by
* the platform.
* Valid (ie. supported) states are advertised in /sys/power/state. Note
* that it still may be impossible to enter given system sleep state if the
* conditions aren't right.
* There is the %suspend_valid_only_mem function available that can be
* assigned to this if the platform only supports mem sleep.
*
* @begin: Initialise a transition to given system sleep state.
* @begin() is executed right prior to suspending devices. The information
* conveyed to the platform code by @begin() should be disregarded by it as
* soon as @end() is executed. If @begin() fails (ie. returns nonzero),
* @prepare(), @enter() and @finish() will not be called by the PM core.
* This callback is optional. However, if it is implemented, the argument
* passed to @enter() is redundant and should be ignored.
*
* @prepare: Prepare the platform for entering the system sleep state indicated
* by @begin().
* @prepare() is called right after devices have been suspended (ie. the
* appropriate .suspend() method has been executed for each device) and
* before device drivers' late suspend callbacks are executed. It returns
* 0 on success or a negative error code otherwise, in which case the
* system cannot enter the desired sleep state (@prepare_late(), @enter(),
* and @wake() will not be called in that case).
*
* @prepare_late: Finish preparing the platform for entering the system sleep
* state indicated by @begin().
* @prepare_late is called before disabling nonboot CPUs and after
* device drivers' late suspend callbacks have been executed. It returns
* 0 on success or a negative error code otherwise, in which case the
* system cannot enter the desired sleep state (@enter() will not be
* executed).
*
* @enter: Enter the system sleep state indicated by @begin() or represented by
* the argument if @begin() is not implemented.
* This callback is mandatory. It returns 0 on success or a negative
* error code otherwise, in which case the system cannot enter the desired
* sleep state.
*
* @wake: Called when the system has just left a sleep state, right after
* the nonboot CPUs have been enabled and before device drivers' early
* resume callbacks are executed.
* This callback is optional, but should be implemented by the platforms
* that implement @prepare_late(). If implemented, it is always called
* after @prepare_late and @enter(), even if one of them fails.
*
* @finish: Finish wake-up of the platform.
* @finish is called right prior to calling device drivers' regular suspend
* callbacks.
* This callback is optional, but should be implemented by the platforms
* that implement @prepare(). If implemented, it is always called after
* @enter() and @wake(), even if any of them fails. It is executed after
* a failing @prepare.
*
* @end: Called by the PM core right after resuming devices, to indicate to
* the platform that the system has returned to the working state or
* the transition to the sleep state has been aborted.
* This callback is optional, but should be implemented by the platforms
* that implement @begin(). Accordingly, platforms implementing @begin()
* should also provide a @end() which cleans up transitions aborted before
* @enter().
*
* @recover: Recover the platform from a suspend failure.
* Called by the PM core if the suspending of devices fails.
* This callback is optional and should only be implemented by platforms
* which require special recovery actions in that situation.
*/
struct platform_suspend_ops {
int (*valid)(suspend_state_t state);
int (*begin)(suspend_state_t state);
int (*prepare)(void);
int (*prepare_late)(void);
int (*enter)(suspend_state_t state);
void (*wake)(void);
void (*finish)(void);
void (*end)(void);
void (*recover)(void);
};
#ifdef CONFIG_SUSPEND
/**
* suspend_set_ops - set platform dependent suspend operations
* @ops: The new suspend operations to set.
*/
extern void suspend_set_ops(const struct platform_suspend_ops *ops);
extern int suspend_valid_only_mem(suspend_state_t state);
/**
* arch_suspend_disable_irqs - disable IRQs for suspend
*
* Disables IRQs (in the default case). This is a weak symbol in the common
* code and thus allows architectures to override it if more needs to be
* done. Not called for suspend to disk.
*/
extern void arch_suspend_disable_irqs(void);
/**
* arch_suspend_enable_irqs - enable IRQs after suspend
*
* Enables IRQs (in the default case). This is a weak symbol in the common
* code and thus allows architectures to override it if more needs to be
* done. Not called for suspend to disk.
*/
extern void arch_suspend_enable_irqs(void);
extern int pm_suspend(suspend_state_t state);
#else /* !CONFIG_SUSPEND */
#define suspend_valid_only_mem NULL
static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {}
static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; }
#endif /* !CONFIG_SUSPEND */
[PATCH] swsusp: Improve handling of highmem Currently swsusp saves the contents of highmem pages by copying them to the normal zone which is quite inefficient (eg. it requires two normal pages to be used for saving one highmem page). This may be improved by using highmem for saving the contents of saveable highmem pages. Namely, during the suspend phase of the suspend-resume cycle we try to allocate as many free highmem pages as there are saveable highmem pages. If there are not enough highmem image pages to store the contents of all of the saveable highmem pages, some of them will be stored in the "normal" memory. Next, we allocate as many free "normal" pages as needed to store the (remaining) image data. We use a memory bitmap to mark the allocated free pages (ie. highmem as well as "normal" image pages). Now, we use another memory bitmap to mark all of the saveable pages (highmem as well as "normal") and the contents of the saveable pages are copied into the image pages. Then, the second bitmap is used to save the pfns corresponding to the saveable pages and the first one is used to save their data. During the resume phase the pfns of the pages that were saveable during the suspend are loaded from the image and used to mark the "unsafe" page frames. Next, we try to allocate as many free highmem page frames as to load all of the image data that had been in the highmem before the suspend and we allocate so many free "normal" page frames that the total number of allocated free pages (highmem and "normal") is equal to the size of the image. While doing this we have to make sure that there will be some extra free "normal" and "safe" page frames for two lists of PBEs constructed later. Now, the image data are loaded, if possible, into their "original" page frames. The image data that cannot be written into their "original" page frames are loaded into "safe" page frames and their "original" kernel virtual addresses, as well as the addresses of the "safe" pages containing their copies, are stored in one of two lists of PBEs. One list of PBEs is for the copies of "normal" suspend pages (ie. "normal" pages that were saveable during the suspend) and it is used in the same way as previously (ie. by the architecture-dependent parts of swsusp). The other list of PBEs is for the copies of highmem suspend pages. The pages in this list are restored (in a reversible way) right before the arch-dependent code is called. Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl> Cc: Pavel Machek <pavel@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-07 12:34:18 +08:00
/* struct pbe is used for creating lists of pages that should be restored
* atomically during the resume from disk, because the page frames they have
* occupied before the suspend are in use.
*/
struct pbe {
[PATCH] swsusp: Improve handling of highmem Currently swsusp saves the contents of highmem pages by copying them to the normal zone which is quite inefficient (eg. it requires two normal pages to be used for saving one highmem page). This may be improved by using highmem for saving the contents of saveable highmem pages. Namely, during the suspend phase of the suspend-resume cycle we try to allocate as many free highmem pages as there are saveable highmem pages. If there are not enough highmem image pages to store the contents of all of the saveable highmem pages, some of them will be stored in the "normal" memory. Next, we allocate as many free "normal" pages as needed to store the (remaining) image data. We use a memory bitmap to mark the allocated free pages (ie. highmem as well as "normal" image pages). Now, we use another memory bitmap to mark all of the saveable pages (highmem as well as "normal") and the contents of the saveable pages are copied into the image pages. Then, the second bitmap is used to save the pfns corresponding to the saveable pages and the first one is used to save their data. During the resume phase the pfns of the pages that were saveable during the suspend are loaded from the image and used to mark the "unsafe" page frames. Next, we try to allocate as many free highmem page frames as to load all of the image data that had been in the highmem before the suspend and we allocate so many free "normal" page frames that the total number of allocated free pages (highmem and "normal") is equal to the size of the image. While doing this we have to make sure that there will be some extra free "normal" and "safe" page frames for two lists of PBEs constructed later. Now, the image data are loaded, if possible, into their "original" page frames. The image data that cannot be written into their "original" page frames are loaded into "safe" page frames and their "original" kernel virtual addresses, as well as the addresses of the "safe" pages containing their copies, are stored in one of two lists of PBEs. One list of PBEs is for the copies of "normal" suspend pages (ie. "normal" pages that were saveable during the suspend) and it is used in the same way as previously (ie. by the architecture-dependent parts of swsusp). The other list of PBEs is for the copies of highmem suspend pages. The pages in this list are restored (in a reversible way) right before the arch-dependent code is called. Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl> Cc: Pavel Machek <pavel@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-07 12:34:18 +08:00
void *address; /* address of the copy */
void *orig_address; /* original address of a page */
struct pbe *next;
};
/* mm/page_alloc.c */
extern void mark_free_pages(struct zone *zone);
/**
* struct platform_hibernation_ops - hibernation platform support
*
* The methods in this structure allow a platform to carry out special
* operations required by it during a hibernation transition.
*
* All the methods below, except for @recover(), must be implemented.
*
* @begin: Tell the platform driver that we're starting hibernation.
* Called right after shrinking memory and before freezing devices.
*
* @end: Called by the PM core right after resuming devices, to indicate to
* the platform that the system has returned to the working state.
*
* @pre_snapshot: Prepare the platform for creating the hibernation image.
* Called right after devices have been frozen and before the nonboot
* CPUs are disabled (runs with IRQs on).
*
* @finish: Restore the previous state of the platform after the hibernation
* image has been created *or* put the platform into the normal operation
* mode after the hibernation (the same method is executed in both cases).
* Called right after the nonboot CPUs have been enabled and before
* thawing devices (runs with IRQs on).
*
* @prepare: Prepare the platform for entering the low power state.
* Called right after the hibernation image has been saved and before
* devices are prepared for entering the low power state.
*
* @enter: Put the system into the low power state after the hibernation image
* has been saved to disk.
* Called after the nonboot CPUs have been disabled and all of the low
* level devices have been shut down (runs with IRQs off).
*
* @leave: Perform the first stage of the cleanup after the system sleep state
* indicated by @set_target() has been left.
* Called right after the control has been passed from the boot kernel to
* the image kernel, before the nonboot CPUs are enabled and before devices
* are resumed. Executed with interrupts disabled.
*
* @pre_restore: Prepare system for the restoration from a hibernation image.
* Called right after devices have been frozen and before the nonboot
* CPUs are disabled (runs with IRQs on).
*
* @restore_cleanup: Clean up after a failing image restoration.
* Called right after the nonboot CPUs have been enabled and before
* thawing devices (runs with IRQs on).
*
* @recover: Recover the platform from a failure to suspend devices.
* Called by the PM core if the suspending of devices during hibernation
* fails. This callback is optional and should only be implemented by
* platforms which require special recovery actions in that situation.
*/
struct platform_hibernation_ops {
int (*begin)(void);
void (*end)(void);
int (*pre_snapshot)(void);
void (*finish)(void);
int (*prepare)(void);
int (*enter)(void);
void (*leave)(void);
swsusp: introduce restore platform operations At least on some machines it is necessary to prepare the ACPI firmware for the restoration of the system memory state from the hibernation image if the "platform" mode of hibernation has been used. Namely, in that cases we need to disable the GPEs before replacing the "boot" kernel with the "frozen" kernel (cf. http://bugzilla.kernel.org/show_bug.cgi?id=7887). After the restore they will be re-enabled by hibernation_ops->finish(), but if the restore fails, they have to be re-enabled by the restore code explicitly. For this purpose we can introduce two additional hibernation operations, called pre_restore() and restore_cleanup() and call them from the restore code path. Still, they should be called if the "platform" mode of hibernation has been used, so we need to pass the information about the hibernation mode from the "frozen" kernel to the "boot" kernel in the image header. Apparently, we can't drop the disabling of GPEs before the restore because of Bug #7887 .  We also can't do it unconditionally, because the GPEs wouldn't have been enabled after a successful restore if the suspend had been done in the 'shutdown' or 'reboot' mode. In principle we could (and probably should) unconditionally disable the GPEs before each snapshot creation *and* before the restore, but then we'd have to unconditionally enable them after the snapshot creation as well as after the restore (or restore failure)   Still, for this purpose we'd need to modify acpi_enter_sleep_state_prep() and acpi_leave_sleep_state() and we'd have to introduce some mechanism synchronizing the disablind/enabling of the GPEs with the device drivers' .suspend()/.resume() routines and with disable_/enable_nonboot_cpus().  However, this would have affected the suspend (ie. s2ram) code as well as the hibernation, which I'd like to avoid in this patch series. Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl> Cc: Nigel Cunningham <nigel@nigel.suspend2.net> Cc: Pavel Machek <pavel@ucw.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-19 16:47:30 +08:00
int (*pre_restore)(void);
void (*restore_cleanup)(void);
void (*recover)(void);
};
#ifdef CONFIG_HIBERNATION
/* kernel/power/snapshot.c */
extern void __register_nosave_region(unsigned long b, unsigned long e, int km);
static inline void __init register_nosave_region(unsigned long b, unsigned long e)
{
__register_nosave_region(b, e, 0);
}
static inline void __init register_nosave_region_late(unsigned long b, unsigned long e)
{
__register_nosave_region(b, e, 1);
}
extern int swsusp_page_is_forbidden(struct page *);
extern void swsusp_set_page_free(struct page *);
extern void swsusp_unset_page_free(struct page *);
extern unsigned long get_safe_page(gfp_t gfp_mask);
extern void hibernation_set_ops(const struct platform_hibernation_ops *ops);
extern int hibernate(void);
extern bool system_entering_hibernation(void);
#else /* CONFIG_HIBERNATION */
static inline void register_nosave_region(unsigned long b, unsigned long e) {}
static inline void register_nosave_region_late(unsigned long b, unsigned long e) {}
static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
static inline void swsusp_set_page_free(struct page *p) {}
static inline void swsusp_unset_page_free(struct page *p) {}
static inline void hibernation_set_ops(const struct platform_hibernation_ops *ops) {}
static inline int hibernate(void) { return -ENOSYS; }
static inline bool system_entering_hibernation(void) { return false; }
#endif /* CONFIG_HIBERNATION */
#ifdef CONFIG_PM_SLEEP
void save_processor_state(void);
void restore_processor_state(void);
/* kernel/power/main.c */
extern int register_pm_notifier(struct notifier_block *nb);
extern int unregister_pm_notifier(struct notifier_block *nb);
#define pm_notifier(fn, pri) { \
static struct notifier_block fn##_nb = \
{ .notifier_call = fn, .priority = pri }; \
register_pm_notifier(&fn##_nb); \
}
PM: Make it possible to avoid races between wakeup and system sleep One of the arguments during the suspend blockers discussion was that the mainline kernel didn't contain any mechanisms making it possible to avoid races between wakeup and system suspend. Generally, there are two problems in that area. First, if a wakeup event occurs exactly when /sys/power/state is being written to, it may be delivered to user space right before the freezer kicks in, so the user space consumer of the event may not be able to process it before the system is suspended. Second, if a wakeup event occurs after user space has been frozen, it is not generally guaranteed that the ongoing transition of the system into a sleep state will be aborted. To address these issues introduce a new global sysfs attribute, /sys/power/wakeup_count, associated with a running counter of wakeup events and three helper functions, pm_stay_awake(), pm_relax(), and pm_wakeup_event(), that may be used by kernel subsystems to control the behavior of this attribute and to request the PM core to abort system transitions into a sleep state already in progress. The /sys/power/wakeup_count file may be read from or written to by user space. Reads will always succeed (unless interrupted by a signal) and return the current value of the wakeup events counter. Writes, however, will only succeed if the written number is equal to the current value of the wakeup events counter. If a write is successful, it will cause the kernel to save the current value of the wakeup events counter and to abort the subsequent system transition into a sleep state if any wakeup events are reported after the write has returned. [The assumption is that before writing to /sys/power/state user space will first read from /sys/power/wakeup_count. Next, user space consumers of wakeup events will have a chance to acknowledge or veto the upcoming system transition to a sleep state. Finally, if the transition is allowed to proceed, /sys/power/wakeup_count will be written to and if that succeeds, /sys/power/state will be written to as well. Still, if any wakeup events are reported to the PM core by kernel subsystems after that point, the transition will be aborted.] Additionally, put a wakeup events counter into struct dev_pm_info and make these per-device wakeup event counters available via sysfs, so that it's possible to check the activity of various wakeup event sources within the kernel. To illustrate how subsystems can use pm_wakeup_event(), make the low-level PCI runtime PM wakeup-handling code use it. Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl> Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org> Acked-by: Greg Kroah-Hartman <gregkh@suse.de> Acked-by: markgross <markgross@thegnar.org> Reviewed-by: Alan Stern <stern@rowland.harvard.edu>
2010-07-06 04:43:53 +08:00
/* drivers/base/power/wakeup.c */
extern bool events_check_enabled;
extern bool pm_wakeup_pending(void);
extern bool pm_get_wakeup_count(unsigned int *count);
extern bool pm_save_wakeup_count(unsigned int count);
#else /* !CONFIG_PM_SLEEP */
static inline int register_pm_notifier(struct notifier_block *nb)
{
return 0;
}
static inline int unregister_pm_notifier(struct notifier_block *nb)
{
return 0;
}
#define pm_notifier(fn, pri) do { (void)(fn); } while (0)
static inline bool pm_wakeup_pending(void) { return false; }
#endif /* !CONFIG_PM_SLEEP */
extern struct mutex pm_mutex;
#ifndef CONFIG_HIBERNATE_CALLBACKS
static inline void lock_system_sleep(void) {}
static inline void unlock_system_sleep(void) {}
#else
/* Let some subsystems like memory hotadd exclude hibernation */
static inline void lock_system_sleep(void)
{
mutex_lock(&pm_mutex);
}
static inline void unlock_system_sleep(void)
{
mutex_unlock(&pm_mutex);
}
#endif
kexec jump: save/restore device state This patch implements devices state save/restore before after kexec. This patch together with features in kexec_jump patch can be used for following: - A simple hibernation implementation without ACPI support. You can kexec a hibernating kernel, save the memory image of original system and shutdown the system. When resuming, you restore the memory image of original system via ordinary kexec load then jump back. - Kernel/system debug through making system snapshot. You can make system snapshot, jump back, do some thing and make another system snapshot. - Cooperative multi-kernel/system. With kexec jump, you can switch between several kernels/systems quickly without boot process except the first time. This appears like swap a whole kernel/system out/in. - A general method to call program in physical mode (paging turning off). This can be used to invoke BIOS code under Linux. The following user-space tools can be used with kexec jump: - kexec-tools needs to be patched to support kexec jump. The patches and the precompiled kexec can be download from the following URL: source: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec-tools-src_git_kh10.tar.bz2 patches: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec-tools-patches_git_kh10.tar.bz2 binary: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec_git_kh10 - makedumpfile with patches are used as memory image saving tool, it can exclude free pages from original kernel memory image file. The patches and the precompiled makedumpfile can be download from the following URL: source: http://khibernation.sourceforge.net/download/release_v10/makedumpfile/makedumpfile-src_cvs_kh10.tar.bz2 patches: http://khibernation.sourceforge.net/download/release_v10/makedumpfile/makedumpfile-patches_cvs_kh10.tar.bz2 binary: http://khibernation.sourceforge.net/download/release_v10/makedumpfile/makedumpfile_cvs_kh10 - An initramfs image can be used as the root file system of kexeced kernel. An initramfs image built with "BuildRoot" can be downloaded from the following URL: initramfs image: http://khibernation.sourceforge.net/download/release_v10/initramfs/rootfs_cvs_kh10.gz All user space tools above are included in the initramfs image. Usage example of simple hibernation: 1. Compile and install patched kernel with following options selected: CONFIG_X86_32=y CONFIG_RELOCATABLE=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y CONFIG_PM=y CONFIG_HIBERNATION=y CONFIG_KEXEC_JUMP=y 2. Build an initramfs image contains kexec-tool and makedumpfile, or download the pre-built initramfs image, called rootfs.gz in following text. 3. Prepare a partition to save memory image of original kernel, called hibernating partition in following text. 4. Boot kernel compiled in step 1 (kernel A). 5. In the kernel A, load kernel compiled in step 1 (kernel B) with /sbin/kexec. The shell command line can be as follow: /sbin/kexec --load-preserve-context /boot/bzImage --mem-min=0x100000 --mem-max=0xffffff --initrd=rootfs.gz 6. Boot the kernel B with following shell command line: /sbin/kexec -e 7. The kernel B will boot as normal kexec. In kernel B the memory image of kernel A can be saved into hibernating partition as follow: jump_back_entry=`cat /proc/cmdline | tr ' ' '\n' | grep kexec_jump_back_entry | cut -d '='` echo $jump_back_entry > kexec_jump_back_entry cp /proc/vmcore dump.elf Then you can shutdown the machine as normal. 8. Boot kernel compiled in step 1 (kernel C). Use the rootfs.gz as root file system. 9. In kernel C, load the memory image of kernel A as follow: /sbin/kexec -l --args-none --entry=`cat kexec_jump_back_entry` dump.elf 10. Jump back to the kernel A as follow: /sbin/kexec -e Then, kernel A is resumed. Implementation point: To support jumping between two kernels, before jumping to (executing) the new kernel and jumping back to the original kernel, the devices are put into quiescent state, and the state of devices and CPU is saved. After jumping back from kexeced kernel and jumping to the new kernel, the state of devices and CPU are restored accordingly. The devices/CPU state save/restore code of software suspend is called to implement corresponding function. Known issues: - Because the segment number supported by sys_kexec_load is limited, hibernation image with many segments may not be load. This is planned to be eliminated by adding a new flag to sys_kexec_load to make a image can be loaded with multiple sys_kexec_load invoking. Now, only the i386 architecture is supported. Signed-off-by: Huang Ying <ying.huang@intel.com> Acked-by: Vivek Goyal <vgoyal@redhat.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Pavel Machek <pavel@ucw.cz> Cc: Nigel Cunningham <nigel@nigel.suspend2.net> Cc: "Rafael J. Wysocki" <rjw@sisk.pl> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-26 10:45:10 +08:00
#endif /* _LINUX_SUSPEND_H */