Linux kernel suspend resume学习:2.6.35与3.0.35比较
最近在学习linux kernel中suspend和resume的处理。
只是一味的看代码,有点枯燥,刚好有两个项目使用了不同的内核版本,就以比较这两个间的处理差别为线索进行学习。
由于列举了很多代码,为了保持连续性,整篇文章还是放到了一个blog中。
首先看看公开出去的接口,都是文件/sys/power/state。
读该文件可以获取可能取值。
写该文件可以实现状态改变。
/sys/power/state定义的地方:
power_attr(state);
两个内核版本中,power_attr的定义相同。
power_attr的定义:
/*
 * power_attr(_name) - define a static kobj_attribute called <_name>_attr.
 *
 * The attribute's file name is the stringified @_name, its mode is 0644,
 * and reads/writes are routed to <_name>_show() / <_name>_store(), which
 * the user of the macro has to provide.
 */
#define power_attr(_name) \
static struct kobj_attribute _name##_attr = { \
.attr = { \
.name = __stringify(_name), \
.mode = 0644, \
}, \
.show = _name##_show, \
.store = _name##_store, \
}
power_attr就是往sysfs中添加一个文件,操作函数:
.show = _name##_show,
.store = _name##_store,
对应到state:
.show = state_show,
.store = state_store,
内核中这两个函数的注释:
/**
* state - control system power state.
*
* show() returns what states are supported, which is hard-coded to
* 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and
* 'disk' (Suspend-to-Disk).
*
* store() accepts one of those strings, translates it into the
* proper enumerated value, and initiates a suspend transition.
*/
state_show其实就是将数组pm_states的内容show出来。
数组pm_states的定义,在两个项目中是不同的。
kernel 2.6.35项目中:
/*
 * State names (2.6.35 project tree), indexed by suspend_state_t.
 * "on" only exists when CONFIG_EARLYSUSPEND is enabled; slots that are not
 * listed here stay NULL.
 */
const char *const pm_states[PM_SUSPEND_MAX] = {
#ifdef CONFIG_EARLYSUSPEND
[PM_SUSPEND_ON] = "on",
#endif
[PM_SUSPEND_STANDBY] = "standby",
[PM_SUSPEND_MEM] = "mem",
};
kernel 3.0.35项目中:
/*
 * State names (3.0.35 project tree), indexed by suspend_state_t.
 * Slots that are not listed here stay NULL.
 */
const char *const pm_states[PM_SUSPEND_MAX] = {
[PM_SUSPEND_STANDBY] = "standby",
[PM_SUSPEND_MEM] = "mem",
};
关于这些 state 的介绍,请参考 kernel 中的文档:
Documentation/power/states.txt
函数state_store定义,两个项目中的差别也就是kernel 2.6.35项目中多了对CONFIG_EARLYSUSPEND的处理。
将kernel 2.6.35项目中state_store的定义列出来:
/*
 * state_store - handle a write to the "state" attribute (2.6.35 project
 * variant, which adds CONFIG_EARLYSUSPEND handling).
 *
 * @buf, @n: the text written and its length; everything from the first
 *           '\n' onwards is ignored.
 *
 * "disk" is routed to hibernate(). With CONFIG_SUSPEND the text is then
 * matched against pm_states[]; a match is handed to request_suspend_state()
 * (CONFIG_EARLYSUSPEND) or to enter_state() (otherwise).
 *
 * Returns @n when error ends up 0, otherwise the (non-zero) error; error
 * starts out as -EINVAL and stays so when nothing matches.
 */
static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t n)
{
#ifdef CONFIG_SUSPEND
/*
For reference, PM_SUSPEND_ON and the related definitions:
typedef int __bitwise suspend_state_t;
#define PM_SUSPEND_ON ((__force suspend_state_t) 0)
#define PM_SUSPEND_STANDBY ((__force suspend_state_t) 1)
#define PM_SUSPEND_MEM ((__force suspend_state_t) 3)
#define PM_SUSPEND_MAX ((__force suspend_state_t) 4)
So when "on" exists, the search below has to start from PM_SUSPEND_ON.
*/
#ifdef CONFIG_EARLYSUSPEND
suspend_state_t state = PM_SUSPEND_ON;
#else
suspend_state_t state = PM_SUSPEND_STANDBY;
#endif
const char * const *s;
#endif
char *p;
int len;
int error = -EINVAL;
/* len = number of bytes before the first newline, or all of @buf */
p = memchr(buf, '\n', n);
len = p ? p - buf : n;
/* First, check if we are requested to hibernate */
if (len == 4 && !strncmp(buf, "disk", len)) {
error = hibernate();
goto Exit;
}
#ifdef CONFIG_SUSPEND
/* Walk pm_states[] from the starting state; NULL slots are skipped. */
for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
if (*s && len == strlen(*s) && !strncmp(buf, *s, len))
break;
}
/* The statement guarded by this if is chosen by the #ifdef below. */
if (state < PM_SUSPEND_MAX && *s)
#ifdef CONFIG_EARLYSUSPEND
/* "on" is always accepted; other states must pass valid_state(). */
if (state == PM_SUSPEND_ON || valid_state(state)) {
error = 0;
request_suspend_state(state);
}
#else
error = enter_state(state);
#endif
#endif
Exit:
return error ? error : n;
}
函数valid_state是判断是否对指定的state进行了支持:
bool valid_state(suspend_state_t state)
{
	/*
	 * Every state needs low-level backing: with no ops table installed,
	 * or an ops table without a ->valid() hook, no state is valid.
	 */
	if (!suspend_ops || !suspend_ops->valid)
		return false;

	/* Otherwise the low-level implementation has the final say. */
	return suspend_ops->valid(state) != 0;
}
suspend_ops在函数suspend_set_ops中被赋值:
/**
 * suspend_set_ops - Install the platform's suspend operations.
 * @ops: Operations table that becomes the global suspend_ops.
 *
 * The pointer is swapped while holding pm_mutex.
 */
void suspend_set_ops(struct platform_suspend_ops *ops)
{
	mutex_lock(&pm_mutex);

	suspend_ops = ops;

	mutex_unlock(&pm_mutex);
}
一般会在 architecture 相关的 pm 模块的 init 或者 probe 函数中调用 suspend_set_ops 。
valid 一般是判断该 architecture 中是否支持该 state 。
接下来看看差异中的 request_suspend_state 函数:
/*
 * request_suspend_state - record a newly requested state and queue the
 * matching early-suspend / late-resume work (excerpt; "..." marks code the
 * article left out).
 *
 * @new_state: state requested through state_store().
 */
void request_suspend_state(suspend_state_t new_state)
{
unsigned long irqflags;
int old_sleep;
spin_lock_irqsave(&state_lock, irqflags);
/* non-zero if a suspend had already been requested before this call */
old_sleep = state & SUSPEND_REQUESTED;
...
/* "standby" is turned into "mem" with the EINK early-suspend mode */
if (new_state == PM_SUSPEND_STANDBY) {
mode = EARLY_SUSPEND_MODE_EINK;
new_state = PM_SUSPEND_MEM;
} else
mode = EARLY_SUSPEND_MODE_NORMAL;
if (!old_sleep && new_state != PM_SUSPEND_ON) {
if ((state & SUSPENDED) && (last_mode != mode)) {
/* flush the workqueue */
/* state_lock is dropped around the flush and re-taken afterwards */
spin_unlock_irqrestore(&state_lock, irqflags);
flush_workqueue(suspend_work_queue);
spin_lock_irqsave(&state_lock, irqflags);
}
state |= SUSPEND_REQUESTED;
queue_work(suspend_work_queue, &early_suspend_work); // queue early_suspend_work on suspend_work_queue
} else if (old_sleep && new_state == PM_SUSPEND_ON) { // resume path: a suspend was requested earlier and "on" is requested now
state &= ~SUSPEND_REQUESTED;
wake_lock(&main_wake_lock);
queue_work(suspend_work_queue, &late_resume_work); // queue late_resume_work on suspend_work_queue
}
/* remember the mode of the most recent non-"on" request */
if (new_state != PM_SUSPEND_ON)
last_mode = mode;
requested_suspend_state = new_state;
spin_unlock_irqrestore(&state_lock, irqflags);
}
suspend_work_queue 在函数 wakelocks_init 中被创建:
suspend_work_queue = create_singlethread_workqueue("suspend");
函数 wakelocks_init 为 core_initcall :
core_initcall(wakelocks_init);
early_suspend_work 和 late_resume_work 的定义:
/* Work items queued by request_suspend_state(); their handlers are early_suspend() and late_resume(). */
static DECLARE_WORK(early_suspend_work, early_suspend);
static DECLARE_WORK(late_resume_work, late_resume);
/* DECLARE_WORK(n, f): define a work_struct named @n, statically initialised with handler @f. */
#define DECLARE_WORK(n, f) \
struct work_struct n = __WORK_INITIALIZER(n, f)
/*
 * Static initialiser for a work_struct: .entry points back at itself
 * (both list pointers), .func is the handler @f.
 */
#define __WORK_INITIALIZER(n, f) { \
.data = WORK_DATA_STATIC_INIT(), \
.entry = { &(n).entry, &(n).entry }, \
.func = (f), \
__WORK_INIT_LOCKDEP_MAP(#n, &(n)) \
}
early_suspend 和 late_resume 是处理函数。
两个函数中都对 early_suspend_handlers 进行了处理。
early_suspend 函数中依次调用了 early_suspend_handlers 中的 suspend 函数:
list_for_each_entry(pos, &early_suspend_handlers, link) {
if (pos->suspend != NULL) {
pos->pm_mode = pwr_mode;
pos->suspend(pos);
}
}
late_resume 函数中按相反的顺序依次调用了 early_suspend_handlers 中的 resume 函数:
list_for_each_entry_reverse(pos, &early_suspend_handlers, link)
if (pos->resume != NULL)
pos->resume(pos);
early_suspend_handlers 的定义:
static LIST_HEAD(early_suspend_handlers);
函数 register_early_suspend 将 handler 注册到 early_suspend_handlers :
/*
 * register_early_suspend - add @handler to early_suspend_handlers.
 *
 * The list is kept ordered by ->level: @handler is inserted in front of the
 * first entry whose level is greater than its own, or at the tail when
 * there is no such entry. If the SUSPENDED bit is already set in state,
 * the handler's ->suspend() is invoked straight away.
 */
void register_early_suspend(struct early_suspend *handler)
{
	struct list_head *node;

	mutex_lock(&early_suspend_lock);

	/* Stop at the first registered handler with a higher level. */
	list_for_each(node, &early_suspend_handlers) {
		struct early_suspend *cur =
			list_entry(node, struct early_suspend, link);

		if (cur->level > handler->level)
			break;
	}

	/* @node is either that handler or the list head itself. */
	list_add_tail(&handler->link, node);

	if (handler->suspend && (state & SUSPENDED))
		handler->suspend(handler);

	mutex_unlock(&early_suspend_lock);
}
需要进行early suspend处理的模块调用函数 register_early_suspend 注册 handler .
/*
 * Example registration from the mxc_epdc driver: an early_suspend handler
 * at level EARLY_SUSPEND_LEVEL_DISABLE_FB with both suspend and resume
 * callbacks.
 */
static struct early_suspend mxc_epdc_earlysuspend = {
.level = EARLY_SUSPEND_LEVEL_DISABLE_FB,
.suspend = mxc_epdc_early_suspend,
.resume = mxc_epdc_late_resume,
};
/* excerpt: this call is made from driver code, not at file scope */
register_early_suspend(&mxc_epdc_earlysuspend);
看看 queue_work 的实现:
/**
* queue_work - queue work on a workqueue
* @wq: workqueue to use
* @work: work to queue
*
* Returns 0 if @work was already on a queue, non-zero otherwise.
*
* We queue the work to the CPU on which it was submitted, but if the CPU dies
* it can be processed by another CPU.
*/
int queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
	/* Queue on the CPU the work is being submitted from. */
	int cpu = get_cpu();
	int queued = queue_work_on(cpu, wq, work);

	put_cpu();

	return queued;
}
/**
* queue_work_on - queue work on specific cpu
* @cpu: CPU number to execute work on
* @wq: workqueue to use
* @work: work to queue
*
* Returns 0 if @work was already on a queue, non-zero otherwise.
*
* We queue the work to a specific CPU, the caller must ensure it
* can't go away.
*/
int
queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
{
	/* PENDING was already set: the work is on a queue, nothing to do. */
	if (test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work)))
		return 0;

	/* We own the PENDING bit now, so the work must not be linked anywhere. */
	BUG_ON(!list_empty(&work->entry));

	/* Hand the work to the queue of @wq that belongs to @cpu. */
	__queue_work(wq_per_cpu(wq, cpu), work);

	return 1;
}
flush_workqueue 的实现:
/**
* flush_workqueue - ensure that any scheduled work has run to completion.
* @wq: workqueue to flush
*
* Forces execution of the workqueue and blocks until its completion.
* This is typically used in driver shutdown handlers.
*
* We sleep until all works which were queued on entry have been handled,
* but we are not livelocked by new incoming ones.
*
* This function used to run the workqueues itself. Now we just wait for the
* helper threads to do it.
*/
void flush_workqueue(struct workqueue_struct *wq)
{
const struct cpumask *cpu_map = wq_cpu_map(wq);
int cpu;
/* the flush waits for queued work to complete, so the caller may sleep */
might_sleep();
/* NOTE(review): acquire immediately followed by release - presumably a lockdep-only annotation; confirm */
lock_map_acquire(&wq->lockdep_map);
lock_map_release(&wq->lockdep_map);
/* flush the per-CPU queue of @wq for every CPU in its map */
for_each_cpu(cpu, cpu_map)
flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
}
回到 state_store 函数。
我们发现函数中最初有个判断,如果设置的状态为 "disk" ,则调用函数 hibernate 。
根据 states.txt 中的说明,disk 即进入深度睡眠,更省电。
因为其将 snapshot 写入到了 disk 。之后可以 power down 。
来看看 hibernate 的实现。
两个内核版本中稍有差别,kernel 3.0.35中多了一些处理。下面把 kernel 3.0.35的实现列了出来:
/**
* hibernate - Carry out system hibernation, including saving the image.
*/
/*
 * Returns 0 or a negative error. The labels at the bottom unwind the setup
 * steps in reverse order; each failure jumps to the label matching how far
 * setup got.
 */
int hibernate(void)
{
int error;
/* everything below runs with pm_mutex held; it is released at Unlock */
mutex_lock(&pm_mutex);
/* The snapshot device should not be opened while we're running */
if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
error = -EBUSY;
goto Unlock;
}
pm_prepare_console();
error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
if (error)
goto Exit;
error = usermodehelper_disable();
if (error)
goto Exit;
/* Allocate memory management structures */
error = create_basic_memory_bitmaps();
/*
 * NOTE(review): usermodehelper_disable() has succeeded at this point, but
 * Exit sits below usermodehelper_enable(), so this failure path returns
 * without re-enabling the helpers - check against newer kernels.
 */
if (error)
goto Exit;
printk(KERN_INFO "PM: Syncing filesystems ... ");
sys_sync();
printk("done.\n");
/* on failure prepare_processes() has already thawed, hence Finish, not Thaw */
error = prepare_processes();
if (error)
goto Finish;
/* test hooks: unwind here without creating an image */
if (hibernation_test(TEST_FREEZER))
goto Thaw;
if (hibernation_testmode(HIBERNATION_TESTPROC))
goto Thaw;
error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
if (error)
goto Thaw;
/*
 * in_suspend non-zero: we just created the image and still have to write
 * it out. Otherwise we are running again after the image was restored.
 */
if (in_suspend) {
unsigned int flags = 0;
if (hibernation_mode == HIBERNATION_PLATFORM)
flags |= SF_PLATFORM_MODE;
if (nocompress) // not present in kernel 2.6.35
flags |= SF_NOCOMPRESS_MODE; // not present in kernel 2.6.35
pr_debug("PM: writing image.\n");
error = swsusp_write(flags);
swsusp_free();
if (!error)
power_down();
/*
 * power_down() as quoted later in this article never returns, so the two
 * statements below only run when swsusp_write() failed.
 */
in_suspend = 0; // not present in kernel 2.6.35
pm_restore_gfp_mask(); // not present in kernel 2.6.35
} else {
pr_debug("PM: Image restored successfully.\n");
}
Thaw:
thaw_processes();
Finish:
free_basic_memory_bitmaps();
usermodehelper_enable();
Exit:
pm_notifier_call_chain(PM_POST_HIBERNATION);
pm_restore_console();
/* give the snapshot device back (undoes the atomic_add_unless above) */
atomic_inc(&snapshot_device_available);
Unlock:
mutex_unlock(&pm_mutex);
return error;
}
先看第一处差别, NOCOMPRESS 相关。
kernel 2.6.35中没有定义 SF_NOCOMPRESS_MODE 。
搜索代码发现,kernel 3.0.35中有3个地方使用了 SF_NOCOMPRESS_MODE 。
分别是判断 swap 是否有足够空间、写入 snapshot 、读取 snapshot 时。
第一处:
在函数 enough_swap 中。功能在注释中已经体现。该函数的实现:
/**
* enough_swap - Make sure we have enough swap to save the image.
*
* Returns TRUE or FALSE after checking the total amount of swap
* space avaiable from the resume partition.
*/
static int enough_swap(unsigned int nr_pages, unsigned int flags)
{
	unsigned int free_swap = count_swap_pages(root_swap, 1);
	unsigned int required;

	pr_debug("PM: Free swap pages: %u\n", free_swap);

	/*
	 * Uncompressed images need one swap page per image page. Compressed
	 * images are sized by the LZO_CMP_PAGES / LZO_UNC_PAGES ratio, plus
	 * one page.
	 */
	if (flags & SF_NOCOMPRESS_MODE)
		required = nr_pages;
	else
		required = (nr_pages * LZO_CMP_PAGES) / LZO_UNC_PAGES + 1;

	/* PAGES_FOR_IO is added on top in both modes. */
	required += PAGES_FOR_IO;

	return free_swap > required;
}
看了下 kernel 2.6.35 中的实现,关键代码如下:
return free_swap > nr_pages + PAGES_FOR_IO;
比较两个版本的 kernel 可知,2.6.35中只支持非压缩模式;3.0.35中增加了压缩模式的支持。
第二处:
在函数 swsusp_write 中。关键代码:
/**
* swsusp_write - Write entire image and metadata.
* @flags: flags to pass to the "boot" kernel in the image header
*
* It is important _NOT_ to umount filesystems at this point. We want
* them synced (in case something goes wrong) but we DO not want to mark
* filesystem clean: it is not. (And it does not matter, if we resume
* correctly, we'll mark system clean, anyway.)
*/
/* Excerpt: "..." marks code the article left out. */
int swsusp_write(unsigned int flags)
{
...
pages = snapshot_get_image_size();
error = get_swap_writer(&handle);
...
// check that swap has enough free space for the image
if (!enough_swap(pages, flags)) {
...
}
/* snapshot_read_next() expects a zeroed handle on its first call */
memset(&snapshot, 0, sizeof(struct snapshot_handle));
error = snapshot_read_next(&snapshot);
...
header = (struct swsusp_info *)data_of(snapshot);
// write the metadata (image header) first
error = swap_write_page(&handle, header, NULL);
if (!error) {
// write the snapshot data: save_image() when SF_NOCOMPRESS_MODE is set, save_image_lzo() otherwise
error = (flags & SF_NOCOMPRESS_MODE) ?
save_image(&handle, &snapshot, pages - 1) :
save_image_lzo(&handle, &snapshot, pages - 1);
}
out_finish:
error = swap_writer_finish(&handle, flags, error);
return error;
}
函数 save_image 的注释:
/**
* save_image - save the suspend image data
*/
函数 save_image_lzo 的注释:
/**
* save_image_lzo - Save the suspend image data compressed with LZO.
* @handle: Swap mam handle to use for saving the image.
* @snapshot: Image to read data from.
* @nr_to_write: Number of pages to save.
*/
lzo 压缩算法就不介绍了。具体写入的实现这儿也不深入了。
2.6.35 中函数 swsusp_write 的实现类似,只是在调用 enough_swap 时不会传入 flags 参数;
另外,没有函数 save_image_lzo ,只会调用 save_image 。
函数 swsusp_write 中调用的另外一个重要函数 snapshot_read_next 。其注释:
/**
* snapshot_read_next - used for reading the system memory snapshot.
*
* On the first call to it @handle should point to a zeroed
* snapshot_handle structure. The structure gets updated and a pointer
* to it should be passed to this function every next time.
*
* On success the function returns a positive number. Then, the caller
* is allowed to read up to the returned number of bytes from the memory
* location computed by the data_of() macro.
*
* The function returns 0 to indicate the end of data stream condition,
* and a negative number is returned on error. In such cases the
* structure pointed to by @handle is not updated and should not be used
* any more.
*/
swsusp_write 中第一次调用了函数 snapshot_read_next ,函数 save_image/save_image_lzo 中循环调用 snapshot_read_next 函数,直到读取完 snapshot 。
第三处:
在函数 swsusp_read 中,关键代码:
/**
* swsusp_read - read the hibernation image.
* @flags_p: flags passed by the "frozen" kernel in the image header should
* be written into this memory location
*/
/* Excerpt: "..." marks code the article left out. */
int swsusp_read(unsigned int *flags_p)
{
...
/* snapshot_write_next() expects a zeroed handle on its first call */
memset(&snapshot, 0, sizeof(struct snapshot_handle));
error = snapshot_write_next(&snapshot);
...
header = (struct swsusp_info *)data_of(snapshot);
error = get_swap_reader(&handle, flags_p);
...
/* read the image header page first */
if (!error)
error = swap_read_page(&handle, header, NULL);
if (!error) {
/* load_image() when SF_NOCOMPRESS_MODE is set in *flags_p, load_image_lzo() otherwise */
error = (*flags_p & SF_NOCOMPRESS_MODE) ?
load_image(&handle, &snapshot, header->pages - 1) :
load_image_lzo(&handle, &snapshot, header->pages - 1);
}
swap_reader_finish(&handle);
end:
if (!error)
pr_debug("PM: Image successfully loaded\n");
else
pr_debug("PM: Error %d resuming\n", error);
return error;
}
load_image 的注释:
/**
* load_image - load the image using the swap map handle
* @handle and the snapshot handle @snapshot
* (assume there are @nr_pages pages to load)
*/
load_image_lzo 的注释:
/**
* load_image_lzo - Load compressed image data and decompress them with LZO.
* @handle: Swap map handle to use for loading data.
* @snapshot: Image to copy uncompressed data into.
* @nr_to_read: Number of pages to load.
*/
2.6.35 中函数 swsusp_read 的实现类似,只是少了对 SF_NOCOMPRESS_MODE 的处理,没有实现函数 load_image_lzo 。
swsusp_read 函数中调用了另外一个重要函数 snapshot_write_next ,其注释:
/**
* snapshot_write_next - used for writing the system memory snapshot.
*
* On the first call to it @handle should point to a zeroed
* snapshot_handle structure. The structure gets updated and a pointer
* to it should be passed to this function every next time.
*
* On success the function returns a positive number. Then, the caller
* is allowed to write up to the returned number of bytes to the memory
* location computed by the data_of() macro.
*
* The function returns 0 to indicate the "end of file" condition,
* and a negative number is returned on error. In such cases the
* structure pointed to by @handle is not updated and should not be used
* any more.
*/
swsusp_read 函数中第一次调用了 snapshot_write_next , load_image/load_image_lzo 中循环调用 snapshot_write_next ,直到处理完所有的 snapshot 。
第二处差别是在3.0.35中多了:
in_suspend = 0;
搜索代码,发现函数 create_image 中将 in_suspend 设置为了1.
函数 hibernate 调用了函数 hibernation_snapshot 。
函数 hibernation_snapshot 的注释 :
/**
* hibernation_snapshot - Quiesce devices and create a hibernation image.
* @platform_mode: If set, use platform driver to prepare for the transition.
*
* This routine must be called with pm_mutex held.
*/
函数 hibernation_snapshot 调用了函数 create_image 。
函数 create_image 的注释:
/**
* create_image - Create a hibernation image.
* @platform_mode: Whether or not to use the platform driver.
*
* Execute device drivers' .freeze_noirq() callbacks, create a hibernation image
* and execute the drivers' .thaw_noirq() callbacks.
*
* Control reappears in this routine after the subsequent restore.
*/
3.0.35中多了这么一句,难道只是为了防止 hibernate 函数中重复进入 if (in_suspend) 分支?
第三处差别是在3.0.35中多了:
pm_restore_gfp_mask();
pm_restore_gfp_mask 的实现及相关定义:
#ifdef CONFIG_PM_SLEEP
/*
 * Helpers for the suspend/hibernate code: pm_restrict_gfp_mask() strips
 * GFP_IOFS from gfp_allowed_mask so that memory allocations do not use I/O
 * while devices are suspended, and pm_restore_gfp_mask() puts the previous
 * mask back.
 *
 * Both must be called with pm_mutex held. Any other modification of
 * gfp_allowed_mask should also happen under pm_mutex, unless it is
 * guaranteed not to run in parallel with suspend/hibernate.
 */
static gfp_t saved_gfp_mask;

void pm_restore_gfp_mask(void)
{
	WARN_ON(!mutex_is_locked(&pm_mutex));

	/* Nothing saved means nothing to put back. */
	if (!saved_gfp_mask)
		return;

	gfp_allowed_mask = saved_gfp_mask;
	saved_gfp_mask = 0;
}

void pm_restrict_gfp_mask(void)
{
	WARN_ON(!mutex_is_locked(&pm_mutex));
	/* A non-zero saved mask means a restore is still outstanding. */
	WARN_ON(saved_gfp_mask);

	saved_gfp_mask = gfp_allowed_mask;
	gfp_allowed_mask &= ~GFP_IOFS;
}
#endif /* CONFIG_PM_SLEEP */
回头看看 hibernate 函数。
/* The snapshot device should not be opened while we're running */
if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
error = -EBUSY;
goto Unlock;
}
注释的意思是,我们在执行 hibernate 的时候,不允许别人再打开 snapshot 设备。
/* The "snapshot" misc device (minor SNAPSHOT_MINOR), served by snapshot_fops. */
static struct miscdevice snapshot_device = {
.minor = SNAPSHOT_MINOR,
.name = "snapshot",
.fops = &snapshot_fops,
};
snapshot_fops 的定义:
/* File operations of the snapshot device; seeking is disabled via no_llseek. */
static const struct file_operations snapshot_fops = {
.open = snapshot_open,
.release = snapshot_release,
.read = snapshot_read,
.write = snapshot_write,
.llseek = no_llseek,
.unlocked_ioctl = snapshot_ioctl,
};
打开 snapshot 设备用的就是函数 snapshot_open 了。
实现在我们 running 的时候不让 snapshot 设备被打开的方法是通过变量 snapshot_device_available ,其定义:
atomic_t snapshot_device_available = ATOMIC_INIT(1);
snapshot_open 函数中有以下语句:
if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
error = -EBUSY;
goto Unlock;
}
add 个 -1 ,也就相当于减1操作。
继续 hibernate 函数。
// console 相关处理
pm_prepare_console();
error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
pm_notifier_call_chain 函数经过多层调用,调用到了函数 __blocking_notifier_call_chain ,其注释:
/**
* __blocking_notifier_call_chain - Call functions in a blocking notifier chain
* @nh: Pointer to head of the blocking notifier chain
* @val: Value passed unmodified to notifier function
* @v: Pointer passed unmodified to notifier function
* @nr_to_call: See comment for notifier_call_chain.
* @nr_calls: See comment for notifier_call_chain.
*
* Calls each function in a notifier chain in turn. The functions
* run in a process context, so they are allowed to block.
*
* If the return value of the notifier can be and'ed
* with %NOTIFY_STOP_MASK then blocking_notifier_call_chain()
* will return immediately, with the return value of
* the notifier function which halted execution.
* Otherwise the return value is the return value
* of the last notifier function called.
*/
继续 hibernate 函数。
/**
* usermodehelper_disable - prevent new helpers from being started
*/
error = usermodehelper_disable();
/* Allocate memory management structures */
error = create_basic_memory_bitmaps();
函数 create_basic_memory_bitmaps 的注释:
/**
* create_basic_memory_bitmaps - create bitmaps needed for marking page
* frames that should not be saved and free page frames. The pointers
* forbidden_pages_map and free_pages_map are only modified if everything
* goes well, because we don't want the bits to be used before both bitmaps
* are set up.
*/
继续 hibernate 函数。
error = prepare_processes();
prepare_processes 函数的实现:
/*
 * prepare_processes - try to freeze processes.
 *
 * Returns 0 when freeze_processes() reports success. Otherwise thaws
 * whatever was frozen and returns -EBUSY.
 */
static int prepare_processes(void)
{
	if (!freeze_processes())
		return 0;

	/* Freezing failed: undo it before reporting the failure. */
	thaw_processes();
	return -EBUSY;
}
可见函数 prepare_processes 的功能为:
尝试冷冻进程,如果失败,则解冻进程,并返回 -EBUSY 。
如何冷冻进程的先不看了。
继续 hibernate 函数。
// 如果只是 debug ,那就只简单 delay 一会
if (hibernation_test(TEST_FREEZER))
goto Thaw;
// 与上面类似
if (hibernation_testmode(HIBERNATION_TESTPROC))
goto Thaw;
// 此函数前面见到过,功能是让 devices 都静止,并创建 hibernation image 。它还将 in_suspend 设置为了1
error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
// hibernation_snapshot 被成功调用, in_suspend 肯定非0
if (in_suspend) {
unsigned int flags = 0;
if (hibernation_mode == HIBERNATION_PLATFORM)
flags |= SF_PLATFORM_MODE;
if (nocompress) // 是否是非压缩的
flags |= SF_NOCOMPRESS_MODE;
pr_debug("PM: writing image.\n");
// 函数 swsusp_write 前面见过,将 snapshot 写入到 swap
error = swsusp_write(flags);
/**
* swsusp_free - free pages allocated for the suspend.
*
* Suspend pages are alocated before the atomic copy is made, so we
* need to release them after the resume.
*/
swsusp_free();
if (!error)
power_down();
in_suspend = 0;
pm_restore_gfp_mask();
} else {
pr_debug("PM: Image restored successfully.\n");
}
函数 power_down 的实现:
/**
* power_down - Shut the machine down for hibernation.
*
* Use the platform driver, if configured, to put the system into the sleep
* state corresponding to hibernation, or try to power it off or reboot,
* depending on the value of hibernation_mode.
*/
/*
 * The kernel-doc blocks inside the body were pasted in by the article
 * author and describe the function called right after each of them.
 * No path through this function returns: it ends in an endless loop.
 */
static void power_down(void)
{
switch (hibernation_mode) {
case HIBERNATION_TEST:
case HIBERNATION_TESTPROC:
break;
case HIBERNATION_REBOOT:
/**
* kernel_restart - reboot the system
* @cmd: pointer to buffer containing command to execute for restart
* or %NULL
*
* Shutdown everything and perform a clean reboot.
* This is not safe to call in interrupt context.
*/
kernel_restart(NULL);
break;
case HIBERNATION_PLATFORM:
/**
* hibernation_platform_enter - Power off the system using the platform driver.
*/
hibernation_platform_enter();
/*
 * NOTE(review): no break here, so if hibernation_platform_enter() returns,
 * control falls through to kernel_power_off() - presumably intentional.
 */
case HIBERNATION_SHUTDOWN:
/**
* kernel_power_off - power_off the system
*
* Shutdown everything and perform a clean system power_off.
*/
kernel_power_off();
break;
}
/* reached when the selected method above did nothing or came back */
/**
* kernel_halt - halt the system
*
* Shutdown everything and perform a clean system halt.
*/
kernel_halt();
/*
* Valid image is on the disk, if we continue we risk serious data
* corruption after resume.
*/
printk(KERN_CRIT "PM: Please power down manually\n");
while(1);
}
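到 power_down 函数,已经 power down 了,后面的代码怎么跑?
需要注意: power_down 函数以 while(1) 结尾,不会返回。重新 power up 并恢复 image 之后,执行流是从 create_image 中继续的(见其注释:Control reappears in this routine after the subsequent restore),之后 hibernate 函数走的是 else 分支("PM: Image restored successfully.")。
紧跟在 power_down 之后的下两句代码,只有在 swsusp_write 出错、没有调用 power_down 的情况下才会执行: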
in_suspend = 0;
pm_restore_gfp_mask();
然后是:
Thaw:
// 春回大地,冰雪消融
thaw_processes();
Finish:
/**
* free_basic_memory_bitmaps - free memory bitmaps allocated by
* create_basic_memory_bitmaps(). The auxiliary pointers are necessary
* so that the bitmaps themselves are not referred to while they are being
* freed.
*/
free_basic_memory_bitmaps();
// 前面 disable 了,现在要 enable 回来
usermodehelper_enable();
Exit:
pm_notifier_call_chain(PM_POST_HIBERNATION);
// 对应于前面 pm_prepare_console 的处理
pm_restore_console();
// 我们已经不 running 了,别人可以再使用 snapshot device 了
atomic_inc(&snapshot_device_available);
Unlock:
mutex_unlock(&pm_mutex);
return error;
}
看完了 hibernate 函数,即 Suspend-to-disk 的处理。
回到 state_store 函数继续。
根据写入的字符串,找到对应的 state ,并以该 state 为参数调用函数 enter_state 。
看看 enter_state 函数的实现:
/**
* enter_state - Do common work of entering low-power state.
* @state: pm_state structure for state we're entering.
*
* Make sure we're the only ones trying to enter a sleep state. Fail
* if someone has beat us to it, since we don't want anything weird to
* happen when we wake up.
* Then, do the setup for suspend, enter the state, and cleaup (after
* we've woken up).
*/
/*
 * Returns 0 or a negative error: -ENODEV when @state fails valid_state(),
 * -EBUSY when pm_mutex is already held, otherwise whatever
 * suspend_prepare() / suspend_devices_and_enter() reported.
 */
int enter_state(suspend_state_t state)
{
int error;
// reject states that the installed suspend_ops do not report as valid
if (!valid_state(state))
return -ENODEV;
/* do not wait for the lock: somebody else is already in a transition */
if (!mutex_trylock(&pm_mutex))
return -EBUSY;
printk(KERN_INFO "PM: Syncing filesystems ... ");
sys_sync();
printk("done.\n");
pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
// console setup, PM notifiers, usermodehelper disable and process freezing
error = suspend_prepare();
/* suspend_prepare() undoes its own steps on failure, so skip Finish */
if (error)
goto Unlock;
/* test hook: stop after freezing and go straight to the wakeup path */
if (suspend_test(TEST_FREEZER))
goto Finish;
pr_debug("PM: Entering %s sleep\n", pm_states[state]);
/* GFP_IOFS is masked out of allocations for the duration of the suspend */
pm_restrict_gfp_mask();
error = suspend_devices_and_enter(state);
pm_restore_gfp_mask();
Finish:
pr_debug("PM: Finishing wakeup.\n");
suspend_finish();
Unlock:
mutex_unlock(&pm_mutex);
return error;
}
函数 suspend_prepare 的实现:
/**
* suspend_prepare - Do prep work before entering low-power state.
*
* This is common code that is called for each state that we're entering.
* Run suspend notifiers, allocate a console and stop all processes.
*/
/*
 * Returns 0 with processes frozen, or a negative error after undoing the
 * steps already taken (-EPERM when there are no suspend_ops or no ->enter).
 */
static int suspend_prepare(void)
{
int error;
if (!suspend_ops || !suspend_ops->enter)
return -EPERM;
// console handling, same call as in hibernate()
pm_prepare_console();
// same notifier chain as in hibernate(), with the suspend event instead
error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
if (error)
goto Finish;
// same call as in hibernate()
error = usermodehelper_disable();
if (error)
goto Finish;
// freeze tasks; per the article author this calls freeze_processes() directly
// when the suspend freezer is supported, much like prepare_processes() does
// for hibernate() - TODO confirm
error = suspend_freeze_processes();
// everything went fine: return 0 and leave the system prepared
if (!error)
return 0;
/* freezing failed: undo it and re-enable usermode helpers */
suspend_thaw_processes();
usermodehelper_enable();
Finish:
pm_notifier_call_chain(PM_POST_SUSPEND);
pm_restore_console();
return error;
}
回到函数 enter_state ,
// 只有定义了 CONFIG_PM_DEBUG ,并且是在测试的时候,suspend_test 才返回1,否则返回0,继续 suspend
if (suspend_test(TEST_FREEZER))
goto Finish;
// 让 gfp mask 变得更严格,这是 suspend 前最后调用的一个函数
// 后面的 pm_restore_gfp_mask 函数是 resume 回来调用的第一个函数,功能是恢复 gfp mask
// 这个在前文中也有看到,是 3.0.35 kernel 的 hibernate 函数中新加的处理
pm_restrict_gfp_mask();
// 函数的实现在后面
error = suspend_devices_and_enter(state);
suspend_devices_and_enter 函数的实现:
/**
* suspend_devices_and_enter - suspend devices and enter the desired system
* sleep state.
* @state: state to enter
*/
/*
 * Returns 0 or a negative error (-ENOSYS without suspend_ops). Note the
 * layout: Recover_platform sits after the return statement and jumps back
 * up to Resume_devices.
 */
int suspend_devices_and_enter(suspend_state_t state)
{
int error;
if (!suspend_ops)
return -ENOSYS;
trace_machine_suspend(state);
// suspend_ops is the platform table installed through suspend_set_ops()
if (suspend_ops->begin) {
error = suspend_ops->begin(state);
if (error)
goto Close;
}
// suspend the console subsystem (implementation quoted later in the article)
suspend_console();
// per the article author, suspend_test_start()/suspend_test_finish() time the
// phase in between and print the result - TODO confirm
suspend_test_start();
// prepare all devices and run their "suspend" callbacks
error = dpm_suspend_start(PMSG_SUSPEND);
if (error) {
printk(KERN_ERR "PM: Some devices failed to suspend\n");
goto Recover_platform;
}
suspend_test_finish("suspend devices");
// test hook: stop once the devices have been suspended
if (suspend_test(TEST_DEVICES))
goto Recover_platform;
// enter the sleep state itself (implementation quoted later in the article)
error = suspend_enter(state);
Resume_devices:
suspend_test_start();
/**
* dpm_resume_end - Execute "resume" callbacks and complete system transition.
* @state: PM transition of the system being carried out.
*
* Execute "resume" callbacks for all devices and complete the PM transition of
* the system.
*/
// per the article author, dpm_resume_end() calls dpm_resume() and
// dpm_complete(), whose kernel-doc is quoted below
/**
* dpm_resume - Execute "resume" callbacks for non-sysdev devices.
* @state: PM transition of the system being carried out.
*
* Execute the appropriate "resume" callback for all devices whose status
* indicates that they are suspended.
*/
/**
* dpm_complete - Complete a PM transition for all non-sysdev devices.
* @state: PM transition of the system being carried out.
*
* Execute the ->complete() callbacks for all devices whose PM status is not
* DPM_ON (this allows new devices to be registered).
*/
dpm_resume_end(PMSG_RESUME);
suspend_test_finish("resume devices");
// console handling, counterpart of suspend_console()
resume_console();
Close:
if (suspend_ops->end)
suspend_ops->end();
trace_machine_suspend(PWR_EVENT_EXIT);
return error;
/* only reached through goto: let the platform recover, then resume devices */
Recover_platform:
if (suspend_ops->recover)
suspend_ops->recover();
goto Resume_devices;
}
函数 suspend_console 的实现:
/**
* suspend_console - suspend the console subsystem
*
* This disables printk() while we go into suspend states
*/
void suspend_console(void)
{
	/* Only act when console suspending is enabled. */
	if (console_suspend_enabled) {
		printk("Suspending console(s) (use no_console_suspend to debug)\n");

		console_lock();
		console_suspended = 1;
		up(&console_sem);
	}
}
函数 dpm_suspend_start 的实现:
/**
* dpm_suspend_start - Prepare devices for PM transition and suspend them.
* @state: PM transition of the system being carried out.
*
* Prepare all non-sysdev devices for system PM transition and execute "suspend"
* callbacks for them.
*/
int dpm_suspend_start(pm_message_t state)
{
	/* Phase 1: dpm_prepare() runs the ->prepare() callbacks. */
	int error = dpm_prepare(state);

	if (error)
		return error;

	/* Phase 2, only after a successful prepare: the "suspend" callbacks. */
	return dpm_suspend(state);
}
函数 dpm_prepare 和 函数 dpm_suspend 中的处理涉及到那些 devices 的 callback 被调用。
在函数 dpm_prepare 中,会遍历队列 dpm_list ,依次调用其中设备的 ->prepare() callback(s) ,
如果成功,则将其添加到 dpm_prepared_list 队列。
函数 dpm_suspend 遍历 dpm_prepared_list 队列,依次调用其中设备的 ->suspend() callback(s) 。
如果成功,则将其添加到 dpm_suspended_list 队列,后面调用到的 dpm_resume 函数会使用该队列。
dpm_resume 处理之后又将成员 move 到 dpm_prepared_list 队列。
函数 dpm_complete 中会处理 dpm_prepared_list 队列。
接下来的问题是, dpm_list 里的成员是谁添加的?
函数 device_pm_add 中会往 dpm_list 中添加成员:
/**
* device_pm_add - Add a device to the PM core's list of active devices.
* @dev: Device to add to the list.
*/
void device_pm_add(struct device *dev)
{
...
list_add_tail(&dev->power.entry, &dpm_list);
...
}
函数 device_add 中调用了函数 device_pm_add :
/**
* device_add - add device to device hierarchy.
* @dev: device.
*
* This is part 2 of device_register(), though may be called
* separately _iff_ device_initialize() has been called separately.
*
* This adds @dev to the kobject hierarchy via kobject_add(), adds it
* to the global and sibling lists for the device, then
* adds it to the other relevant subsystems of the driver model.
*
* NOTE: _Never_ directly free @dev after calling this function, even
* if it returned an error! Always use put_device() to give up your
* reference instead.
*/
int device_add(struct device *dev)
{
...
device_pm_add(dev);
...
}
举一个 audio device driver 的例子。 driver 的 init 函数中调用了函数 platform_device_add 。
函数 platform_device_add 中调用了函数 device_add :
/**
* platform_device_add - add a platform device to device hierarchy
* @pdev: platform device we're adding
*
* This is part 2 of platform_device_register(), though may be called
* separately _iff_ pdev was allocated by platform_device_alloc().
*/
int platform_device_add(struct platform_device *pdev)
{
...
ret = device_add(&pdev->dev);
...
}
函数 suspend_enter 的实现:
/**
* suspend_enter - enter the desired system sleep state.
* @state: state to enter
*
* This function should be called after devices have been suspended.
*/
/*
 * Returns 0 or a negative error. The labels at the bottom undo the steps in
 * reverse order; each failure jumps to the label matching how far we got.
 * The kernel-doc blocks inside the body were pasted in by the article
 * author and describe the function called right after them.
 */
static int suspend_enter(suspend_state_t state)
{
int error;
// optional platform hook from suspend_ops
if (suspend_ops->prepare) {
error = suspend_ops->prepare();
if (error)
goto Platform_finish;
}
/**
* dpm_suspend_noirq - Execute "late suspend" callbacks for non-sysdev devices.
* @state: PM transition of the system being carried out.
*
* Prevent device drivers from receiving interrupts and call the "noirq" suspend
* handlers for all non-sysdev devices.
*/
error = dpm_suspend_noirq(PMSG_SUSPEND);
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down\n");
goto Platform_finish;
}
if (suspend_ops->prepare_late) {
error = suspend_ops->prepare_late();
if (error)
goto Platform_wake;
}
if (suspend_test(TEST_PLATFORM))
goto Platform_wake;
// per the article author: takes every CPU except the first one down via
// _cpu_down() - TODO confirm
error = disable_nonboot_cpus();
if (error || suspend_test(TEST_CPUS))
goto Enable_cpus;
arch_suspend_disable_irqs();
BUG_ON(!irqs_disabled());
/**
* syscore_suspend - Execute all the registered system core suspend callbacks.
*
* This function is executed with one CPU on-line and disabled interrupts.
*/
// per the article author: walks syscore_ops_list and calls each member's
// suspend callback; register_syscore_ops() is what adds members to that list
/**
* register_syscore_ops - Register a set of system core operations.
* @ops: System core operations to register.
*/
error = syscore_suspend();
// only continue into the sleep state if syscore_suspend() succeeded
if (!error) {
if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) {
// Article author's notes: ->enter() is usually implemented in the platform's
// pm.c. In Freescale's i.MX6 pm.c, entering suspend calls a suspend routine
// located in iRAM and finally waits for an interrupt to arrive.
// That iRAM routine is copied there when the pm module is initialised.
// The i.MX6 example uses kernel 3.0.35.
// The i.MX5 code on kernel 2.6.35 is similar and differs only in details.
error = suspend_ops->enter(state);
events_check_enabled = false;
}
/**
* syscore_resume - Execute all the registered system core resume callbacks.
*
* This function is executed with one CPU on-line and disabled interrupts.
*/
// resume side; per the article author this walks the same syscore_ops_list
syscore_resume();
}
// counterpart of arch_suspend_disable_irqs()
arch_suspend_enable_irqs();
BUG_ON(irqs_disabled());
Enable_cpus:
// counterpart of disable_nonboot_cpus()
enable_nonboot_cpus();
Platform_wake:
if (suspend_ops->wake)
suspend_ops->wake();
// counterpart of dpm_suspend_noirq()
dpm_resume_noirq(PMSG_RESUME);
Platform_finish:
if (suspend_ops->finish)
suspend_ops->finish();
return error;
}