由于arm系统中没有bios设备, 所以只能为arm系统创建一个虚拟的字符设备与用户空间进行通讯.
这就是/arch/arm/kernel/apm.c
1. 工作原理:
这个apm中实现一个misc设备,实质上也是一个字符设备, misc设备的主设备号是10, 而apm_bios作为一
个misc设备, 次设备号是134. 定义为:
/*
* The apm_bios device is one of the misc char devices.
* This is its minor number; it is the value placed in apm_device.minor
* when the device is registered.
*/
#define APM_MINOR_DEV 134
这个apm_bios设备通过ioctl系统调用和用户空间进行通讯, 即当用户进程通过ioctl发来suspend命令时
, 它就传给内核, 使系统进入suspend状态.
2. 初始化
/*
 * apm_init - bring up the APM emulation interface.
 *
 * Bails out when APM was disabled on user request or when PM_IS_ACTIVE()
 * reports that power management is already claimed.  Otherwise it marks
 * PM as active, starts the kapmd kernel thread, publishes the "apm" proc
 * entry (only when CONFIG_PROC_FS is set) and registers the apm_bios misc
 * device.
 *
 * Returns 0 on success, -ENODEV or -EINVAL when refused, or the error
 * code handed back by kernel_thread() / misc_register().
 */
static int __init apm_init(void)
{
	int ret;

	if (apm_disabled) {
		printk(KERN_NOTICE "apm: disabled on user request.\n");
		return -ENODEV;
	}

	if (PM_IS_ACTIVE()) {
		printk(KERN_NOTICE "apm: overridden by ACPI.\n");
		return -EINVAL;
	}

	pm_active = 1;

	/*
	 * Start the kapmd thread that services the event queue.
	 * NOTE(review): the original annotation questions whether this
	 * thread does any useful work on ARM - confirm.
	 */
	ret = kernel_thread(kapmd, NULL, CLONE_KERNEL);
	if (ret < 0) {
		pm_active = 0;
		return ret;
	}

	/* Export APM information to user space through procfs. */
#ifdef CONFIG_PROC_FS
	create_proc_info_entry("apm", 0, NULL, apm_get_info);
#endif

	/* Register the misc character device. */
	ret = misc_register(&apm_device);
	if (ret != 0) {
		/*
		 * Undo everything done above: drop the proc entry, clear the
		 * active flag, then wake kapmd and wait for it to signal
		 * kapmd_exit.
		 */
		remove_proc_entry("apm", NULL);

		pm_active = 0;
		wake_up(&kapmd_wait);
		wait_for_completion(&kapmd_exit);
	}

	return ret;
}
注册的结构为:
/*
 * File operations of the apm_bios character device: read and poll hand
 * queued events to user space, ioctl carries the suspend request and
 * acknowledge, open/release manage the per-opener state.
 */
static struct file_operations apm_bios_fops = {
.owner = THIS_MODULE,
.read = apm_read,
.poll = apm_poll,
.ioctl = apm_ioctl,
.open = apm_open,
.release = apm_release,
};
/*
 * Misc-device descriptor passed to misc_register() by apm_init(); it binds
 * the minor number and the name "apm_bios" to the file operations above.
 */
static struct miscdevice apm_device = {
.minor = APM_MINOR_DEV,
.name = "apm_bios",
.fops = &apm_bios_fops
};
3. 结构函数的实现
当一个用户进程打开apm_bios设备时, 它就会调用这个函数
/*
 * apm_open - open handler of the apm_bios device.
 *
 * Allocates a zero-filled apm_user record for the opener, records its
 * privilege and access mode, links the record into apm_user_list and
 * stores it in filp->private_data so the other file operations can
 * find it.
 *
 * Returns 0 on success or -ENOMEM when the allocation fails.
 */
static int apm_open(struct inode * inode, struct file * filp)
{
	struct apm_user *user;

	/* One apm_user describes one opener of the device. */
	user = kmalloc(sizeof(struct apm_user), GFP_KERNEL);
	if (!user)
		return -ENOMEM;

	memset(user, 0, sizeof(struct apm_user));

	/*
	 * XXX - this is a tiny bit broken, when we consider BSD
	 * process accounting. If the device is opened by root, we
	 * instantly flag that we used superuser privs. Who knows,
	 * we might close the device immediately without doing a
	 * privileged operation -- cevans
	 */
	/* Record privilege and read/write access of this opener. */
	user->suser = capable(CAP_SYS_ADMIN);
	user->writer = (filp->f_mode & FMODE_WRITE) == FMODE_WRITE;
	user->reader = (filp->f_mode & FMODE_READ) == FMODE_READ;

	/* Add the opener to the global user list under the list lock. */
	down_write(&user_list_lock);
	list_add(&user->list, &apm_user_list);
	up_write(&user_list_lock);

	/* Hand the per-open state to the other file operations. */
	filp->private_data = user;

	return 0;
}
当用户空间进程去读这个设备时, 这个函数就会被调用.
这个函数的主要作用是将事件读出到用户空间
/*
 * apm_read - copy queued APM events to user space.
 * @fp:    open file; fp->private_data holds the caller's apm_user
 * @buf:   user buffer receiving an array of apm_event_t
 * @count: size of @buf in bytes
 * @ppos:  unused
 *
 * Blocks until at least one event is queued unless the file is in
 * non-blocking mode.  Reading a suspend event moves the caller into the
 * SUSPEND_READ state.
 *
 * Returns the number of bytes copied, -EINVAL when @buf cannot hold a
 * single event, -EAGAIN for an empty queue in non-blocking mode,
 * -ERESTARTSYS when the wait is interrupted by a signal, or -EFAULT when
 * the very first copy to user space fails.
 */
static ssize_t apm_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos)
{
	struct apm_user *as = fp->private_data;
	apm_event_t event;
	size_t copied = 0;	/* size_t so that large counts are not truncated */

	if (count < sizeof(apm_event_t))
		return -EINVAL;

	/* Empty queue and a non-blocking reader: return at once. */
	if (queue_empty(&as->queue) && (fp->f_flags & O_NONBLOCK))
		return -EAGAIN;

	/*
	 * Sleep until an event arrives.  A signal must be reported to the
	 * caller; falling through would make read() return 0, which user
	 * space cannot tell apart from end-of-file.
	 */
	if (wait_event_interruptible(apm_waitqueue, !queue_empty(&as->queue)))
		return -ERESTARTSYS;

	/* Copy out as many whole events as fit into the user buffer. */
	while (count - copied >= sizeof(event) && !queue_empty(&as->queue)) {
		event = queue_get_event(&as->queue);

		if (copy_to_user(buf + copied, &event, sizeof(event))) {
			/* Report the fault only if nothing was delivered. */
			if (copied == 0)
				return -EFAULT;
			break;
		}

		/* The reader now owes an acknowledge for this suspend. */
		if (event == APM_SYS_SUSPEND || event == APM_USER_SUSPEND)
			as->suspend_state = SUSPEND_READ;

		copied += sizeof(event);
	}

	return copied;
}
/*
 * apm_poll - poll/select backend of the apm_bios device.
 *
 * Registers the caller on apm_waitqueue and reports the device readable
 * (POLLIN | POLLRDNORM) when the caller's event queue is not empty;
 * otherwise returns 0.
 */
static unsigned int apm_poll(struct file *fp, poll_table * wait)
{
	struct apm_user *user = fp->private_data;
	unsigned int mask = 0;

	poll_wait(fp, &apm_waitqueue, wait);

	if (!queue_empty(&user->queue))
		mask = POLLIN | POLLRDNORM;

	return mask;
}
//这个是这个设备的核心函数, 用于内核与用户空间交互
/*
* apm_ioctl - handle APM ioctl
*
* APM_IOC_SUSPEND
* This IOCTL is overloaded, and performs two functions. It is used to:
* - initiate a suspend
* - acknowledge a suspend read from /dev/apm_bios.
* Only when everyone who has opened /dev/apm_bios with write permission
* has acknowledge does the actual suspend happen.
*/
static int
apm_ioctl(struct inode * inode, struct file *filp, u_int cmd, u_long arg)
{
struct apm_user *as = filp->private_data;
unsigned long flags;
int err = -EINVAL;
if (!as->suser || !as->writer)
return -EPERM;
switch (cmd) {
case APM_IOC_SUSPEND:
as->suspend_result = -EINTR;
if (as->suspend_state == SUSPEND_READ) {
/*
* If we read a suspend command from /dev/apm_bios,
* then the corresponding APM_IOC_SUSPEND ioctl is
* interpreted as an acknowledge.
*/
as->suspend_state = SUSPEND_ACKED;
suspends_pending--;
} else {
/*
* Otherwise it is a request to suspend the system.
* Queue an event for all readers, and expect an
* acknowledge from all writers who haven't already
* acknowledged.
*/
queue_event(APM_USER_SUSPEND, as);
}
/*
* If there are no further acknowledges required, suspend
* the system.
*/
if (suspends_pending == 0)
apm_suspend(); //系统进入suspend状态
//从suspend中返回
/*
* Wait for the suspend/resume to complete. If there are
* pending acknowledges,
这部分说明kernel里面的电源管理的核心函数
这部分的代码在/kernel/power目录中
1. 我们在(1)中看到apm_suspend()调用以下这个函数, 我们就从这里开始
/*
 * Identifier of a system sleep state.  The constants below are the values
 * passed to pm_suspend(); note that the value 2 is not assigned and that
 * PM_SUSPEND_MAX is one past the last named state (PM_SUSPEND_DISK).
 */
typedef int __bitwise suspend_state_t;
#define PM_SUSPEND_ON ((__force suspend_state_t) 0)
#define PM_SUSPEND_STANDBY ((__force suspend_state_t) 1)
#define PM_SUSPEND_MEM ((__force suspend_state_t) 3)
#define PM_SUSPEND_DISK ((__force suspend_state_t) 4)
#define PM_SUSPEND_MAX ((__force suspend_state_t) 5)
/**
 * pm_suspend - Externally visible function for suspending system.
 * @state: Enumerated value of state to enter.
 *
 * Checks that @state lies in the range (PM_SUSPEND_ON, PM_SUSPEND_MAX]
 * and, if so, hands it to enter_state().
 *
 * Returns the result of enter_state(), or -EINVAL for an out-of-range
 * @state.
 */
int pm_suspend(suspend_state_t state)
{
	/*
	 * Per the original annotation, the ARM APM path passes
	 * PM_SUSPEND_MEM here.
	 * NOTE(review): the upper bound is inclusive, so PM_SUSPEND_MAX
	 * itself is accepted - confirm that is intended.
	 */
	if (state <= PM_SUSPEND_ON || state > PM_SUSPEND_MAX)
		return -EINVAL;

	return enter_state(state);
}
/**
 * enter_state - Do common work of entering low-power state.
 * @state: pm_state structure for state we're entering.
 *
 * Make sure we're the only ones trying to enter a sleep state. Fail
 * if someone has beat us to it, since we don't want anything weird to
 * happen when we wake up.
 * Then, do the setup for suspend, enter the state, and cleanup (after
 * we've woken up).
 *
 * Returns -EBUSY when pm_sem is already held, otherwise the result of
 * pm_suspend_disk(), suspend_prepare() or suspend_enter().
 */
static int enter_state(suspend_state_t state)
{
	int error;

	/* Take the PM semaphore without sleeping; see the note above. */
	if (down_trylock(&pm_sem))
		return -EBUSY;

	/* Suspend-to-disk is handled by its own routine. */
	if (state == PM_SUSPEND_DISK) {
		error = pm_suspend_disk();
		goto Unlock;
	}

	/* Phase 1: prepare. */
	pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
	if ((error = suspend_prepare(state)))
		goto Unlock;

	/* Phase 2: enter the sleep state. */
	pr_debug("PM: Entering %s sleep\n", pm_states[state]);
	error = suspend_enter(state);

	/* Phase 3: finish, i.e. restore the system after wake-up. */
	pr_debug("PM: Finishing wakeup.\n");
	suspend_finish(state);
 Unlock:
	up(&pm_sem);
	return error;
}
2.1 准备阶段, 为状态变换做准备
/**
 * suspend_prepare - Do prep work before entering low-power state.
 * @state: State we're entering.
 *
 * This is common code that is called for each state that we're
 * entering. Allocate a console, stop all processes, then make sure
 * the platform can enter the requested state.
 *
 * Returns 0 on success.  On failure everything done so far is undone and
 * a negative error code is returned: -EPERM (no pm_ops / more than one
 * CPU online), -EAGAIN (processes could not be frozen), -ENOMEM (not
 * enough free pages) or the error from pm_ops->prepare() /
 * device_suspend().
 */
static int suspend_prepare(suspend_state_t state)
{
	int error = 0;
	unsigned int free_pages;

	if (!pm_ops || !pm_ops->enter)
		return -EPERM;

	pm_prepare_console();

	disable_nonboot_cpus();

	if (num_online_cpus() != 1) {
		error = -EPERM;
		goto Enable_cpu;
	}

	/* Freeze processes. */
	if (freeze_processes()) {
		error = -EAGAIN;
		goto Thaw;
	}

	/* Make sure at least FREE_PAGE_NUMBER pages are free. */
	if ((free_pages = nr_free_pages()) < FREE_PAGE_NUMBER) {
		pr_debug("PM: free some memory\n");
		shrink_all_memory(FREE_PAGE_NUMBER - free_pages);
		if (nr_free_pages() < FREE_PAGE_NUMBER) {
			error = -ENOMEM;
			printk(KERN_ERR "PM: No enough memory\n");
			goto Thaw;
		}
	}

	/* Architecture hook, installed earlier through pm_set_ops(). */
	if (pm_ops->prepare) {
		if ((error = pm_ops->prepare(state)))
			goto Thaw;
	}

	/* Suspend the devices. */
	if ((error = device_suspend(PMSG_SUSPEND))) {
		printk(KERN_ERR "Some devices failed to suspend\n");
		goto Finish;
	}
	return 0;

	/* Error unwinding, in reverse order of the steps above. */
 Finish:
	if (pm_ops->finish)
		pm_ops->finish(state);
 Thaw:
	thaw_processes();
 Enable_cpu:
	enable_nonboot_cpus();
	pm_restore_console();
	return error;
}
2.2挂起设备
/**
* device_suspend - Save state and stop all devices in system.
* @state: Power state to put each device in.
*
* Walk the dpm_active list, call ->suspend() for each device, and move
* it to dpm_off.
* Check the return value for each. If it returns 0, then we move the
* the device to the dpm_off list. If it returns -EAGAIN, we move it to
* the dpm_off_irq list. If we get a different error, try and back out.
*
* If we hit a failure with any of the devices, call device_resume()
* above to bring the suspended devices back to life.
*
*/
int device_suspend(pm_message_t state)
{
int error = 0;
down(&dpm_sem);
down(&dpm_list_sem);
//遍历设备链表, 当一个设备被注册进系统时, 它同时会被加入到这个dpm_active队列中
while (!list_empty(&dpm_active) && error == 0) {
struct list_head * entry = dpm_active.prev;
struct device * dev = to_device(entry);
get_device(dev);
up(&dpm_list_sem);
//挂起这个设备
error = suspend_device(dev, state);
down(&dpm_list_sem);
/* Check if the device got removed */
//加入off队列, 用于以后唤醒
if (!list_empty(&dev->power.entry)) {
/* Move it to the dpm_off or dpm_off_irq list */
if (!error) {
list_del(&dev->power.entry);
list_add(&dev->power.entry, &dpm_off);
} else if (error == -EAGAIN) {
list_del(&dev->power.entry);
list_add(&dev->power.entry, &dpm_off_irq);
error = 0;
}
}
if (error)
printk(KERN_ERR "Could not suspend device %s: "
"error %d/n", kobject_name(&dev->kobj), error);
put_device(dev);
}
up(&dpm_list_sem);
if (error) { //出错了! 恢复原来的状态
/* we failed... before resuming, bring back devices from
* dpm_off_irq list back to main dpm_off list, we do want
* to call resume() on them, in case they partially suspended
* despite returning -EAGAIN
*/
while (!list_empty(&dpm_off_irq)) {
struct list_head * entry = dpm_off_irq.next;
list_del(entry);
list_add(entry, &dpm_off);
}
再看看(2)中的enter_state():
/**
 * enter_state - Do common work of entering low-power state.
 * @state: pm_state structure for state we're entering.
 *
 * Make sure we're the only ones trying to enter a sleep state. Fail
 * if someone has beat us to it, since we don't want anything weird to
 * happen when we wake up.
 * Then, do the setup for suspend, enter the state, and cleanup (after
 * we've woken up).
 *
 * Returns -EBUSY when pm_sem is already held, otherwise the result of
 * pm_suspend_disk(), suspend_prepare() or suspend_enter().
 */
static int enter_state(suspend_state_t state)
{
	int error;

	/* Take the PM semaphore without sleeping; see the note above. */
	if (down_trylock(&pm_sem))
		return -EBUSY;

	/* Suspend-to-disk is handled by its own routine. */
	if (state == PM_SUSPEND_DISK) {
		error = pm_suspend_disk();
		goto Unlock;
	}

	/* Phase 1: prepare. */
	pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
	if ((error = suspend_prepare(state)))
		goto Unlock;

	/* Phase 2: enter the sleep state. */
	pr_debug("PM: Entering %s sleep\n", pm_states[state]);
	error = suspend_enter(state);

	/* Phase 3: finish, i.e. restore the system after wake-up. */
	pr_debug("PM: Finishing wakeup.\n");
	suspend_finish(state);
 Unlock:
	up(&pm_sem);
	return error;
}
可以看到, 状态的转换分三个阶段, 分别为prepare, enter, finish.
我们已经在(二)中说明, 这三个阶段通过体系无关的函数,
最终会调用与体系结构相关的函数.
他们分别是:
pm_ops->prepare(state)
pm_ops->enter(state)
pm_ops->finish(state)
这个pm_ops就是在体系结构初始化的时候注册进来的,
接着看arch/arm/mach-s3c2410/pm.c
/*
 * Platform power-management operations for the S3C2410; this table is
 * handed to pm_set_ops() by s3c2410_pm_init().
 *
 * Set to PM_DISK_FIRMWARE so we can quickly veto suspend-to-disk.
 */
static struct pm_ops s3c2410_pm_ops = {
.pm_disk_mode = PM_DISK_FIRMWARE,
.prepare = s3c2410_pm_prepare,
.enter = s3c2410_pm_enter,
.finish = s3c2410_pm_finish,
};
/* s3c2410_pm_init
 *
 * Attach the power management functions. This should be called
 * from the board specific initialisation if the board supports
 * it.
 *
 * Prints a banner, installs s3c2410_pm_ops through pm_set_ops() and
 * always returns 0.
 */
int __init s3c2410_pm_init(void)
{
	printk("S3C2410 Power Management, (c) 2004 Simtec Electronics\n");

	pm_set_ops(&s3c2410_pm_ops);
	return 0;
}
这就是实现三个状态转换的三个钩子函数.
/**
* pm_set_ops - Set the global power method table.
* @ops: Pointer to ops structure.
*/
//这个函数较为简单, 只是将/kerenel/power/main.c里的全局变量pm_ops设置成
s3c2410_pm_ops而已了.
//这就完成了这个全局变量的初始化.后续对pm_ops的访问实质上都是访问
s3c2410_pm_ops.
void pm_set_ops(struct pm_ops * ops)
{
down(&pm_sem);
pm_ops = ops;
up(&pm_sem);
}
接着再看他们的实现:
先从最简单的开始,
/*
 * Called after processes are frozen, but before we shut down devices.
 *
 * This platform has nothing to prepare, so the hook only reports success.
 */
static int s3c2410_pm_prepare(suspend_state_t state)
{
	int ret = 0;	/* no platform-specific work */

	return ret;
}
/*
 * Called after devices are re-setup, but before processes are thawed.
 *
 * This platform has nothing to clean up, so the hook only reports success.
 */
static int s3c2410_pm_finish(suspend_state_t state)
{
	int ret = 0;	/* no platform-specific work */

	return ret;
}
如上, 可以看到, prepare和finish在这个体系中都是空操作, 就是说, 对于s3c2410,
无需特殊的工作.
而这个结构的核心就是剩下的s3c2410_pm_enter了. 它真正实现suspend/resume
的状态转换.
/*
 * any_allowed - true when (mask & allow) differs from allow, i.e. at least
 * one bit that is set in @allow is clear in @mask.
 * NOTE(review): presumably a clear mask bit means the interrupt is enabled
 * as a wake-up source - confirm against the interrupt controller docs.
 */
#define any_allowed(mask, allow) (((mask) & (allow)) != (allow))
/* s3c2410_pm_enter
 *
 * central control for sleep/resume process
 *
 * Only PM_SUSPEND_MEM is accepted.  The function refuses to sleep when no
 * wake-up source is enabled, saves the core register blocks, programs the
 * wake-up interrupt masks, flushes the cache and calls
 * s3c2410_cpu_suspend().  The code after that call restores the saved
 * state again.
 *
 * Returns 0 after a completed suspend/resume cycle, or -EINVAL when the
 * state is unsupported or no wake-up source is enabled.
 */
static int s3c2410_pm_enter(suspend_state_t state)
{
	unsigned long regs_save[16];	/* on-stack save area, 16 words */
	unsigned long tmp;

	/* ensure the debug is initialised (if enabled) */

	s3c2410_pm_debug_init();

	DBG("s3c2410_pm_enter(%d)\n", state);

	if (state != PM_SUSPEND_MEM) {
		printk(KERN_ERR PFX "error: only PM_SUSPEND_MEM supported\n");
		return -EINVAL;
	}

	/* check if we have anything to wake-up with... bad things seem
	 * to happen if you suspend with no wakeup (system will often
	 * require a full power-cycle)
	 */

	if (!any_allowed(s3c_irqwake_intmask, s3c_irqwake_intallow) &&
	    !any_allowed(s3c_irqwake_eintmask, s3c_irqwake_eintallow)) {
		printk(KERN_ERR PFX "No sources enabled for wake-up!\n");
		printk(KERN_ERR PFX "Aborting sleep\n");
		return -EINVAL;
	}

	/* prepare check area if configured */

	s3c2410_pm_check_prepare();

	/* store the physical address of the register recovery block */

	s3c2410_sleep_save_phys = virt_to_phys(regs_save);

	DBG("s3c2410_sleep_save_phys=0x%08lx\n", s3c2410_sleep_save_phys);

	/* ensure at least GSTATUS3 has the resume address:
	 * the physical address of s3c2410_cpu_resume is written there
	 */

	__raw_writel(virt_to_phys(s3c2410_cpu_resume), S3C2410_GSTATUS3);

	DBG("GSTATUS3 0x%08x\n", __raw_readl(S3C2410_GSTATUS3));
	DBG("GSTATUS4 0x%08x\n", __raw_readl(S3C2410_GSTATUS4));

	/* save all necessary core registers not covered by the drivers */

	s3c2410_pm_do_save(gpio_save, ARRAY_SIZE(gpio_save));
	s3c2410_pm_do_save(irq_save, ARRAY_SIZE(irq_save));
	s3c2410_pm_do_save(core_save, ARRAY_SIZE(core_save));
	s3c2410_pm_do_save(uart_save, ARRAY_SIZE(uart_save));

	/* set the irq configuration for wake */

	s3c2410_pm_configure_extint();

	DBG("sleep: irq wakeup masks: %08lx,%08lx\n",
	    s3c_irqwake_intmask, s3c_irqwake_eintmask);

	/* load the wake-up masks into the interrupt mask registers */

	__raw_writel(s3c_irqwake_intmask, S3C2410_INTMSK);
	__raw_writel(s3c_irqwake_eintmask, S3C2410_EINTMASK);

	/* ack any outstanding external interrupts before we go to sleep */

	__raw_writel(__raw_readl(S3C2410_EINTPEND), S3C2410_EINTPEND);

	/* flush cache back to ram */

	arm920_flush_kern_cache_all();

	s3c2410_pm_check_store();

	/* send the cpu to sleep... */

	__raw_writel(0x00, S3C2410_CLKCON);  /* turn off clocks over sleep */

	/* Per the original annotation, s3c2410_cpu_suspend() and
	 * s3c2410_cpu_resume() are implemented in assembly (sleep.S) and
	 * the register values are kept in regs_save across the sleep.
	 */

	s3c2410_cpu_suspend(regs_save);

	/* execution continues here once the system has been woken up */

	/* restore the cpu state */

	cpu_init();

	/* unset the return-from-sleep flag, to ensure reset */

	tmp = __raw_readl(S3C2410_GSTATUS2);
	tmp &= S3C2410_GSTATUS2_OFFRESET;
	__raw_writel(tmp, S3C2410_GSTATUS2);

	/* restore the system state (inverse of the saves above) */

	s3c2410_pm_do_restore_core(core_save, ARRAY_SIZE(core_save));
	s3c2410_pm_do_restore(gpio_save, ARRAY_SIZE(gpio_save));
	s3c2410_pm_do_restore(irq_save, ARRAY_SIZE(irq_save));
	s3c2410_pm_do_restore(uart_save, ARRAY_SIZE(uart_save));

	/* the rest undoes the preparation steps done before sleeping */

	s3c2410_pm_debug_init();

	/* check what irq (if any) restored the system */

	DBG("post sleep: IRQs 0x%08x, 0x%08x\n",
	    __raw_readl(S3C2410_SRCPND),
	    __raw_readl(S3C2410_EINTPEND));

	s3c2410_pm_show_resume_irqs(IRQ_EINT0, __raw_readl(S3C2410_SRCPND),
				    s3c_irqwake_intmask);

	s3c2410_pm_show_resume_irqs(IRQ_EINT4-4, __raw_readl(S3C2410_EINTPEND),
				    s3c_irqwake_eintmask);

	DBG("post sleep, preparing to return\n");

	s3c2410_pm_check_restore();

	/* ok, let's return from sleep */

	DBG("S3C2410 PM Resume (post-restore)\n");
	return 0;
}