Linux设备驱动程序第三版学习（6）- 高级字符驱动程序操作（续1）- 进程休眠 .

最新推荐文章于 2024-10-04 21:58:42 发布

Hens007

最新推荐文章于 2024-10-04 21:58:42 发布

阅读量1.1k

点赞数

分类专栏： Linux设备驱动程序第三版学习笔记文章标签： linux struct function linux内核 list up

Linux设备驱动程序第三版学习笔记专栏收录该内容

11 篇文章 0 订阅

订阅专栏

第六章：高级字符驱动程序操作(续1)
以下是第2部分：掌握如何使进程休眠（并唤醒）
分为4个小的部分（都是通过分析源码的形式，必要时加以总结）：
1、进程休眠的细节
2、进程唤醒的细节
3、scullpipe中read的实现
4、scullpipe中write的实现

1、进程休眠的细节
Linux内核中最简单的休眠方式就是称为wait_event的宏（以及它的几个变种），形式如下：

[cpp] view plain copy print ?

wait_event(queue, condition)
wait_event_interruptible(queue, condition)
wait_event_timeout(queue, condition, timeout)
wait_event_interruptible_timeout(queue, condition, timeout)

进程调用上面某一个宏进入休眠，最常用的是wait_event_interruptible，这个宏的具体细节如下：

[cpp] view plain copy print ?

/*
 * wait_event_interruptible - sleep interruptibly until @condition is true.
 * @wq:        the wait queue to sleep on
 * @condition: C expression to wait for
 *
 * Evaluates to 0 when @condition already holds.  Otherwise the work is
 * delegated to __wait_event_interruptible(), which may store an error code
 * in __ret; that value becomes the value of the whole expression.
 *
 * Every line of the body must end in a backslash, and notes inside the body
 * must be block comments: line splicing happens before comments are removed,
 * so a "//" note followed by a continuation would swallow the next line.
 */
#define wait_event_interruptible(wq, condition)				\
({									\
	int __ret = 0;							\
	if (!(condition))	/* slow path lives in a second macro */	\
		__wait_event_interruptible(wq, condition, __ret);	\
	__ret;								\
})

看一看__wait_event_interruptible这个宏

[cpp] view plain copy print ?

/*
 * __wait_event_interruptible - slow path of wait_event_interruptible().
 * @wq:        wait queue to sleep on
 * @condition: expression to wait for
 * @ret:       lvalue that is set to -ERESTARTSYS when a signal is pending
 *
 * Step 1: DEFINE_WAIT() creates and initialises a wait queue entry (a
 * wait_queue_t) named __wait.  The article quotes its definition as:
 *
 *   #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
 *   #define DEFINE_WAIT_FUNC(name, function)
 *       wait_queue_t name = {
 *           .private   = current,
 *           .func      = function,
 *           .task_list = LIST_HEAD_INIT((name).task_list),
 *       }
 *
 * Step 2: loop.  prepare_to_wait() (wait.c) adds the entry to the queue via
 * __add_wait_queue and sets the task state with set_current_state(state).
 * The condition is then re-checked because it may have changed meanwhile.
 *
 * Step 3: finish_wait() (wait.c) undoes prepare_to_wait(): it sets the task
 * state back to TASK_RUNNING and removes __wait from the queue through
 * list_del_init.
 *
 * All notes inside the body are block comments on continued lines; a "//"
 * note or a line without a trailing backslash would truncate the macro.
 */
#define __wait_event_interruptible(wq, condition, ret)			\
do {									\
	DEFINE_WAIT(__wait);	/* wait queue entry named __wait */	\
									\
	for (;;) {							\
		/* enqueue __wait, mark the task TASK_INTERRUPTIBLE */	\
		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
		/* the condition may have changed: test it again */	\
		if (condition)						\
			break;						\
		if (!signal_pending(current)) {				\
			/* no signal: give up the CPU, then re-check */	\
			schedule();					\
			continue;					\
		}							\
		/* a signal is pending: stop waiting and report it */	\
		ret = -ERESTARTSYS;					\
		break;							\
	}								\
	/* clean up: TASK_RUNNING again, __wait off the queue */	\
	finish_wait(&wq, &__wait);					\
} while (0)

/*
 * __wait_event_interruptible - slow path of wait_event_interruptible().
 * @wq:        wait queue to sleep on
 * @condition: expression to wait for
 * @ret:       lvalue that is set to -ERESTARTSYS when a signal is pending
 *
 * Step 1: DEFINE_WAIT() creates and initialises a wait queue entry (a
 * wait_queue_t) named __wait.  The article quotes its definition as:
 *
 *   #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
 *   #define DEFINE_WAIT_FUNC(name, function)
 *       wait_queue_t name = {
 *           .private   = current,
 *           .func      = function,
 *           .task_list = LIST_HEAD_INIT((name).task_list),
 *       }
 *
 * Step 2: loop.  prepare_to_wait() (wait.c) adds the entry to the queue via
 * __add_wait_queue and sets the task state with set_current_state(state).
 * The condition is then re-checked because it may have changed meanwhile.
 *
 * Step 3: finish_wait() (wait.c) undoes prepare_to_wait(): it sets the task
 * state back to TASK_RUNNING and removes __wait from the queue through
 * list_del_init.
 *
 * All notes inside the body are block comments on continued lines; a "//"
 * note or a line without a trailing backslash would truncate the macro.
 */
#define __wait_event_interruptible(wq, condition, ret)			\
do {									\
	DEFINE_WAIT(__wait);	/* wait queue entry named __wait */	\
									\
	for (;;) {							\
		/* enqueue __wait, mark the task TASK_INTERRUPTIBLE */	\
		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
		/* the condition may have changed: test it again */	\
		if (condition)						\
			break;						\
		if (!signal_pending(current)) {				\
			/* no signal: give up the CPU, then re-check */	\
			schedule();					\
			continue;					\
		}							\
		/* a signal is pending: stop waiting and report it */	\
		ret = -ERESTARTSYS;					\
		break;							\
	}								\
	/* clean up: TASK_RUNNING again, __wait off the queue */	\
	finish_wait(&wq, &__wait);					\
} while (0)

总之，调用wait_event或其变种后，如果等待的条件尚不成立，进程就会进入休眠，直到条件为真才返回；对于可中断的变种，收到信号时也会提前返回（返回-ERESTARTSYS）。

2. 进程唤醒的细节
与休眠细节相似，唤醒是通过调用wake_up宏来实现的，最常用的变种是wake_up_interruptible。这个宏的具体细节如下：

[cpp] view plain copy print ?

/*
 * wake_up_interruptible(x) expands to __wake_up() on queue x with mode
 * TASK_INTERRUPTIBLE, nr_exclusive = 1 and a NULL key.
 */
#define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
/*
 * __wake_up - per the article, defined in sched.c.
 * @q:            wait queue head whose entries are to be woken
 * @mode:         task-state mask, forwarded to __wake_up_common()
 * @nr_exclusive: forwarded to __wake_up_common()
 * @key:          opaque pointer forwarded to __wake_up_common()
 *
 * Holds q->lock (irqsave/irqrestore spinlock variants) around a single call
 * to __wake_up_common(), passing 0 for its wake_flags argument.
 */
void __wake_up(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, void *key)
{
unsigned long flags;
spin_lock_irqsave(&q->lock, flags); // take the queue's spinlock
__wake_up_common(q, mode, nr_exclusive, 0, key); // core of the wake-up, quoted next in the article
spin_unlock_irqrestore(&q->lock, flags); // release the lock
}
/*
 * __wake_up_common - walk q->task_list and invoke every entry's func callback.
 * @q:            wait queue head to walk
 * @mode:         passed through to each callback
 * @nr_exclusive: budget of exclusive entries to wake
 * @wake_flags:   passed through to each callback
 * @key:          passed through to each callback
 *
 * The walk stops early once a callback has returned non-zero for an entry
 * that has WQ_FLAG_EXCLUSIVE set and the decremented nr_exclusive reaches 0.
 * The decrement only happens when the first two tests succeed (short-circuit
 * evaluation).  If nr_exclusive starts at 0 the decrement skips past zero, so
 * the walk is not cut short.
 */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, int wake_flags, void *key)
{
wait_queue_t *curr, *next;
// list_for_each_entry_safe is a macro (the article points to a reposted note,
// "thoughts on the wait queue data structures in the Linux kernel", credited
// to wangchaoxjtuse). Per the article it expands to a for loop that visits
// every item of the list, pointing curr at each one in turn.
// For each item the entry's wait_queue_func_t func is called to try to wake
// the task behind it; the article analyses func right after this listing.
list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
// NOTE(review): flags is read before curr->func() runs, presumably because
// the callback may take the entry off the list; confirm in the kernel source.
unsigned flags = curr->flags;
if (curr->func(curr, mode, wake_flags, key) &&
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
}
}

/*
 * wake_up_interruptible(x) expands to __wake_up() on queue x with mode
 * TASK_INTERRUPTIBLE, nr_exclusive = 1 and a NULL key.
 */
#define wake_up_interruptible(x)    __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)

/*
 * __wake_up - per the article, defined in sched.c.
 * @q:            wait queue head whose entries are to be woken
 * @mode:         task-state mask, forwarded to __wake_up_common()
 * @nr_exclusive: forwarded to __wake_up_common()
 * @key:          opaque pointer forwarded to __wake_up_common()
 *
 * Holds q->lock (irqsave/irqrestore spinlock variants) around a single call
 * to __wake_up_common(), passing 0 for its wake_flags argument.
 */
    void __wake_up(wait_queue_head_t *q, unsigned int mode,
                int nr_exclusive, void *key)
    {
        unsigned long flags;

spin_lock_irqsave(&q->lock, flags); // take the queue's spinlock
        __wake_up_common(q, mode, nr_exclusive, 0, key); // core of the wake-up, quoted next in the article
        spin_unlock_irqrestore(&q->lock, flags); // release the lock
    }

/*
 * __wake_up_common - walk q->task_list and invoke every entry's func callback.
 * @q:            wait queue head to walk
 * @mode:         passed through to each callback
 * @nr_exclusive: budget of exclusive entries to wake
 * @wake_flags:   passed through to each callback
 * @key:          passed through to each callback
 *
 * The walk stops early once a callback has returned non-zero for an entry
 * that has WQ_FLAG_EXCLUSIVE set and the decremented nr_exclusive reaches 0.
 * The decrement only happens when the first two tests succeed (short-circuit
 * evaluation).  If nr_exclusive starts at 0 the decrement skips past zero, so
 * the walk is not cut short.
 */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
                int nr_exclusive, int wake_flags, void *key)
    {
        wait_queue_t *curr, *next;

// list_for_each_entry_safe is a macro (the article points to a reposted note,
        // "thoughts on the wait queue data structures in the Linux kernel",
        // credited to wangchaoxjtuse). Per the article it expands to a for loop
        // that visits every item of the list, pointing curr at each one in turn.
        // For each item the entry's wait_queue_func_t func is called to try to
        // wake the task behind it; the article analyses func after this listing.
        list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
            // NOTE(review): flags is read before curr->func() runs, presumably
            // because the callback may take the entry off the list; confirm.
            unsigned flags = curr->flags;

if (curr->func(curr, mode, wake_flags, key) &&
                    (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
                break;
        }
    }

================分析wait_queue_func_t的源码================
在wait.h中可以看到：

[cpp] view plain copy print ?

/*
 * Signature of the per-entry wake-up callback: this is the type of the
 * func member that __wake_up_common() calls for every wait queue entry.
 */
typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
/* Declaration only, hence the terminating semicolon; the body lives in sched.c. */
int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);

其中default_wake_function定义在sched.c中，如下

[cpp] view plain copy print ?

/*
 * default_wake_function - wake-up callback with the wait_queue_func_t shape.
 * @curr:       wait queue entry; curr->private is handed to try_to_wake_up()
 * @mode:       passed to try_to_wake_up() as its state mask
 * @wake_flags: passed through to try_to_wake_up()
 * @key:        not used by this function
 *
 * Returns whatever try_to_wake_up() returns.
 */
int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
void *key)
{
return try_to_wake_up(curr->private, mode, wake_flags);
}

看看try_to_wake_up函数，这是唤醒进程的核心函数：

[cpp] view plain copy print ?

/***
* try_to_wake_up - wake up a thread
* @p: the to-be-woken-up thread
* @state: the mask of task states that can be woken
* @sync: do a synchronous wakeup?
*
* Put it on the run-queue if it's not already there. The "current"
* thread is always on the run-queue (except when the actual
* re-schedule is in progress), and as such you're allowed to do
* the simpler "current->state = TASK_RUNNING" to mark yourself
* runnable without the overhead of this.
*
* returns failure only if the task is already active.
*/
static int try_to_wake_up(struct task_struct *p, unsigned int state,
int wake_flags)
{
int cpu, orig_cpu, this_cpu, success = 0;
unsigned long flags;
struct rq *rq, *orig_rq;
// 关于下面的两行代码，需要知道：
// #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
// 对于如何取得__SCHED_FEAT_##x, 参考本博客的一篇：“关于宏的一个应用”。
if (!sched_feat(SYNC_WAKEUPS))
wake_flags &= ~WF_SYNC;
//get_cpu函数获得对当前处理器的引用并且返回处理器的ID
this_cpu = get_cpu();
//下个语句是多处理器的写内存屏障。
//读写屏障像一堵墙，所有在设置读写屏障之前发起的内存访问，必须先于在设置屏障之后发起的内存访问
//之前完成，确保内存访问按程序的顺序完成。详情参照本博客转载的一篇：“优化屏障和内存屏障”。
//相关的屏障还有：
// mb() 适用于多处理器和单处理器的内存屏障
// rmb() 适用于多处理器和单处理器的读内存屏障
// wmb() 适用于多处理器和单处理器的写内存那屏障
// smp_mb() 适用于多处理器的内存屏障
// smp_rmb() 适用于多处理器的读内存屏障
// smp_wmb() 适用于多处理器的写内存屏障
smp_wmb();
//对可执行队列操作前，应该先锁住它
//上锁和解锁函数原型是：
// struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
// void task_rq_unlock(struct rq *rq, unsigned long *flags)
rq = orig_rq = task_rq_lock(p, &flags);
//刷新队列时钟
update_rq_clock(rq);
//如果当前进程的状态不是要唤醒的进程状态，则不唤醒本进程。直接跳到out处，解锁并返回对当前处理器
//的引用
if (!(p->state & state))
goto out;
//如果当前进程就在运行队列(runqueue)中，则无需唤醒本进程。直接跳转到out_running处。
if (p->se.on_rq)
goto out_running;
//下面两句返回当前进程p所使用的CPU编号，并把编号保存到orig_cpu中
cpu = task_cpu(p);
orig_cpu = cpu;
#ifdef CONFIG_SMP //如果是多CPU的情况
//task_running定义在sched.c中，return task_current(rq, p);
//task_current也是定义在sched.c中，return rq->curr == p;
if (unlikely(task_running(rq, p)))
goto out_activate;
/*
* In order to handle concurrent wakeups and release the rq->lock
* we put the task in TASK_WAKING state.
*
* First fix up the nr_uninterruptible count:
*/
// 下面宏定义task_contributes_to_load在linux/sched.h中，如下：
// #define task_contributes_to_load(task) /
// ((task->state & TASK_UNINTERRUPTIBLE) != 0 && /
// (task->flags & PF_FREEZING) == 0)
// 判断两个条件：1.任务状态是否是TASK_UNINTERRUPTIBLE 2.标记为是否是PF_FREEZING
if (task_contributes_to_load(p)) {
if (likely(cpu_online(orig_cpu))) /*检测cpu是否在线，Some places use cpu_online() where they should be using cpu_possible,most commonly for tallying statistics*/
rq->nr_uninterruptible--; /*nr_uninterruptible记录了该CPU不可中断状态进程的个数，这里把它减1*/
else
this_rq()->nr_uninterruptible--; //this_rq取得当前CPU的运行队列
}
p->state = TASK_WAKING; //设置进程状态为TASK_WAKING
if (p->sched_class->task_waking)
p->sched_class->task_waking(rq, p); /*调用当前进程调度类的task_waking函数，进行唤醒操作 */
cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
if (cpu != orig_cpu)
set_task_cpu(p, cpu);
__task_rq_unlock(rq);
rq = cpu_rq(cpu);
spin_lock(&rq->lock);
update_rq_clock(rq);
/*
* We migrated the task without holding either rq->lock, however
* since the task is not on the task list itself, nobody else
* will try and migrate the task, hence the rq should match the
* cpu we just moved it to.
*/
WARN_ON(task_cpu(p) != cpu);
WARN_ON(p->state != TASK_WAKING);
#ifdef CONFIG_SCHEDSTATS //对于需要收集调度器状态的情况
schedstat_inc(rq, ttwu_count);
if (cpu == this_cpu)
schedstat_inc(rq, ttwu_local);
else {
struct sched_domain *sd;
for_each_domain(this_cpu, sd) {
if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
schedstat_inc(sd, ttwu_wake_remote);
break;
}
}
}
#endif /* CONFIG_SCHEDSTATS */
out_activate:
#endif /* CONFIG_SMP */
schedstat_inc(p, se.nr_wakeups);
if (wake_flags & WF_SYNC)
schedstat_inc(p, se.nr_wakeups_sync);
if (orig_cpu != cpu)
schedstat_inc(p, se.nr_wakeups_migrate);
if (cpu == this_cpu)
schedstat_inc(p, se.nr_wakeups_local);
else
schedstat_inc(p, se.nr_wakeups_remote);
activate_task(rq, p, 1);
success = 1;
/*
* Only attribute actual wakeups done by this task.
*/
if (!in_interrupt()) {
struct sched_entity *se = ¤t->se;
u64 sample = se->sum_exec_runtime;
if (se->last_wakeup)
sample -= se->last_wakeup;
else
sample -= se->start_runtime;
update_avg(&se->avg_wakeup, sample);
se->last_wakeup = se->sum_exec_runtime;
}
out_running:
//下面这两句是干什么用的我也不清楚，请高手指教。多谢多谢！！！
trace_sched_wakeup(rq, p, success);
check_preempt_curr(rq, p, wake_flags);
//设置当前进程状态
p->state = TASK_RUNNING;
#ifdef CONFIG_SMP
if (p->sched_class->task_woken)
p->sched_class->task_woken(rq, p);
if (unlikely(rq->idle_stamp)) {
u64 delta = rq->clock - rq->idle_stamp;
u64 max = 2*sysctl_sched_migration_cost;
if (delta > max)
rq->avg_idle = max;
else
update_avg(&rq->avg_idle, delta);
rq->idle_stamp = 0;
}
#endif
out:
//解锁
task_rq_unlock(rq, &flags);
//返回对当前处理器的引用
put_cpu();
return success;
}

/***
     * try_to_wake_up - wake up a thread
     * @p: the to-be-woken-up thread
     * @state: the mask of task states that can be woken
     * @sync: do a synchronous wakeup?
     *
     * Put it on the run-queue if it's not already there. The "current"
     * thread is always on the run-queue (except when the actual
     * re-schedule is in progress), and as such you're allowed to do
     * the simpler "current->state = TASK_RUNNING" to mark yourself
     * runnable without the overhead of this.
     *
     * returns failure only if the task is already active.
     */ 
    static int try_to_wake_up(struct task_struct *p, unsigned int state,
                  int wake_flags)
    {
        int cpu, orig_cpu, this_cpu, success = 0;
        unsigned long flags;
        struct rq *rq, *orig_rq;

// 关于下面的两行代码，需要知道：
        // #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
        // 对于如何取得__SCHED_FEAT_##x, 参考本博客的一篇：“关于宏的一个应用”。 
        if (!sched_feat(SYNC_WAKEUPS))
            wake_flags &= ~WF_SYNC;

//get_cpu函数获得对当前处理器的引用并且返回处理器的ID 
        this_cpu = get_cpu();

//下个语句是多处理器的写内存屏障。
        //读写屏障像一堵墙，所有在设置读写屏障之前发起的内存访问，必须先于在设置屏障之后发起的内存访问
        //之前完成，确保内存访问按程序的顺序完成。详情参照本博客转载的一篇：“优化屏障和内存屏障”。
        //相关的屏障还有：
        // mb()    适用于多处理器和单处理器的内存屏障
        // rmb()    适用于多处理器和单处理器的读内存屏障
        // wmb()    适用于多处理器和单处理器的写内存那屏障
        // smp_mb()    适用于多处理器的内存屏障
        // smp_rmb()    适用于多处理器的读内存屏障
        // smp_wmb()    适用于多处理器的写内存屏障 
        smp_wmb();

//对可执行队列操作前，应该先锁住它
        //上锁和解锁函数原型是：
        // struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
        // void task_rq_unlock(struct rq *rq, unsigned long *flags) 
        rq = orig_rq = task_rq_lock(p, &flags);

//刷新队列时钟 
        update_rq_clock(rq);

//如果当前进程的状态不是要唤醒的进程状态，则不唤醒本进程。直接跳到out处，解锁并返回对当前处理器
        //的引用 
        if (!(p->state & state))
            goto out;
        //如果当前进程就在运行队列(runqueue)中，则无需唤醒本进程。直接跳转到out_running处。 
        if (p->se.on_rq)
            goto out_running;

//下面两句返回当前进程p所使用的CPU编号，并把编号保存到orig_cpu中 
        cpu = task_cpu(p);
        orig_cpu = cpu;

#ifdef CONFIG_SMP //如果是多CPU的情况 
        //task_running定义在sched.c中，return task_current(rq, p);
        //task_current也是定义在sched.c中，return rq->curr == p; 
        if (unlikely(task_running(rq, p)))
            goto out_activate;

/*
         * In order to handle concurrent wakeups and release the rq->lock
         * we put the task in TASK_WAKING state.
         *
         * First fix up the nr_uninterruptible count:
         */
        // 下面宏定义task_contributes_to_load在linux/sched.h中，如下：
        // #define task_contributes_to_load(task)    /
        //        ((task->state & TASK_UNINTERRUPTIBLE) != 0 && /
        //         (task->flags & PF_FREEZING) == 0)
        // 判断两个条件：1.任务状态是否是TASK_UNINTERRUPTIBLE 2.标记为是否是PF_FREEZING 
        if (task_contributes_to_load(p)) {
            if (likely(cpu_online(orig_cpu))) /*检测cpu是否在线，Some places use cpu_online() where they should be using cpu_possible,most commonly  for tallying statistics*/ 
                rq->nr_uninterruptible--; /*nr_uninterruptible记录了该CPU不可中断状 态进程的个数，这里把它减1*/ 
            else
                this_rq()->nr_uninterruptible--; //this_rq取得当前CPU的运行队列 
        }
        p->state = TASK_WAKING; //设置进程状态为TASK_WAKING

if (p->sched_class->task_waking)
            p->sched_class->task_waking(rq, p); /*调用当前进程调度类的task_waking函数 ，进行唤醒操作 */

cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
        if (cpu != orig_cpu)
            set_task_cpu(p, cpu);
        __task_rq_unlock(rq);

rq = cpu_rq(cpu);
        spin_lock(&rq->lock);
        update_rq_clock(rq);

/*
         * We migrated the task without holding either rq->lock, however
         * since the task is not on the task list itself, nobody else
         * will try and migrate the task, hence the rq should match the
         * cpu we just moved it to.
         */ 
        WARN_ON(task_cpu(p) != cpu);
        WARN_ON(p->state != TASK_WAKING);

#ifdef CONFIG_SCHEDSTATS //对于需要收集调度器状态的情况 
        schedstat_inc(rq, ttwu_count);
        if (cpu == this_cpu)
            schedstat_inc(rq, ttwu_local);
        else {
            struct sched_domain *sd;
            for_each_domain(this_cpu, sd) {
                if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
                    schedstat_inc(sd, ttwu_wake_remote);
                    break;
                }
            }
        }
    #endif /* CONFIG_SCHEDSTATS */

out_activate:
    #endif /* CONFIG_SMP */ 
        schedstat_inc(p, se.nr_wakeups);
        if (wake_flags & WF_SYNC)
            schedstat_inc(p, se.nr_wakeups_sync);
        if (orig_cpu != cpu)
            schedstat_inc(p, se.nr_wakeups_migrate);
        if (cpu == this_cpu)
            schedstat_inc(p, se.nr_wakeups_local);
        else
            schedstat_inc(p, se.nr_wakeups_remote);
        activate_task(rq, p, 1);
        success = 1;

/*
         * Only attribute actual wakeups done by this task.
         */ 
        if (!in_interrupt()) {
            struct sched_entity *se = ¤t->se;
            u64 sample = se->sum_exec_runtime;

if (se->last_wakeup)
                sample -= se->last_wakeup;
            else
                sample -= se->start_runtime;
            update_avg(&se->avg_wakeup, sample);

se->last_wakeup = se->sum_exec_runtime;
        }

out_running:
        //下面这两句是干什么用的我也不清楚，请高手指教。多谢多谢！！！ 
        trace_sched_wakeup(rq, p, success);
        check_preempt_curr(rq, p, wake_flags);

//设置当前进程状态 
        p->state = TASK_RUNNING;
    #ifdef CONFIG_SMP
        if (p->sched_class->task_woken)
            p->sched_class->task_woken(rq, p);

if (unlikely(rq->idle_stamp)) {
            u64 delta = rq->clock - rq->idle_stamp;
            u64 max = 2*sysctl_sched_migration_cost;

if (delta > max)
                rq->avg_idle = max;
            else
                update_avg(&rq->avg_idle, delta);
            rq->idle_stamp = 0;
        }
    #endif
    out:
        //解锁 
        task_rq_unlock(rq, &flags);
        //返回对当前处理器的引用 
        put_cpu();

return success;
    }

3、scullpipe中read的实现（简单休眠方法）

[cpp] view plain copy print ?

/*
 * scull_p_read - read() method of the scullpipe device (simple sleeping).
 * @filp:  open file; filp->private_data holds the struct scull_pipe
 * @buf:   user-space destination buffer
 * @count: maximum number of bytes requested
 * @f_pos: file position (not used here)
 *
 * struct scull_pipe is the device structure stored in filp->private_data at
 * open time.  Per the article it contains:
 *   wait_queue_head_t inq, outq;       read and write wait queues
 *   char *buffer, *end;                start and end of the buffer
 *   int buffersize;                    used in pointer arithmetic
 *   char *rp, *wp;                     where to read, where to write
 *   int nreaders, nwriters;            number of openings for r/w
 *   struct fasync_struct *async_queue; asynchronous readers
 *   struct semaphore sem;              mutual exclusion semaphore
 *   struct cdev cdev;                  char device structure
 *
 * Returns the number of bytes copied, -EAGAIN for a non-blocking read with
 * no data, -ERESTARTSYS when interrupted by a signal, or -EFAULT when the
 * copy to user space fails.
 */
static ssize_t scull_p_read (struct file *filp, char __user *buf, size_t count,
		loff_t *f_pos)
{
	struct scull_pipe *dev = filp->private_data;

	if (down_interruptible(&dev->sem)) /* take the mutex */
		return -ERESTARTSYS;

	while (dev->rp == dev->wp) { /* rp == wp: nothing to read */
		up(&dev->sem); /* release the lock before sleeping */
		if (filp->f_flags & O_NONBLOCK) /* non-blocking read: return at once */
			return -EAGAIN;
		PDEBUG("\"%s\" reading: going to sleep\n", current->comm);
		/*
		 * Blocking read: sleep here with wait_event_interruptible().
		 * The sleep is interruptible, so a non-zero result means a
		 * signal woke us; do not carry on, let the upper layer deal
		 * with it.
		 */
		if (wait_event_interruptible(dev->inq, (dev->rp != dev->wp)))
			return -ERESTARTSYS; /* signal: tell the fs layer to handle it */
		/*
		 * Woken without a signal, but data is still not guaranteed:
		 * loop and test again.  The semaphore must be re-acquired
		 * first, otherwise the up() calls below would be unbalanced.
		 */
		if (down_interruptible(&dev->sem))
			return -ERESTARTSYS;
	}

	/* ok, data is there, return something */
	/* count is clamped to what can be handed out in one contiguous copy. */
	if (dev->wp > dev->rp)
		/* normal case: cannot read beyond what has been written */
		count = min(count, (size_t)(dev->wp - dev->rp));
	else /* the write pointer has wrapped, return data up to dev->end */
		count = min(count, (size_t)(dev->end - dev->rp));

	/* copy_to_user() returns 0 on success, non-zero if bytes were left uncopied. */
	if (copy_to_user(buf, dev->rp, count)) {
		up (&dev->sem); /* release the semaphore on the error path too */
		return -EFAULT;
	}
	dev->rp += count; /* advance the read pointer */
	if (dev->rp == dev->end) /* reached the end of the buffer */
		dev->rp = dev->buffer; /* wrapped */
	up (&dev->sem);

	/* finally, awake any writers and return */
	wake_up_interruptible(&dev->outq);
	PDEBUG("\"%s\" did read %li bytes\n",current->comm, (long)count);
	return count;
}

/*
 * scull_p_read - read() method of the scullpipe device (simple sleeping).
 * @filp:  open file; filp->private_data holds the struct scull_pipe
 * @buf:   user-space destination buffer
 * @count: maximum number of bytes requested
 * @f_pos: file position (not used here)
 *
 * struct scull_pipe is the device structure stored in filp->private_data at
 * open time.  Per the article it contains:
 *   wait_queue_head_t inq, outq;       read and write wait queues
 *   char *buffer, *end;                start and end of the buffer
 *   int buffersize;                    used in pointer arithmetic
 *   char *rp, *wp;                     where to read, where to write
 *   int nreaders, nwriters;            number of openings for r/w
 *   struct fasync_struct *async_queue; asynchronous readers
 *   struct semaphore sem;              mutual exclusion semaphore
 *   struct cdev cdev;                  char device structure
 *
 * Returns the number of bytes copied, -EAGAIN for a non-blocking read with
 * no data, -ERESTARTSYS when interrupted by a signal, or -EFAULT when the
 * copy to user space fails.
 */
static ssize_t scull_p_read (struct file *filp, char __user *buf, size_t count,
		loff_t *f_pos)
{
	struct scull_pipe *dev = filp->private_data;

	if (down_interruptible(&dev->sem)) /* take the mutex */
		return -ERESTARTSYS;

	while (dev->rp == dev->wp) { /* rp == wp: nothing to read */
		up(&dev->sem); /* release the lock before sleeping */
		if (filp->f_flags & O_NONBLOCK) /* non-blocking read: return at once */
			return -EAGAIN;
		PDEBUG("\"%s\" reading: going to sleep\n", current->comm);
		/*
		 * Blocking read: sleep here with wait_event_interruptible().
		 * The sleep is interruptible, so a non-zero result means a
		 * signal woke us; do not carry on, let the upper layer deal
		 * with it.
		 */
		if (wait_event_interruptible(dev->inq, (dev->rp != dev->wp)))
			return -ERESTARTSYS; /* signal: tell the fs layer to handle it */
		/*
		 * Woken without a signal, but data is still not guaranteed:
		 * loop and test again.  The semaphore must be re-acquired
		 * first, otherwise the up() calls below would be unbalanced.
		 */
		if (down_interruptible(&dev->sem))
			return -ERESTARTSYS;
	}

	/* ok, data is there, return something */
	/* count is clamped to what can be handed out in one contiguous copy. */
	if (dev->wp > dev->rp)
		/* normal case: cannot read beyond what has been written */
		count = min(count, (size_t)(dev->wp - dev->rp));
	else /* the write pointer has wrapped, return data up to dev->end */
		count = min(count, (size_t)(dev->end - dev->rp));

	/* copy_to_user() returns 0 on success, non-zero if bytes were left uncopied. */
	if (copy_to_user(buf, dev->rp, count)) {
		up (&dev->sem); /* release the semaphore on the error path too */
		return -EFAULT;
	}
	dev->rp += count; /* advance the read pointer */
	if (dev->rp == dev->end) /* reached the end of the buffer */
		dev->rp = dev->buffer; /* wrapped */
	up (&dev->sem);

	/* finally, awake any writers and return */
	wake_up_interruptible(&dev->outq);
	PDEBUG("\"%s\" did read %li bytes\n",current->comm, (long)count);
	return count;
}

4、scullpipe中write的实现（高级休眠方法）

[cpp] view plain copy print ?

/*
 * scull_p_write - write() method of the scullpipe device (manual sleeping).
 * @filp:  open file; filp->private_data holds the struct scull_pipe
 * @buf:   user-space source buffer
 * @count: maximum number of bytes offered
 * @f_pos: file position (not used here)
 *
 * Returns the number of bytes accepted, the error from
 * scull_getwritespace() (-EAGAIN or -ERESTARTSYS), -ERESTARTSYS when the
 * initial down_interruptible() is interrupted, or -EFAULT when the copy
 * from user space fails.
 */
static ssize_t scull_p_write(struct file *filp, const char __user *buf, size_t count,
		loff_t *f_pos)
{
	struct scull_pipe *dev = filp->private_data;
	int result;

	if (down_interruptible(&dev->sem))
		return -ERESTARTSYS;

	/*
	 * Make sure there's space to write.  The sleeping logic lives in
	 * scull_getwritespace(): it makes sure buffer space is available
	 * for the new data and sleeps when necessary.
	 */
	result = scull_getwritespace(dev, filp);
	if (result) /* non-zero: no space was obtained */
		return result; /* scull_getwritespace called up(&dev->sem) */

	/* ok, space is there, accept something */
	count = min(count, (size_t)spacefree(dev));
	if (dev->wp >= dev->rp)
		count = min(count, (size_t)(dev->end - dev->wp)); /* to end-of-buf */
	else /* the write pointer has wrapped, fill up to rp-1 */
		count = min(count, (size_t)(dev->rp - dev->wp - 1));
	PDEBUG("Going to accept %li bytes to %p from %p\n", (long)count, dev->wp, buf);
	if (copy_from_user(dev->wp, buf, count)) {
		up (&dev->sem);
		return -EFAULT;
	}
	dev->wp += count;
	if (dev->wp == dev->end)
		dev->wp = dev->buffer; /* wrapped */
	up(&dev->sem);

	/* finally, awake any reader */
	wake_up_interruptible(&dev->inq); /* blocked in read() and select() */

	/* and signal asynchronous readers, explained late in chapter 5 */
	if (dev->async_queue)
		kill_fasync(&dev->async_queue, SIGIO, POLL_IN);
	PDEBUG("\"%s\" did write %li bytes\n",current->comm, (long)count);
	return count;
}
下面学习一下scull_getwritespace函数。它前面的英文注释已经基本说明了它的作用，这里再细看一下内部实现。
/* Wait for space for writing; caller must hold device semaphore. On
 * error the semaphore will be released before returning.
 *
 * Returns 0 with the semaphore held once spacefree(dev) is non-zero,
 * -EAGAIN for a non-blocking file when the buffer is full, or -ERESTARTSYS
 * when a signal arrives.
 *
 * For reference, the article quotes spacefree(), which returns the amount of
 * free buffer space, as:
 *
 *   static int spacefree(struct scull_pipe *dev)
 *   {
 *       if (dev->rp == dev->wp)
 *           return dev->buffersize - 1;
 *       return ((dev->rp + dev->buffersize - dev->wp) % dev->buffersize) - 1;
 *   }
 */
static int scull_getwritespace(struct scull_pipe *dev, struct file *filp)
{
	/* Space available: skip the loop and return 0.  Otherwise sleep. */
	while (spacefree(dev) == 0) { /* full */
		DEFINE_WAIT(wait); /* create and initialise a wait queue entry */

		up(&dev->sem); /* the semaphore must be released before sleeping */
		if (filp->f_flags & O_NONBLOCK) /* non-blocking write: do not sleep */
			return -EAGAIN;
		PDEBUG("\"%s\" writing: going to sleep\n",current->comm);
		/* Add the entry to the queue and set the task state. */
		prepare_to_wait(&dev->outq, &wait, TASK_INTERRUPTIBLE);
		/*
		 * Test again before giving up the CPU: without this second
		 * check the only wake-up could be missed.
		 */
		if (spacefree(dev) == 0)
			schedule();
		/* Back from schedule() (or never slept): dequeue, reset the state. */
		finish_wait(&dev->outq, &wait);
		if (signal_pending(current))
			return -ERESTARTSYS; /* signal: tell the fs layer to handle it */
		/* No signal: re-acquire the semaphore, then re-test the free space. */
		if (down_interruptible(&dev->sem))
			return -ERESTARTSYS;
	}
	return 0;
}