DEFINE_WAKE_Q 在内核等待某个资源或数据的场景中使用很广泛。先通过一段代码(节选并改编自 Linux 内核代码)了解其使用方法,再结合上一篇文章
linux进程间通信msg的内核实现_多多是小坏熊的博客-CSDN博客,介绍内核是如何等待并唤醒对应资源的。
#include <stdio.h>
/*
 * One link in a singly linked wake queue. Embedded inside task_struct so
 * queuing a task requires no allocation.
 */
struct wake_q_node {
struct wake_q_node *next;
};
/*
 * Head of a wake queue. 'first' points at the first queued node (or the
 * WAKE_Q_TAIL sentinel when empty); 'lastp' always points at the 'next'
 * field to write the following node into — i.e. the tail append slot.
 */
struct wake_q_head {
struct wake_q_node *first;
struct wake_q_node **lastp;
};
/* Sentinel marking the end of the chain; 0x1 can never be a real pointer. */
#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)
/*
 * Define and initialize an empty wake queue on the stack:
 * first == WAKE_Q_TAIL (empty), lastp aims at 'first' so the first
 * wake_q_add() writes straight through it.
 */
#define DEFINE_WAKE_Q(name) \
struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
/*
 * NOTE(review): __pid_t is a glibc-internal name; including <sys/types.h>
 * for pid_t would be the portable spelling — TODO confirm target toolchain.
 */
typedef __pid_t pid_t;
/* Minimal stand-in for the kernel's task_struct — just enough for the demo. */
struct task_struct
{
pid_t pid; // process ID
struct wake_q_node wake_q; // hook used to chain this task onto a wake queue
};
/*
 * Append @task to the wake queue @head (FIFO order).
 *
 * Userspace demo of the kernel's wake_q_add(). The kernel version first
 * claims the node atomically and pins the task:
 *
 *     if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
 *             return;          // already queued by someone, will be woken
 *     get_task_struct(task);
 *
 * Both steps are elided here because this demo is single-threaded.
 * The head itself is context local, so there can be no concurrency on it.
 */
void wake_q_add(struct wake_q_head *head, struct task_struct *task)
{
	struct wake_q_node *hook = &task->wake_q;

	/* Store the new node into the current tail slot... */
	*head->lastp = hook;
	/* ...then make the new node's 'next' field the tail slot. */
	head->lastp = &hook->next;
}
/* Byte offset of MEMBER within TYPE (classic null-pointer idiom). */
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
/*
 * Recover the address of the enclosing structure from a pointer to one of
 * its members. Uses GCC/Clang extensions (statement expression + typeof);
 * the typed __mptr assignment also type-checks 'ptr' against 'member'.
 */
#define container_of(ptr, type, member) ({ \
const typeof( ((type *)0)->member ) *__mptr = (ptr); \
(type *)( (char *)__mptr - offsetof(type,member) );})
/*
 * Drain the wake queue: walk the chain until the WAKE_Q_TAIL sentinel,
 * recover each owning task_struct via container_of(), and "wake" it.
 * The real kernel calls wake_up_process()/put_task_struct() here (and
 * wake_up_process() implies a wmb() pairing with the queueing in
 * wake_q_add() so wakeups are not missed); the demo prints the pid.
 */
void wake_up_q(struct wake_q_head *head)
{
	struct wake_q_node *cursor = head->first;

	while (cursor != WAKE_Q_TAIL) {
		struct task_struct *owner =
			container_of(cursor, struct task_struct, wake_q);

		/* Advance first; the node can safely be re-inserted now. */
		cursor = cursor->next;
		owner->wake_q.next = NULL;

		/* Demo stand-in for wake_up_process(owner). */
		printf("wake up process: %d\n", owner->pid);
	}
}
int main()
{
// 定义唤醒队列
DEFINE_WAKE_Q(wake_queue);
// 定义3个进程对象
struct task_struct t1 = {.pid=123, .wake_q = WAKE_Q_TAIL};
struct task_struct t2 = {.pid=456, .wake_q = WAKE_Q_TAIL};
struct task_struct t3 = {.pid=789, .wake_q = WAKE_Q_TAIL};
// 打印进程对象上wake_q的地址
printf("[%d] wake_q: %p\n", t1.pid, &t1.wake_q);
printf("[%d] wake_q: %p\n", t2.pid, &t2.wake_q);
printf("[%d] wake_q: %p\n", t3.pid, &t3.wake_q);
// 初始化时wake_queue.first,和wake_queue.lastp指向的地址
printf("bg: wake_queue.first: %p, wake_queue.lastp: %p\n", wake_queue.first, wake_queue.lastp);
// 添加等待对象后,wake_queue.first,和wake_queue.lastp指向的地址
wake_q_add(&wake_queue, &t1);
printf("1: wake_queue.first: %p, wake_queue.lastp: %p\n", wake_queue.first, wake_queue.lastp);
wake_q_add(&wake_queue, &t2);
printf("2: wake_queue.first: %p, wake_queue.lastp: %p\n", wake_queue.first, wake_queue.lastp);
wake_q_add(&wake_queue, &t3);
printf("3: wake_queue.first: %p, wake_queue.lastp: %p\n", wake_queue.first, wake_queue.lastp);
// 依次唤醒添加到wake_q的几个进程
wake_up_q(&wake_queue);
return 0;
}
打印结果如下:
[123] wake_q: 0x7ffe2b17f2e8
[456] wake_q: 0x7ffe2b17f2d8
[789] wake_q: 0x7ffe2b17f2c8
bg: wake_queue.first: 0x1, wake_queue.lastp: 0x7ffe2b17f2f0
1: wake_queue.first: 0x7ffe2b17f2e8, wake_queue.lastp: 0x7ffe2b17f2e8
2: wake_queue.first: 0x7ffe2b17f2e8, wake_queue.lastp: 0x7ffe2b17f2d8
3: wake_queue.first: 0x7ffe2b17f2e8, wake_queue.lastp: 0x7ffe2b17f2c8
wake up process: 123
wake up process: 456
wake up process: 789
调用wake_q_add将task_struct对象上的wake_q,连接到wake_queue上,调用3次wake_q_add后,链接的数据为:wake_queue -> t1.wake_q -> t2.wake_q -> t3.wake_q -> WAKE_Q_TAIL。
wake_up_q中,依次遍历出几个wake_q,取到对应的进程对象task_struct,唤醒进程。
下面看下内核中,是如何使用的:
下面是接收msg消息的处理逻辑
/*
 * Kernel excerpt (ipc/msg.c): System V message queue receive path.
 * Lines marked "... ..." are elided from the original source.
 * If no matching message exists, the caller links an msg_receiver onto
 * msq->q_receivers and sleeps; a sender later hands it a message and wakes
 * it through the wake_q mechanism demonstrated above.
 */
static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg,
long (*msg_handler)(void __user *, struct msg_msg *, size_t))
{
int mode;
struct msg_queue *msq;
struct ipc_namespace *ns;
struct msg_msg *msg, *copy = NULL;
DEFINE_WAKE_Q(wake_q);
ns = current->nsproxy->ipc_ns;
if (msqid < 0 || (long) bufsz < 0)
return -EINVAL;
// MSG_COPY was requested: peek a copy instead of removing the message
if (msgflg & MSG_COPY) {
if ((msgflg & MSG_EXCEPT) || !(msgflg & IPC_NOWAIT))
return -EINVAL;
copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));
if (IS_ERR(copy))
return PTR_ERR(copy);
}
// convert the requested message type into a search mode
mode = convert_mode(&msgtyp, msgflg);
rcu_read_lock();
// look up the msg_queue for this id (same logic as msgsnd)
msq = msq_obtain_object_check(ns, msqid);
if (IS_ERR(msq)) {
rcu_read_unlock();
free_copy(copy);
return PTR_ERR(msq);
}
for (;;) {
struct msg_receiver msr_d;
... ...
// look for a matching message
msg = find_msg(msq, &msgtyp, mode);
if (!IS_ERR(msg)) {
... ...
goto out_unlock0;
}
... ...
// no readable message: link this msg_receiver onto q_receivers
list_add_tail(&msr_d.r_list, &msq->q_receivers);
msr_d.r_tsk = current;
msr_d.r_msgtype = msgtyp;
msr_d.r_mode = mode;
if (msgflg & MSG_NOERROR)
msr_d.r_maxsize = INT_MAX;
else
msr_d.r_maxsize = bufsz;
msr_d.r_msg = ERR_PTR(-EAGAIN);
__set_current_state(TASK_INTERRUPTIBLE);
ipc_unlock_object(&msq->q_perm);
rcu_read_unlock();
schedule(); // yield the CPU until a sender wakes us
... ...
msg = READ_ONCE(msr_d.r_msg);
if (msg != ERR_PTR(-EAGAIN))
goto out_unlock1;
/*
 * ... or see -EAGAIN, acquire the lock to check the message
 * again.
 */
ipc_lock_object(&msq->q_perm);
msg = msr_d.r_msg;
if (msg != ERR_PTR(-EAGAIN))
goto out_unlock0;
// unlink the msg_receiver from q_receivers
list_del(&msr_d.r_list);
// check for pending signals (non-zero means a signal is pending)
if (signal_pending(current)) {
msg = ERR_PTR(-ERESTARTNOHAND);
goto out_unlock0;
}
ipc_unlock_object(&msq->q_perm);
}
out_unlock0:
ipc_unlock_object(&msq->q_perm);
wake_up_q(&wake_q);
out_unlock1:
rcu_read_unlock();
if (IS_ERR(msg)) {
free_copy(copy);
return PTR_ERR(msg);
}
bufsz = msg_handler(buf, msg, bufsz);
free_msg(msg);
return bufsz;
}
list_add_tail(&msr_d.r_list, &msq->q_receivers);
如果没有待读取的消息,将struct msg_receiver对象msr_d添加到q_receivers列表上,然后执行调度,让出cpu。后面还会看到q_receivers,重点注意下。
下面看下发送msg消息的核心处理代码
/*
 * Kernel excerpt (ipc/msg.c): System V message queue send path.
 * Lines marked "... ..." are elided from the original source.
 * pipelined_send() tries to hand the message directly to a sleeping
 * receiver; only if nobody is waiting is the message queued.
 */
static long do_msgsnd(int msqid, long mtype, void __user *mtext,
size_t msgsz, int msgflg)
{
struct msg_queue *msq;
struct msg_msg *msg;
int err;
struct ipc_namespace *ns;
DEFINE_WAKE_Q(wake_q); // wake list — note this variable
ns = current->nsproxy->ipc_ns;
... ...
// the key call: try handing the message straight to a waiting receiver
if (!pipelined_send(msq, msg, &wake_q)) {
/* no one is waiting for this message, enqueue it */
list_add_tail(&msg->m_list, &msq->q_messages);
msq->q_cbytes += msgsz;
msq->q_qnum++;
atomic_add(msgsz, &ns->msg_bytes);
atomic_inc(&ns->msg_hdrs);
}
err = 0;
msg = NULL;
out_unlock0:
ipc_unlock_object(&msq->q_perm);
wake_up_q(&wake_q); // wake the receivers collected by pipelined_send()
out_unlock1:
rcu_read_unlock();
if (msg != NULL)
free_msg(msg);
return err;
}
/*
 * Kernel excerpt (ipc/msg.c): try to hand @msg directly to a receiver
 * already sleeping on @msq->q_receivers. On a match the message (or an
 * error) is published to the receiver's r_msg slot and the receiver's
 * task is queued on @wake_q for wake_up_q() to wake later, outside the
 * queue lock.
 *
 * Returns 1 when the message was consumed by a receiver, 0 when nobody
 * took it and the caller must enqueue it.
 *
 * Fix vs. the original excerpt: the trailing annotation on the
 * WRITE_ONCE(msr->r_msg, msg) line was bare text (not a comment), which
 * would not compile; it is now a proper comment.
 */
static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg,
struct wake_q_head *wake_q)
{
struct msg_receiver *msr, *t;
list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
!security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
msr->r_msgtype, msr->r_mode)) {
list_del(&msr->r_list);
if (msr->r_maxsize < msg->m_ts) { // m_ts is the message size
wake_q_add(wake_q, msr->r_tsk);
WRITE_ONCE(msr->r_msg, ERR_PTR(-E2BIG)); // message too large
} else {
msq->q_lrpid = task_pid_vnr(msr->r_tsk); // pid of the last receiver
msq->q_rtime = get_seconds(); // time of the last receive
wake_q_add(wake_q, msr->r_tsk); // r_tsk: the waiting receiver task
WRITE_ONCE(msr->r_msg, msg); // publish msg into msr->r_msg
return 1;
}
}
}
return 0;
}
pipelined_send函数中,q_receivers链表上,是一个个msg_receiver对象,每一个对象,对应一个进程。将待发送的消息,添加到msg_receiver对象上后,将进程对象添加到wake_q列表中。wake_q在do_msgsnd中定义。
在do_msgsnd最后,调用wake_up_q唤醒等待的进程。
被唤醒的进程,继续执行do_msgrcv函数中schedule后面的逻辑,将消息返回给用户空间。