linux内核DEFINE_WAKE_Q的使用

DEFINE_WAKE_Q在内核等待某个资源或数据时使用得很广泛。先通过一段代码(节选自linux内核代码,并改写为可在用户态运行的示例)来了解其使用方法,再结合上一篇文章

《linux进程间通信msg的内核实现》(多多是小坏熊的博客-CSDN博客)来介绍内核是如何实现等待对应资源的。


#include <stdio.h>
#include <sys/types.h>

/*
 * Intrusive singly-linked list node. It is embedded in struct task_struct
 * (member wake_q) so that a task can be chained onto a wake queue.
 * wake_up_q() stops walking when it reaches WAKE_Q_TAIL and resets ->next
 * to NULL for every node it visits.
 */
struct wake_q_node {
	struct wake_q_node *next;	/* following node in the queue */
};

/*
 * Head of a wake queue. DEFINE_WAKE_Q() creates an empty one with
 * first == WAKE_Q_TAIL and lastp == &first; wake_q_add() appends by writing
 * through lastp and then advancing it to the new node's ->next field.
 */
struct wake_q_head {
	struct wake_q_node *first;	/* first queued node, WAKE_Q_TAIL when empty */
	struct wake_q_node **lastp;	/* address of the link the next append overwrites */
};

/* Sentinel pointer value that marks the end of a wake queue. */
#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)

/*
 * Define a wake queue variable called @name in the empty state: the first
 * link holds the tail sentinel and lastp refers to that very link, so the
 * first append overwrites it.
 */
#define DEFINE_WAKE_Q(name)				\
	struct wake_q_head name = {			\
		.first = WAKE_Q_TAIL,			\
		.lastp = &name.first,			\
	}

/*
 * Minimal user-space stand-in for the kernel's task_struct, holding only
 * what this demo needs.
 *
 * pid_t is taken from <sys/types.h>; the former local
 * "typedef __pid_t pid_t;" depended on a glibc-internal (reserved) type name
 * that only happened to be visible through <stdio.h>.
 */
struct task_struct
{
    pid_t pid;                  /* process ID */
    struct wake_q_node wake_q;  /* link used while the task sits on a wake queue */
};

void wake_q_add(struct wake_q_head *head, struct task_struct *task)
{
	struct wake_q_node *node = &task->wake_q;

	/*
	 * Atomically grab the task, if ->wake_q is !nil already it means
	 * its already queued (either by us or someone else) and will get the
	 * wakeup due to that.
	 *
	 * This cmpxchg() implies a full barrier, which pairs with the write
	 * barrier implied by the wakeup in wake_up_q().
	 */
	//if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
	//	return;

	//get_task_struct(task);

	/*
	 * The head is context local, there can be no concurrency.
	 */
	*head->lastp = node; // lastp指向指针1,修改指针1指向的内容
	head->lastp = &node->next; // 修改lastp指向的内容
}

/*
 * Fallback offsetof() for translation units that have not included
 * <stddef.h>; the guard avoids redefining the standard macro when it is
 * already available.
 */
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
#endif

/*
 * container_of() - map a pointer to a member back to its enclosing object.
 * @ptr:    pointer to the member.
 * @type:   type of the enclosing structure.
 * @member: name of the member inside @type.
 *
 * The typed temporary makes the compiler diagnose a @ptr whose type does not
 * match the member. __typeof__ is used instead of typeof so the macro also
 * compiles in strict ISO modes (e.g. -std=c11), where typeof is not a
 * keyword. The statement expression ({ ... }) is a GNU C extension.
 */
#define container_of(ptr, type, member) ({				\
	const __typeof__(((type *)0)->member) *cof_mptr_ = (ptr);	\
	(type *)((char *)cof_mptr_ - offsetof(type, member)); })

/*
 * wake_up_q() - visit every task queued on @head, in insertion order.
 * @head: wake queue filled by wake_q_add().
 *
 * Each node is unlinked (->next reset to NULL) before its task is reported.
 * The kernel calls wake_up_process() and put_task_struct() at that point;
 * this demo prints the task's pid instead.
 */
void wake_up_q(struct wake_q_head *head)
{
	struct wake_q_node *cur;
	struct wake_q_node *following;

	for (cur = head->first; cur != WAKE_Q_TAIL; cur = following) {
		struct task_struct *owner =
			container_of(cur, struct task_struct, wake_q);

		/*
		 * Remember the successor first: clearing ->next below is what
		 * allows the task to be queued again.
		 */
		following = cur->next;
		owner->wake_q.next = NULL;

		/* Stand-in for wake_up_process(owner); put_task_struct(owner); */
		printf("wake up process: %d\n", owner->pid);
	}
}

int main()
{
    // 定义唤醒队列
    DEFINE_WAKE_Q(wake_queue);

    // 定义3个进程对象
    struct task_struct t1 = {.pid=123, .wake_q = WAKE_Q_TAIL};
    struct task_struct t2 = {.pid=456, .wake_q = WAKE_Q_TAIL};
    struct task_struct t3 = {.pid=789, .wake_q = WAKE_Q_TAIL};

    // 打印进程对象上wake_q的地址
    printf("[%d] wake_q: %p\n", t1.pid, &t1.wake_q);
    printf("[%d] wake_q: %p\n", t2.pid, &t2.wake_q);
    printf("[%d] wake_q: %p\n", t3.pid, &t3.wake_q);

    // 初始化时wake_queue.first,和wake_queue.lastp指向的地址
    printf("bg: wake_queue.first: %p, wake_queue.lastp: %p\n", wake_queue.first, wake_queue.lastp);

    // 添加等待对象后,wake_queue.first,和wake_queue.lastp指向的地址
    wake_q_add(&wake_queue, &t1);
    printf("1:  wake_queue.first: %p, wake_queue.lastp: %p\n", wake_queue.first, wake_queue.lastp);
    wake_q_add(&wake_queue, &t2);
    printf("2:  wake_queue.first: %p, wake_queue.lastp: %p\n", wake_queue.first, wake_queue.lastp);
    wake_q_add(&wake_queue, &t3);
    printf("3:  wake_queue.first: %p, wake_queue.lastp: %p\n", wake_queue.first, wake_queue.lastp);

    // 依次唤醒添加到wake_q的几个进程
    wake_up_q(&wake_queue);

    return 0;
}

打印结果如下:

[123] wake_q: 0x7ffe2b17f2e8
[456] wake_q: 0x7ffe2b17f2d8
[789] wake_q: 0x7ffe2b17f2c8
bg: wake_queue.first: 0x1, wake_queue.lastp: 0x7ffe2b17f2f0
1:  wake_queue.first: 0x7ffe2b17f2e8, wake_queue.lastp: 0x7ffe2b17f2e8
2:  wake_queue.first: 0x7ffe2b17f2e8, wake_queue.lastp: 0x7ffe2b17f2d8
3:  wake_queue.first: 0x7ffe2b17f2e8, wake_queue.lastp: 0x7ffe2b17f2c8
wake up process: 123
wake up process: 456
wake up process: 789

调用wake_q_add将task_struct对象上的wake_q,连接到wake_queue上,调用3次wake_q_add后,链接的数据为:wake_queue -> t1.wake_q -> t2.wake_q -> t3.wake_q -> WAKE_Q_TAIL。

wake_up_q中,依次遍历出几个wake_q,取到对应的进程对象task_struct,唤醒进程。

下面看下内核中,是如何使用的:

下面是接收msg消息的处理逻辑

/*
 * Receive path for message queues (excerpt; "... ..." marks code the
 * article left out).
 *
 * Visible flow: validate the arguments, look up the queue under RCU, then
 * loop: find_msg() either yields a message, or the caller links a
 * msg_receiver onto msq->q_receivers, marks itself TASK_INTERRUPTIBLE,
 * drops the queue lock and calls schedule(). After running again it reads
 * msr_d.r_msg; any value other than ERR_PTR(-EAGAIN) ends the loop,
 * otherwise the lock is retaken, r_msg is checked once more, the receiver
 * is unlinked and pending signals are checked.
 *
 * Returns PTR_ERR() of the failure, or the value returned by msg_handler()
 * for the received message.
 */
static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg,
	       long (*msg_handler)(void __user *, struct msg_msg *, size_t))
{
	int mode;
	struct msg_queue *msq;
	struct ipc_namespace *ns;
	struct msg_msg *msg, *copy = NULL;
	DEFINE_WAKE_Q(wake_q);

	ns = current->nsproxy->ipc_ns;

	if (msqid < 0 || (long) bufsz < 0)
		return -EINVAL;

	// MSG_COPY was requested: it is rejected together with MSG_EXCEPT or without IPC_NOWAIT
	if (msgflg & MSG_COPY) {
		if ((msgflg & MSG_EXCEPT) || !(msgflg & IPC_NOWAIT))
			return -EINVAL;
		copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));
		if (IS_ERR(copy))
			return PTR_ERR(copy);
	}

	// Convert the message type / flags into a search mode
	mode = convert_mode(&msgtyp, msgflg);

	rcu_read_lock();
	// Look up the msg_queue for this queue ID (same logic as in msgsnd)
	msq = msq_obtain_object_check(ns, msqid);
	if (IS_ERR(msq)) {
		rcu_read_unlock();
		free_copy(copy);
		return PTR_ERR(msq);
	}

	for (;;) {
		struct msg_receiver msr_d;

		... ...

		// Search for a matching message
		msg = find_msg(msq, &msgtyp, mode);
		if (!IS_ERR(msg)) {
			... ...

			goto out_unlock0;
		}

		... ...

		// No readable message: link this msg_receiver onto the q_receivers list
		list_add_tail(&msr_d.r_list, &msq->q_receivers);
		msr_d.r_tsk = current;
		msr_d.r_msgtype = msgtyp;
		msr_d.r_mode = mode;
		if (msgflg & MSG_NOERROR)
			msr_d.r_maxsize = INT_MAX;
		else
			msr_d.r_maxsize = bufsz;
		msr_d.r_msg = ERR_PTR(-EAGAIN);
		__set_current_state(TASK_INTERRUPTIBLE);

		ipc_unlock_object(&msq->q_perm);
		rcu_read_unlock();
		schedule(); // invoke the scheduler and give up the CPU

		... ...
		msg = READ_ONCE(msr_d.r_msg);
		if (msg != ERR_PTR(-EAGAIN))
			goto out_unlock1;

		 /*
		  * ... or see -EAGAIN, acquire the lock to check the message
		  * again.
		  */
		ipc_lock_object(&msq->q_perm);

		msg = msr_d.r_msg;
		if (msg != ERR_PTR(-EAGAIN))
			goto out_unlock0;

		// Unlink this msg_receiver from the q_receivers list
		list_del(&msr_d.r_list);

		// Check whether the current task has a signal to handle (non-zero means one is pending)
		if (signal_pending(current)) {
			msg = ERR_PTR(-ERESTARTNOHAND);
			goto out_unlock0;
		}

		ipc_unlock_object(&msq->q_perm);
	}

out_unlock0:
	ipc_unlock_object(&msq->q_perm);
	wake_up_q(&wake_q);
out_unlock1:
	rcu_read_unlock();
	if (IS_ERR(msg)) {
		free_copy(copy);
		return PTR_ERR(msg);
	}

	bufsz = msg_handler(buf, msg, bufsz);
	free_msg(msg);

	return bufsz;
}

list_add_tail(&msr_d.r_list, &msq->q_receivers); 

如果没有待读取的消息,将struct msg_receiver对象msr_d添加到q_receivers列表上,然后执行调度,让出cpu。后面还会看到q_receivers,重点注意下。

下面看下发送msg消息的核心处理代码

/*
 * Send path for message queues (excerpt; "... ..." marks code the article
 * left out).
 *
 * Visible flow: pipelined_send() tries to hand the message to a waiting
 * receiver and records the tasks to wake in the local wake_q. If nobody
 * took the message it is appended to msq->q_messages and the accounting is
 * updated. The queued tasks are woken by wake_up_q() after the queue lock
 * has been dropped.
 */
static long do_msgsnd(int msqid, long mtype, void __user *mtext,
		size_t msgsz, int msgflg)
{
	struct msg_queue *msq;
	struct msg_msg *msg;
	int err;
	struct ipc_namespace *ns;
	DEFINE_WAKE_Q(wake_q); // local list of tasks to wake; this is the variable to watch

	ns = current->nsproxy->ipc_ns;

	... ...

	// The function to focus on
	if (!pipelined_send(msq, msg, &wake_q)) {
		/* no one is waiting for this message, enqueue it */
		list_add_tail(&msg->m_list, &msq->q_messages);
		msq->q_cbytes += msgsz;
		msq->q_qnum++;
		atomic_add(msgsz, &ns->msg_bytes);
		atomic_inc(&ns->msg_hdrs);
	}

	err = 0;
	msg = NULL;

out_unlock0:
	ipc_unlock_object(&msq->q_perm);
	wake_up_q(&wake_q);
out_unlock1:
	rcu_read_unlock();
	if (msg != NULL)
		free_msg(msg);
	return err;
}
/*
 * pipelined_send() - try to hand @msg directly to a waiting receiver.
 * @msq:    message queue whose q_receivers list is scanned.
 * @msg:    message being sent.
 * @wake_q: caller-owned wake queue; every receiver taken off q_receivers is
 *          added to it.
 *
 * Receivers that match the message but whose buffer is too small are
 * unlinked, given ERR_PTR(-E2BIG) and queued for wake-up; the scan then
 * continues. The first matching receiver with enough room gets the message
 * itself.
 *
 * Return: 1 if a receiver took the message, 0 if nobody did.
 */
static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg,
				 struct wake_q_head *wake_q)
{
	struct msg_receiver *msr, *t;

	list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
		if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
		    !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
					       msr->r_msgtype, msr->r_mode)) {

			list_del(&msr->r_list);
			if (msr->r_maxsize < msg->m_ts) { // m_ts is the message size
				wake_q_add(wake_q, msr->r_tsk);
				WRITE_ONCE(msr->r_msg, ERR_PTR(-E2BIG)); // message too large for this receiver
			} else {
				msq->q_lrpid = task_pid_vnr(msr->r_tsk); // pid of the last receiver
				msq->q_rtime = get_seconds(); // time of the last receive

				wake_q_add(wake_q, msr->r_tsk); // msr->r_tsk is the task waiting for a message
				WRITE_ONCE(msr->r_msg, msg); // publish msg through msr->r_msg
				return 1;
			}
		}
	}

	return 0;
}

pipelined_send函数中,q_receivers链表上是一个个msg_receiver对象,每一个对象对应一个等待接收消息的进程。找到匹配的接收者后,先将其从q_receivers链表上摘除,再调用wake_q_add将对应的进程对象(msr->r_tsk)添加到wake_q列表中,并把待发送的消息写入msg_receiver对象的r_msg。wake_q在do_msgsnd中定义。

在do_msgsnd最后,调用wake_up_q唤醒等待的进程。

被唤醒的进程,继续执行do_msgrcv函数中schedule后面的逻辑,将消息返回给用户空间。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值