linux的poll机制内核代码简单分析

最新推荐文章于 2022-09-23 06:47:45 发布

爱晒太阳的小鲤鱼

最新推荐文章于 2022-09-23 06:47:45 发布

阅读量164

点赞数

本文链接：https://blog.csdn.net/weixin_39234635/article/details/102911845

版权

从应用层面，当上层应用调用poll函数时，会进入内核调用系统调用sys_poll

/*
 * poll(2) system call entry point.
 *
 * @ufds:          user-space array of pollfd descriptors
 * @nfds:          number of entries in @ufds
 * @timeout_msecs: timeout in milliseconds; 0 means "do not wait" and a
 *                 negative value means "wait forever"
 *
 * Converts the timeout to jiffies when it is positive and hands the real
 * work to do_sys_poll(), whose result is returned unchanged.
 */
asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
			long timeout_msecs)
{
	/* Zero and negative timeouts are passed through untouched. */
	s64 timeout_jiffies = timeout_msecs;

	/* Only a strictly positive timeout needs unit conversion. */
	if (timeout_msecs > 0)
		timeout_jiffies = msecs_to_jiffies(timeout_msecs);

	return do_sys_poll(ufds, nfds, &timeout_jiffies);
}

sys_poll会判断timeout_msecs是否为正数：如果是，就调用msecs_to_jiffies把毫秒换算成jiffies；如果不是（0表示不等待，负数表示无限等待），就原样保留这个值。最后把timeout_jiffies的地址传递给do_sys_poll函数


/*
 * Core of poll(2): copy the pollfd array in from user space, poll it, and
 * copy the resulting revents back out.
 *
 * @ufds:    user-space array of pollfd descriptors
 * @nfds:    number of entries in @ufds
 * @timeout: timeout in jiffies, forwarded to do_poll()
 *
 * The descriptors are held in a singly linked list of poll_list chunks.
 * The first chunk lives in an on-stack buffer; any further chunks are
 * allocated with kmalloc() and released again at out_fds.
 *
 * Returns the value produced by do_poll() on success, or
 *   -EINVAL  if @nfds exceeds the RLIMIT_NOFILE soft limit,
 *   -ENOMEM  if a chunk could not be allocated,
 *   -EFAULT  if copying from or to user space failed,
 *   -EINTR   if do_poll() returned 0 and a signal is pending.
 */
int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
{
	struct poll_wqueues table;
	int fdcount, err;
	unsigned int i;
	struct poll_list *head;
	struct poll_list *walk;
	/* Allocate small arguments on the stack to save memory and be
	   faster - use long to make sure the buffer is aligned properly
	   on 64 bit archs to avoid unaligned access */
	long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
	struct poll_list *stack_pp = NULL;

	/* Do a sanity check on nfds ... */
	if (nfds > current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		return -EINVAL;

	poll_initwait(&table);

	head = NULL;
	walk = NULL;
	i = nfds;		/* number of descriptors still to copy in */
	err = -ENOMEM;		/* reported if a chunk allocation fails */
	while (i != 0) {
		struct poll_list *pp;
		int num, size;

		/* The stack chunk and the heap chunks have different capacities. */
		if (stack_pp == NULL)
			num = N_STACK_PPS;
		else
			num = POLLFD_PER_PAGE;
		if (num > i)
			num = i;
		size = sizeof(struct poll_list) + sizeof(struct pollfd)*num;
		if (!stack_pp)
			/* First chunk: use the on-stack buffer. */
			stack_pp = pp = (struct poll_list *)stack_pps;
		else {
			/* Every later chunk comes from the heap. */
			pp = kmalloc(size, GFP_KERNEL);
			if (!pp)
				goto out_fds;
		}
		pp->next = NULL;
		pp->len = num;
		/* Append the chunk to the list. */
		if (head == NULL)
			head = pp;
		else
			walk->next = pp;

		walk = pp;
		/* nfds - i descriptors have been consumed so far. */
		if (copy_from_user(pp->entries, ufds + nfds-i,
				sizeof(struct pollfd)*num)) {
			err = -EFAULT;
			goto out_fds;
		}
		i -= pp->len;
	}

	fdcount = do_poll(nfds, head, &table, timeout);

	/* OK, now copy the revents fields back to user space. */
	walk = head;
	err = -EFAULT;
	while (walk != NULL) {
		struct pollfd *fds = walk->entries;
		int j;

		for (j = 0; j < walk->len; j++, ufds++) {
			if (__put_user(fds[j].revents, &ufds->revents))
				goto out_fds;
		}
		walk = walk->next;
	}
	err = fdcount;
	if (!fdcount && signal_pending(current))
		err = -EINTR;
out_fds:
	/* Free every heap chunk; the stack chunk must not be passed to kfree(). */
	walk = head;
	while (walk != NULL) {
		struct poll_list *pp = walk->next;
		if (walk != stack_pp)
			kfree(walk);
		walk = pp;
	}
	poll_freewait(&table);
	return err;
}

首先检查nfds是否超过了RLIMIT_NOFILE的限制（超过则直接返回-EINVAL），然后调用poll_initwait函数

/*
 * Put a poll_wqueues into its initial state before it is used for polling.
 *
 * @pwq: the wait-queue bookkeeping structure to initialise
 *
 * Clears the error code, the table pointer and the inline index, and
 * passes __pollwait to init_poll_funcptr() for the embedded poll_table.
 */
void poll_initwait(struct poll_wqueues *pwq)
{
	/* Reset the bookkeeping fields. */
	pwq->inline_index = 0;
	pwq->table = NULL;
	pwq->error = 0;

	/* Register __pollwait as the callback for pwq->pt. */
	init_poll_funcptr(&pwq->pt, __pollwait);
}

init_poll_funcptr(&pwq->pt, __pollwait)相当于pwq->pt.qproc = __pollwait
在这里（pwq指向do_sys_poll中的table）就相当于table.pt.qproc = __pollwait
在从用户空间获得信息以后，调用do_poll函数，调用完do_poll函数后，就可以把信息传递给用户空间，说明核心在于do_poll。值得一提的是，因为返回给用户空间的结果非常简单（只有每个pollfd的revents字段），所以直接调用__put_user逐个写回

/*
 * Poll every descriptor in @list until something is ready, the timeout
 * runs out, a signal is pending, or the wait table reports an error.
 *
 * @nfds:    number of descriptors (not used inside this function)
 * @list:    linked list of poll_list chunks holding the pollfd entries
 * @wait:    wait-queue bookkeeping; its poll_table is handed to do_pollfd()
 *           on the first pass only
 * @timeout: remaining timeout in jiffies; negative means wait forever,
 *           0 means do not sleep. Updated in place as time is consumed.
 *
 * Returns the number of descriptors for which do_pollfd() reported a
 * non-zero mask, or wait->error if that is non-zero.
 */
static int do_poll(unsigned int nfds,  struct poll_list *list,
		   struct poll_wqueues *wait, s64 *timeout)
{
	int count = 0;
	poll_table* pt = &wait->pt;

	for (;;) {
		struct poll_list *walk;
		long __timeout;

		/*
		 * Mark the task as sleeping *before* scanning; the task does
		 * not actually give up the CPU until schedule_timeout() below.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		for (walk = list; walk != NULL; walk = walk->next) {
			struct pollfd * pfd, * pfd_end;

			pfd = walk->entries;
			pfd_end = pfd + walk->len;
			for (; pfd != pfd_end; pfd++) {
				/*
				 * Fish for events. If we found one, record it
				 * and kill the poll_table, so we don't
				 * needlessly register any other waiters after
				 * this. They'll get immediately deregistered
				 * when we break out and return.
				 */
				if (do_pollfd(pfd, pt)) {
					count++;
					pt = NULL;
				}
			}
		}
		/*
		 * All waiters have already been registered, so don't provide
		 * a poll_table to them on the next loop iteration.
		 */
		pt = NULL;
		if (count || !*timeout || signal_pending(current))
			break;
		count = wait->error;
		if (count)
			break;

		if (*timeout < 0) {
			/* Wait indefinitely */
			__timeout = MAX_SCHEDULE_TIMEOUT;
		} else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) {
			/*
			 * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in
			 * a loop
			 */
			__timeout = MAX_SCHEDULE_TIMEOUT - 1;
			*timeout -= __timeout;
		} else {
			/* The whole remaining timeout fits in one sleep. */
			__timeout = *timeout;
			*timeout = 0;
		}

		__timeout = schedule_timeout(__timeout);
		/* Credit back whatever schedule_timeout() returned (finite timeouts only). */
		if (*timeout >= 0)
			*timeout += __timeout;
	}
	__set_current_state(TASK_RUNNING);
	return count;
}

do_sys_poll函数中，fdcount = do_poll(nfds, head, &table, timeout); walk = head;我们可以看到监听文件描述符的结果将会放在head中，所以要重点关注do_poll的struct poll_list *list将会怎么样被初始化
简单分析do_poll函数
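首先直接进入一个死循环里，在死循环中先把当前进程的状态设置为可中断睡眠（TASK_INTERRUPTIBLE）。乍一看有点奇怪：既然已经设置成睡眠状态，为什么直到后面才调用schedule_timeout？原因是set_current_state只是修改进程的状态标记，进程并不会立刻睡眠，真正让出CPU要等到后面的schedule_timeout；之所以先设置状态、再去检查各个文件描述符，是为了避免在“检查完毕”和“真正睡眠”之间到来的唤醒被丢失。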
在这个死循环里嵌套了一个两层循环：外层遍历poll_list链表中的每个节点，内层遍历该节点entries数组中的每个pollfd。
然后开始遍历poll_list list，真正来监控文件描述符的是do_pollfd(pfd, pt)，如果有结果了，计数，然后清空poll_table。
if (count || !*timeout || signal_pending(current))：只要已经有事件就绪（count非零）、或者timeout为0、或者有signal在等待处理，三个条件满足其一就跳出循环。也就是说，只有当此次遍历所有的pfd都没有在do_pollfd得到非零返回值，并且timeout没有耗尽，并且也没有signal在等待处理时，才会调用schedule_timeout(__timeout)，使当前进程最多休眠__timeout长时间。如果休眠被提前打断，schedule_timeout会返回剩余的时间，后面两行代码会把它加回到*timeout中，处理得很好；如果在休眠时间内没有被打断，则会再次循环，但是当到达if (count || !*timeout || signal_pending(current))时，此时timeout已经耗尽了，不用管另外两个条件是否成立，直接跳出循环。
跳出循环后把当前进程重新设置为运行态（TASK_RUNNING），然后返回count。
原来真正用来监听pfd是do_pollfd

/*
 * Poll a single descriptor and record the result in pollfd->revents.
 *
 * @pollfd: the descriptor entry; ->fd and ->events are read, ->revents
 *          is written
 * @pwait:  poll_table forwarded to the file's ->poll() method (may be NULL)
 *
 * Returns the event mask that was stored in pollfd->revents:
 *   0         if ->fd is negative,
 *   POLLNVAL  if ->fd does not resolve to a file,
 *   otherwise the file's poll mask (DEFAULT_POLLMASK when the file has no
 *   ->poll() method) restricted to the requested events plus
 *   POLLERR and POLLHUP.
 */
static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
{
	unsigned int revents = 0;
	int fd = pollfd->fd;

	if (fd >= 0) {
		int fput_needed;
		struct file *filp = fget_light(fd, &fput_needed);

		if (filp == NULL) {
			revents = POLLNVAL;
		} else {
			if (filp->f_op && filp->f_op->poll)
				revents = filp->f_op->poll(filp, pwait);
			else
				revents = DEFAULT_POLLMASK;
			/* Keep only the requested events; errors and hang-ups are always reported. */
			revents &= pollfd->events | POLLERR | POLLHUP;
			fput_light(filp, fput_needed);
		}
	}

	pollfd->revents = revents;
	return revents;
}

核心代码是mask = file->f_op->poll(file, pwait);pollfd->revents = mask; 然后返回mask
file是怎么来的，就是通过fd获得的，fget_light函数有点像应用层open函数的逆向过程（open由文件得到fd，这里由fd找回对应的struct file）。调用file的file_operations中的poll函数指针获得一个mask，然后把mask赋值给pollfd->revents，最后将mask返回，然后我们在do_poll中根据返回值来计数
通过从应用层往下分析，最终我们知道要实现应用层的poll调用，需要在驱动层为poll函数指针实现一个可以返回mask的函数

爱晒太阳的小鲤鱼

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
linux的poll机制内核代码简单分析

从应用层面，当上层应用调用poll函数时，会进入内核调用系统调用sys_pollasmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, long timeout_msecs){ s64 timeout_jiffies; if (timeout_msecs > 0) { timeout...
复制链接

扫一扫