epoll源码学习记录

一 几个重要的结构体以及关系

struct eventpoll { //linux-5.15.5\fs\eventpoll.c
  struct mutex  mtx;
  wait_queue_head_t wq; //重要变量—等待队列
  wait_queue_head_t poll_wait;
  struct  list_head  rdllist; //List of ready file descriptors
  struct  rb_root  rbr; // RB tree root used to store monitored fd structs
  rwlock_t  lock; //Lock which protects rdllist and ovflist //读写自旋锁(rwlock_t)
  //自旋锁特点:得不到锁不会引起进程休眠
  struct epitem *ovflist;
  struct file *file;
}

struct epitem {
    struct  rb_node  rbn;
	struct  list_head  rdllink; //就绪时挂到eventpoll->rdllist上的链表节点(注意:与eventpoll中的rdllist并不同名)
    struct eppoll_entry *pwqlist; //重要
    struct  eventpoll *ep; //指向该epitem所属的eventpoll(是指针,不是内嵌结构)
    struct epoll_event event;
}

struct eppoll_entry {
  struct epitem *base; //有epitem
  wait_queue_entry_t wait;
  wait_queue_head_t *whead;
}
typedef struct wait_queue_entry wait_queue_entry_t;
struct wait_queue_entry {
	unsigned int		flags;
	void			*private;
	wait_queue_func_t	func; //ep_poll_callback
	struct list_head	entry;
};

//ep_insert、回调函数ep_ptable_queue_proc中有用到
struct ep_pqueue {
	poll_table pt;
	struct epitem *epi;
};
typedef struct poll_table_struct {--- include\linux\poll.h
	poll_queue_proc _qproc;
	__poll_t _key;
} poll_table;


//一切皆file,socket也有等待队列
struct file { -- include\linux\fs.h
  const struct file_operations	*f_op;
  void			*private_data;
}

const struct proto_ops inet_stream_ops -- af_inet.c
.poll		   = tcp_poll,

struct sock {-- linux-5.15.5/include/net/sock.h
 u32			sk_ack_backlog; //当前全连接队列已有数据个数,
 struct socket_wq __rcu	*sk_wq; //重要
 struct socket		*sk_socket;
 struct sk_buff_head	 sk_receive_queue; //接收队列
 void			(*sk_data_ready)(struct sock *sk);
}
struct socket_wq {-- include\linux\net.h	
	wait_queue_head_t	wait;//等待队列,双循环链表
}

struct socket {-- include/linux/net.h
	struct file		*file;
	struct sock		*sk;
	const struct proto_ops	*ops;
	struct socket_wq	wq; //重要变量
};

二 epoll_ctl--以ADD为例

SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
do_epoll_ctl(int epfd, int op,{
1 struct eventpoll *ep = f.file->private_data;

2 epi = ep_find(ep, tf.file, fd); //根据fd,在红黑树上查找是否有该节点

3 case EPOLL_CTL_ADD:
  error = ep_insert(ep, epds, tf.file, fd, full_check);
}


ep_insert(struct eventpoll *ep,{
1 新建epitem *epi, ep_rbtree_insert(ep, epi);

   struct ep_pqueue epq;
2  init_poll_funcptr(&epq.pt, ep_ptable_queue_proc); 

3  ep_item_poll(epi, &epq.pt, 1);
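   --| 被监听的fd是普通文件/socket时: vfs_poll(file, pt) --| file->f_op->poll(对TCP socket即sock_poll --| tcp_poll)
       --| sock_poll_wait --| poll_wait(filp, &sock->wq.wait, p); --| p->_qproc(filp, wait_address, p);
   --| 被监听的fd本身是epoll fd时才走: poll_wait(file, &ep->poll_wait, wait);
       ep_item_poll返回该fd当前已就绪且被关注的事件掩码;若不为0,ep_insert直接把epi加入ep->rdllist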
}

//poll_wait会回调到这
static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, poll_table *pt)
{
	struct ep_pqueue *epq = container_of(pt, struct ep_pqueue, pt);
	struct epitem *epi = epq->epi;
    struct eppoll_entry *pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL);
    // pwq->wait.func = ep_poll_callback
    init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
    pwq->whead = whead;
    pwq->base = epi;

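    //把pwq->wait.entry加到socket->wq.wait(即参数whead),重点
    //注意:只有事件带EPOLLEXCLUSIVE时才调用add_wait_queue_exclusive,否则调用add_wait_queue(whead, &pwq->wait)
    add_wait_queue_exclusive(whead, &pwq->wait);
    --| __add_wait_queue_entry_tail(wq_head, wq_entry); 
       --| list_add_tail(&wq_entry->entry, &wq_head->head);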

    epi->pwqlist = pwq;
}

//怎么回调到这,见四
ep_poll_callback(wait_queue_entry_t *wait,
{
  struct epitem *epi = ep_item_from_wait(wait);
  struct eventpoll *ep = epi->ep;
  list_add_tail_lockless(&epi->rdllink, &ep->rdllist) 
  //把epi->rdllink添加到ep->rdllist, epoll不需要遍历的原因
  
  wake_up(&ep->wq);
}

三 epoll_wait

do_epoll_wait(int epfd,
{
  struct eventpoll *ep = f.file->private_data;
  error = ep_poll(ep, events, maxevents, to);
}

static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1 rdllist有没有就绪事件。
2 如果有就绪事件,ep_send_events。
static int ep_send_events(struct eventpoll *ep, //向用户态拷贝数据
{
1 ep_start_scan(ep, &txlist); //把ep->rdllist整体转移到txlist(rdllist被清空),扫描期间新就绪的epi暂存到ep->ovflist

2 遍历txlist中的每个epi: wakeup、删除epi->rdllink、revents = ep_item_poll(epi, &pt, 1); 重新确认就绪事件,revents为0则跳过
events = epoll_put_uevent(revents, epi->event.data, events);
else if (!(epi->event.events & EPOLLET)) { 
//LT和ET不同的地方
/** If this file has been added with Level
			 * Trigger mode, we need to insert back inside
			 * the ready list, so that the next call to
			 * epoll_wait() will check again the events
			 * availability. At this point, no one can insert
			 * into ep->rdllist besides us. The epoll_ctl()
			 * callers are locked out by
			 * ep_scan_ready_list() holding "mtx" and the
			 * poll callback will queue them in ep->ovflist.
			 */
list_add_tail(&epi->rdllink, &ep->rdllist);
}

3 ep_done_scan(ep, &txlist);
}

四 数据到来

int tcp_v4_rcv(struct sk_buff *skb)
   | -> sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
        tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
        | -> tcp_rcv_established(sk, skb);
            | -> tcp_queue_rcv(sk, skb, &fragstolen);
               | -> __skb_queue_tail(&sk->sk_receive_queue, skb); 
               //skb放到sk_receive_queue队尾


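//tcp_rcv_established在tcp_queue_rcv之后调用tcp_data_ready(sk) --| sk->sk_data_ready(sk),
//sk_data_ready在sock_init_data中被设为sock_def_readable
void sock_def_readable(struct sock *sk)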
--| wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | ...);
    --| __wake_up_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m))
       --| __wake_up_common(struct wait_queue_head *wq_head,
          --| curr->func(curr, mode, wake_flags, key);
              对epoll注册的等待项,func即ep_poll_callback(由init_waitqueue_func_entry设置);
              普通阻塞进程的等待项,func才是default_wake_function等唤醒函数
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值
>