一 几个重要的结构体以及关系
struct eventpoll { //linux-5.15.5\fs\eventpoll.c
struct mutex mtx;
wait_queue_head_t wq; //重要变量—等待队列
wait_queue_head_t poll_wait;
struct list_head rdllist; //List of ready file descriptors
struct rb_root rbr; // RB tree root used to store monitored fd structs
rwlock_t lock; //Lock which protects rdllist and ovflist //读写自旋锁
//自旋锁特点:得不到锁不会引起进程休眠
struct epitem *ovflist;
struct file *file;
}
struct epitem {
struct rb_node rbn;
struct list_head rdllink; //链表节点,就绪时挂到eventpoll的rdllist上(注意:这里是rdllink,eventpoll里是rdllist)
struct eppoll_entry *pwqlist; //重要
struct eventpoll *ep; //指向所属的eventpoll(是指针,不是包含关系)
struct epoll_event event;
}
struct eppoll_entry {
struct epitem *base; //有epitem
wait_queue_entry_t wait;
wait_queue_head_t *whead;
}
typedef struct wait_queue_entry wait_queue_entry_t;
struct wait_queue_entry {
unsigned int flags;
void *private;
wait_queue_func_t func; //ep_poll_callback
struct list_head entry;
};
//ep_insert、回调函数ep_ptable_queue_proc中有用到
struct ep_pqueue {
poll_table pt;
struct epitem *epi;
};
typedef struct poll_table_struct {--- include\linux\poll.h
poll_queue_proc _qproc;
__poll_t _key;
} poll_table;
//一切皆file,socket也有等待队列
struct file { -- include\linux\fs.h
const struct file_operations *f_op;
void *private_data;
}
const struct proto_ops inet_stream_ops -- af_inet.c
.poll = tcp_poll,
struct sock { -- linux-5.15.5/include/net/sock.h
u32 sk_ack_backlog; //当前全连接(accept)队列中已有的连接个数
struct socket_wq __rcu *sk_wq; //重要
struct socket *sk_socket;
struct sk_buff_head sk_receive_queue; //接收队列
void (*sk_data_ready)(struct sock *sk);
}
struct socket_wq {-- include\linux\net.h
wait_queue_head_t wait;//等待队列,双向循环链表
}
struct socket {-- include/linux/net.h
struct file *file;
struct sock *sk;
const struct proto_ops *ops;
struct socket_wq wq; //重要变量
};
二 epoll_ctl--以ADD为例
SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
do_epoll_ctl(int epfd, int op,{
1 struct eventpoll *ep = f.file->private_data;
2 epi = ep_find(ep, tf.file, fd); //根据fd,在红黑树上查找是否有该节点
3 case EPOLL_CTL_ADD:
error = ep_insert(ep, epds, tf.file, fd, full_check);
}
ep_insert(struct eventpoll *ep,{
1 新建epitem *epi, ep_rbtree_insert(ep, epi);
struct ep_pqueue epq;
2 init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
3 ep_item_poll(epi, &epq.pt, 1);
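--| 对socket:vfs_poll -> sock_poll -> tcp_poll -> sock_poll_wait -> poll_wait(filp, &sock->wq.wait, p); p->_qproc(filp, wait_address, p);
--| 被监听的fd本身是epoll fd(嵌套epoll)时才是:poll_wait(file, &ep->poll_wait, wait);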
ep_item_poll返回的revents中若已有关心的事件,且epi还不在就绪链表上,则直接把epi加入ep->rdllist并唤醒等待者
}
//poll_wait会回调到这
static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, poll_table *pt)
{
struct ep_pqueue *epq = container_of(pt, struct ep_pqueue, pt);
struct epitem *epi = epq->epi;
struct eppoll_entry *pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL);
// pwq->wait.func == ep_poll_callback
init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
pwq->whead = whead;
pwq->base = epi;
//把pwq->wait.entry加到socket->wq.wait,重点
add_wait_queue_exclusive(whead, &pwq->wait); //仅设置了EPOLLEXCLUSIVE时走这里;否则是add_wait_queue(whead, &pwq->wait)
--| __add_wait_queue_entry_tail(wq_head, wq_entry);
--| list_add_tail(&wq_entry->entry, &wq_head->head);
epi->pwqlist = pwq;
}
//怎么回调到这,见四
ep_poll_callback(wait_queue_entry_t *wait,
{
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep;
list_add_tail_lockless(&epi->rdllink, &ep->rdllist)
//把epi->rdllink添加到ep->rdllist, epoll不需要遍历的原因
wake_up(&ep->wq);
}
三 epoll_wait
do_epoll_wait(int epfd,
{
struct eventpoll *ep = f.file->private_data;
error = ep_poll(ep, events, maxevents, to);
}
static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1 rdllist有没有就绪事件。
2 如果有就绪事件,ep_send_events。
3 如果没有就绪事件,把当前进程挂到ep->wq上休眠,直到被ep_poll_callback中的wake_up(&ep->wq)唤醒或超时。
static int ep_send_events(struct eventpoll *ep, //向用户态拷贝数据
{
1 ep_start_scan(ep, &txlist); //把ep->rdllist整体转移到txlist(rdllist被清空)
2 遍历txlist中的每个epi:wakeup、删除epi->rdllink、ep_item_poll(epi, &pt, 1);
events = epoll_put_uevent(revents, epi->event.data, events);
else if (!(epi->event.events & EPOLLET)) {
//LT和ET不同的地方
/** If this file has been added with Level
* Trigger mode, we need to insert back inside
* the ready list, so that the next call to
* epoll_wait() will check again the events
* availability. At this point, no one can insert
* into ep->rdllist besides us. The epoll_ctl()
* callers are locked out by
* ep_scan_ready_list() holding "mtx" and the
* poll callback will queue them in ep->ovflist.
*/
list_add_tail(&epi->rdllink, &ep->rdllist);
}
3 ep_done_scan(ep, &txlist);
}
四 数据到来
int tcp_v4_rcv(struct sk_buff *skb)
| -> sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
| -> tcp_rcv_established(sk, skb);
| -> tcp_queue_rcv(sk, skb, &fragstolen);
| -> __skb_queue_tail(&sk->sk_receive_queue, skb);
//skb放到sk_receive_queue队尾;随后tcp_data_ready(sk) -> sk->sk_data_ready(sk),即下面的sock_def_readable
void sock_def_readable(struct sock *sk)
--| wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | ...);
--| __wake_up_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m))
--| __wake_up_common(struct wait_queue_head *wq_head,
--| curr->func(curr, mode, wake_flags, key);
对epoll挂上去的等待项,curr->func即ep_poll_callback(由init_waitqueue_func_entry设置);普通阻塞进程的等待项才是default_wake_function