重新认识一下 epoll 的 ET 模式和 LT 模式吧?+ 源码验证 + 新疑惑

  • with ep_is_linked().

*/

ws = ep_wakeup_source(epi);

if (ws) {

if (ws->active)

__pm_stay_awake(ep->ws);

__pm_relax(ws);

}

list_del_init(&epi->rdllink);

/*

  • If the event mask intersect the caller-requested one,

  • deliver the event to userspace. Again, ep_scan_ready_list()

  • is holding ep->mtx, so no operations coming from userspace

  • can change the item.

*/

revents = ep_item_poll(epi, &pt, 1);

if (!revents)

continue;

// 复制到用户空间

if (__put_user(revents, &uevent->events) ||

__put_user(epi->event.data, &uevent->data)) {

list_add(&epi->rdllink, head);

ep_pm_stay_awake(epi);

if (!esed->res)

esed->res = -EFAULT;

return 0;

}

esed->res++;

uevent++;

if (epi->event.events & EPOLLONESHOT)

epi->event.events &= EP_PRIVATE_BITS;

else if (!(epi->event.events & EPOLLET)) {

/*

  • If this file has been added with Level

  • Trigger mode, we need to insert back inside

  • the ready list, so that the next call to

  • epoll_wait() will check again the events

  • availability. At this point, no one can insert

  • into ep->rdllist besides us. The epoll_ctl()

  • callers are locked out by

  • ep_scan_ready_list() holding “mtx” and the

  • poll callback will queue them in ep->ovflist.

*/

list_add_tail(&epi->rdllink, &ep->rdllist);

ep_pm_stay_awake(epi);

}

}

return 0;

}

好,这时候就有一个疑问了是吧:那如果我已经把数据读完了,这个 fd 是不是也照样被重新添加到 rdllist 里面了?

对啊,一视同仁啊。

那什么时候被清理掉?总不能一直在里面,然后每次提交给我的 fd 有一堆都要报错吧?

对啊,of course. Why not? 当然是要善后的,怎么能让它这么消耗性能?

(英语不太好,找这段整整花了我半个小时。然后我就纳闷儿了:这里是在检查 rdllist 链表上是否有“空 fd”(也就是已经没有就绪事件的 fd),而空 fd 是因为 LT 模式把 fd 重新挂回链表才产生的,那这里为什么对 LT/ET 一视同仁?难道 ET 也会产生空 fd?)先存疑吧,说不定哪天我就懂了,现在的能力只能看到这一步了。哪天有机会遇上大佬可以请教请教。

/**
 * ep_scan_ready_list - Scans the ready list in a way that makes possible for
 *                      the scan code, to call f_op->poll(). Also allows for
 *                      O(NumReady) performance.
 *
 * @ep: Pointer to the epoll private data structure.
 * @sproc: Pointer to the scan callback. It is invoked exactly once, with @ep,
 *         a private list holding the items taken off ep->rdllist, and @priv.
 * @priv: Private opaque data passed to the @sproc callback.
 * @depth: The current depth of recursive f_op->poll calls; forwarded to
 *         mutex_lock_nested() when this function takes ep->mtx itself.
 * @ep_locked: caller already holds ep->mtx, so it is neither taken nor
 *             released here.
 *
 * Returns: The value returned by the @sproc callback, unchanged.
 */

static __poll_t ep_scan_ready_list(struct eventpoll *ep,

__poll_t (*sproc)(struct eventpoll *,

struct list_head *, void *),void *priv,

int depth, bool ep_locked)

{

__poll_t res;

/* Non-zero when ep->poll_wait must be woken after the locks are dropped. */
int pwake = 0;

struct epitem *epi, *nepi;

/* Private list that receives the current contents of ep->rdllist. */
LIST_HEAD(txlist);

lockdep_assert_irqs_enabled();

/*
 * We need to lock this because we could be hit by
 * eventpoll_release_file() and epoll_ctl().
 */

if (!ep_locked)

mutex_lock_nested(&ep->mtx, depth);

/*
 * Steal the ready list, and re-init the original one to the
 * empty list. Also, set ep->ovflist to NULL so that events
 * happening while looping w/out locks, are not lost. We cannot
 * have the poll callback to queue directly on ep->rdllist,
 * because we want the "sproc" callback to be able to do it
 * in a lockless way.
 */

write_lock_irq(&ep->lock);

/*
 * Move everything from rdllist onto the temporary txlist; rdllist is
 * left empty afterwards. The callback below then works on txlist only.
 */

list_splice_init(&ep->rdllist, &txlist);

WRITE_ONCE(ep->ovflist, NULL);

write_unlock_irq(&ep->lock);

/*
 * Now call the callback function.
 *
 * "sproc" is the scan callback supplied by the caller of this function;
 * it is handed the stolen txlist and runs without ep->lock held. It is
 * not the poll (wait-queue) callback: per the comments in this function,
 * the poll callback is what queues newly ready items on ->ovflist while
 * "sproc" is running.
 */

res = (*sproc)(ep, &txlist, priv);

write_lock_irq(&ep->lock);

/*
 * During the time we spent inside the "sproc" callback, some
 * other events might have been queued by the poll callback.
 * We re-insert them inside the main ready-list here.
 * Each visited item has its ->next reset to EP_UNACTIVE_PTR.
 */

for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL;

nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {

/*
 * We need to check if the item is already in the list.
 * During the "sproc" callback execution time, items are
 * queued into ->ovflist but the "txlist" might already
 * contain them, and the list_splice() below takes care of them.
 */

if (!ep_is_linked(epi)) {

/*
 * ->ovflist is LIFO, so we have to reverse it in order
 * to keep in FIFO.
 */

list_add(&epi->rdllink, &ep->rdllist);

ep_pm_stay_awake(epi);

}

}

/*
 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
 * releasing the lock, events will be queued in the normal way inside
 * ep->rdllist.
 */

WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR);

/*
 * Quickly re-inject items left on "txlist": whatever the callback did
 * not remove from txlist is put back on rdllist.
 *
 * NOTE(review): this splice is not specific to level-triggered items.
 * Level-triggered re-queueing appears to be done by the scan callback
 * itself, which can add an item back to ep->rdllist directly - confirm
 * against the callback in use.
 */

list_splice(&txlist, &ep->rdllist);

__pm_relax(ep->ws);

if (!list_empty(&ep->rdllist)) {

/*
 * Wake up (if active) both the eventpoll wait list and
 * the ->poll() wait list (delayed after we release the lock).
 * wake_up() wakes the tasks sleeping on ep->wq (presumably the
 * epoll_wait() callers); ep->poll_wait is only counted here.
 */

if (waitqueue_active(&ep->wq))

wake_up(&ep->wq);

if (waitqueue_active(&ep->poll_wait))

pwake++;

}

write_unlock_irq(&ep->lock);

/* Release ep->mtx only if it was taken by this function. */
if (!ep_locked)

mutex_unlock(&ep->mtx);

/* We have to call this outside the lock */

if (pwake)

ep_poll_safewake(&ep->poll_wait);

return res;

}


最后

腾讯T3大牛总结的500页MySQL实战笔记意外爆火,P8看了直呼内行

腾讯T3大牛总结的500页MySQL实战笔记意外爆火,P8看了直呼内行
ue_active(&ep->poll_wait))

pwake++;

}

write_unlock_irq(&ep->lock);

if (!ep_locked)

mutex_unlock(&ep->mtx);

/* We have to call this outside the lock */

if (pwake)

ep_poll_safewake(&ep->poll_wait);

return res;

}


最后

[外链图片转存中…(img-n6JUVyP7-1721201329837)]

[外链图片转存中…(img-ypJxeCIV-1721201329837)]

  • 21
    点赞
  • 29
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值