/* Poll callback installed on the AioContext's internal EventNotifier.
 * Reports whether aio_notify() has fired (e.g. because a BH was
 * scheduled) by reading ctx->notified. */
static bool event_notifier_poll(void *opaque)
{
    EventNotifier *notifier = opaque;
    AioContext *ctx = container_of(notifier, AioContext, notifier);

    return atomic_read(&ctx->notified);
}
/* Allocate and initialize a new AioContext.
 *
 * Returns the new context on success, or NULL if the internal
 * EventNotifier (eventfd-backed — see event_notifier_init) could not be
 * initialized.  The caller owns the returned GSource reference.
 */
AioContext *
aio_context_new(void)
{
    int ret;
    AioContext *ctx;

    /* Register the GSource callbacks for each phase of the event loop. */
    ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));

    /* Initialize the EventNotifier using an eventfd. */
    ret = event_notifier_init(&ctx->notifier, false);
    if (ret < 0) {
        /* __func__ is the C99-standard spelling (was __FUNCTION__). */
        g_print("%s Failed to initialize event notifier\n", __func__);
        goto fail;
    }
    qemu_lockcnt_init(&ctx->list_lock);

    /* Make ctx watch the EventNotifier's read fd, and install the custom
     * event_notifier_poll in place of the event loop's default poll. */
    aio_set_event_notifier(ctx, &ctx->notifier,
                           (EventNotifierHandler *)
                           event_notifier_dummy_cb,
                           event_notifier_poll);
    return ctx;

fail:
    g_source_destroy(&ctx->source);
    return NULL;
}
/* Enabling a BH: qemu_bh_schedule() notifies the event loop to run one
 * round of BH dispatch. */
/* Wake the thread running ctx's event loop, if it is (or may go) blocked. */
void aio_notify(AioContext *ctx)
{
    /* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs
     * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
     */
    smp_mb();
    if (ctx->notify_me) {
        /* Write the EventNotifier's write fd to wake the thread that is
         * running the event loop. */
        event_notifier_set(&ctx->notifier);
        /* Set notified so that when the event loop calls
         * event_notifier_poll() it returns true, indicating an event was
         * polled and the BHs should be dispatched. */
        atomic_mb_set(&ctx->notified, true);
    }
}
/* Mark @bh for dispatch and, if it was not already pending, notify the
 * event loop thread of its AioContext. */
void qemu_bh_schedule(QEMUBH *bh)
{
    AioContext *ctx;

    ctx = bh->ctx;
    bh->idle = 0;
    /* The memory barrier implicit in atomic_xchg makes sure that:
     * 1. idle & any writes needed by the callback are done before the
     *    locations are read in the aio_bh_poll.
     * 2. ctx is loaded before scheduled is set and the callback has a chance
     *    to execute.
     */
    /* atomic_xchg atomically sets scheduled to 1 and returns its previous
     * value.  Only when the previous value was 0 do we notify the event
     * loop thread that an fd is ready.  This atomic_xchg pairs with the
     * one in aio_bh_poll: its implicit barrier guarantees that the store
     * scheduled = 1 is ordered before aio_notify().  Without the barrier
     * the operations could be reordered so that aio_notify() runs while
     * scheduled is still 0; the event loop, seeing scheduled == 0 before
     * dispatching, would skip this callback, and the BH would miss one
     * dispatch opportunity.
     */
    if (atomic_xchg(&bh->scheduled, 1) == 0) {
        aio_notify(ctx);
    }
}
/* Multiple occurrences of aio_bh_poll cannot be called concurrently.
 * The count in ctx->list_lock is incremented before the call, and is
 * not affected by the call.
 */
/* Walk ctx's BH list, run every BH whose scheduled flag is set, then reap
 * any BHs marked deleted.  Returns 1 if at least one non-idle BH ran,
 * 0 otherwise. */
int aio_bh_poll(AioContext *ctx)
{
    QEMUBH *bh, **bhp, *next;
    int ret;
    bool deleted = false;

    ret = 0;
    for (bh = atomic_rcu_read(&ctx->first_bh); bh; bh = next) {
        next = atomic_rcu_read(&bh->next);
        /* The atomic_xchg is paired with the one in qemu_bh_schedule. The
         * implicit memory barrier ensures that the callback (run on the
         * event loop thread) sees all writes done by the scheduling
         * (caller) thread. It also ensures that the scheduling
         * thread sees the zero before bh->cb has run, and thus will call
         * aio_notify again if necessary.
         */
        /* This atomic_xchg pairs with the one in qemu_bh_schedule.  It
         * guarantees that aio_bh_call() executes only after scheduled has
         * been reset to 0.  Without the barrier, the compiler/CPU could
         * reorder so that aio_bh_call() runs before scheduled = 0; a
         * concurrent qemu_bh_schedule() would then still read scheduled
         * as 1, skip aio_notify(), and wrongly assume its freshly
         * scheduled BH will run — when in fact that dispatch already
         * completed before it read scheduled.  The BH would miss one
         * correct dispatch opportunity.
         */
        if (atomic_xchg(&bh->scheduled, 0)) { // scheduled == 1 means the registrant wants this BH dispatched
            /* Idle BHs don't count as progress */
            if (!bh->idle) {
                ret = 1;
            }
            bh->idle = 0;
            aio_bh_call(bh); // run the callback
        }
        if (bh->deleted) { // a BH asked to be deleted after running; remember it
            deleted = true;
        }
    }

    /* remove deleted bhs */
    if (!deleted) { // no dispatched BH asked to be deleted: return directly
        return ret;
    }

    /* Some dispatched BH asked to be deleted; perform the removal, but
     * only if the list lock can be taken exclusively right now. */
    if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
        bhp = &ctx->first_bh;
        while (*bhp) {
            bh = *bhp;
            if (bh->deleted && !bh->scheduled) { // this BH wants deletion and is not pending
                *bhp = bh->next; // unlink from the list and free its memory
                g_free(bh);
            } else {
                bhp = &bh->next; // advance to the next list entry
            }
        }
        qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
    }
    return ret;
}
/* This function is asynchronous: it only marks the BH here; the actual
 * unlinking and g_free happen later, at the end of aio_bh_poll().
 */
void qemu_bh_delete(QEMUBH *bh)
{
    /* Clear scheduled so a pending dispatch is skipped, then mark the BH
     * for reaping.  NOTE(review): plain (non-atomic) stores — presumably
     * only safe from the BH's own AioContext thread; confirm against
     * callers. */
    bh->scheduled = 0;
    bh->deleted = 1;
}
/* Disabling a BH: use qemu_bh_cancel() when you do not want the BH to be
 * dispatched by the event loop, but do not want to delete it either. */
/* This function is asynchronous: only the pending dispatch is dropped;
 * the BH stays on the list and may be scheduled again later.
 */
void qemu_bh_cancel(QEMUBH *bh)
{
    /* Clear scheduled with a full memory barrier so the event loop will
     * not dispatch this BH. */
    atomic_mb_set(&bh->scheduled, 0);
}