操作系统学习-3.Linux文件系统学习3-io的plug过程

最新推荐文章于 2023-04-01 19:59:15 发布

沈万三djh

最新推荐文章于 2023-04-01 19:59:15 发布

阅读量447

点赞数

分类专栏：操作系统文章标签： linux list 运维

本文链接：https://blog.csdn.net/djhsws/article/details/120883638

版权

操作系统专栏收录该内容

9 篇文章 2 订阅

订阅专栏

io的plug过程

io的plug过程：启动篇
io的plug过程：request请求
io的plug过程：blk_init_queue
- 1、这个request_count 个数的统计
- 2、blk_flush_plug_list进行泄洪我们还要继续看
io的plug过程：blk_flush_plug_list的情况
io的plug过程：queuelist的问题

io的plug过程：启动篇

include/linux/blkdev.h
/*
 * Plug state, made up of three list heads. blk_start_plug() initialises
 * all three and stores a pointer to the structure in the current task's
 * ->plug field.
 */
struct blk_plug {
  struct list_head list; /* requests */
  struct list_head mq_list; /* blk-mq requests */
  struct list_head cb_list; /* md requires an unplug callback */
};

这个是plug过程的主要结构体

block/blk-core.c
/*
 * Begin plugging for the current task.
 *
 * When the task has no plug yet, the three list heads of @plug are
 * initialised and @plug becomes the task's plug. When a plug is already
 * installed, the call does nothing and @plug is left untouched.
 */
void blk_start_plug(struct blk_plug *plug)
{
  struct task_struct *tsk = current;

  if (!tsk->plug) {
    INIT_LIST_HEAD(&plug->list);
    INIT_LIST_HEAD(&plug->mq_list);
    INIT_LIST_HEAD(&plug->cb_list);

    tsk->plug = plug;
  }
}

这个是主要的函数

接下来我们将分析plug的过程

io的plug过程：request请求

其实还是从上次的请求开始继续分析的，

从应用的角度来说，请求一个bio 最终会调到这里。make_request_fn

而从驱动的角度来说，我们说有两种：

blk_init_queue

blk_queue_make_request

这个就形成一种闭环。

这两个接口都是用来设置处理上层（应用）请求的接口函数的，

我从blk_init_queue 去分析，发现最后还是blk_queue_make_request，只是中间加了一层自己的blk_queue_bio。

io的plug过程：blk_init_queue

/*
 * Set up a request queue for a driver that supplies a request function.
 *
 * Thin wrapper: forwards @rfn and @lock to blk_init_queue_node(), passing
 * NUMA_NO_NODE as the node id, and returns whatever that call returns.
 */
struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
{
  return blk_init_queue_node(rfn, lock, NUMA_NO_NODE);
}

/*
 * Allocate a request queue for @node_id and finish initialising it.
 *
 * Returns the initialised queue, or NULL when either the allocation or the
 * initialisation fails. When initialisation fails, the freshly allocated
 * queue is handed to blk_cleanup_queue() before returning.
 */
struct request_queue *
blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
{
  struct request_queue *fresh = blk_alloc_queue_node(GFP_KERNEL, node_id);
  struct request_queue *ready;

  if (fresh == NULL)
    return NULL;

  ready = blk_init_allocated_queue(fresh, rfn, lock);
  if (ready != NULL)
    return ready;

  /* initialisation failed: tear the allocated queue down again */
  blk_cleanup_queue(fresh);
  return NULL;
}

/*
 * Finish initialising an already allocated request queue.
 *
 * @q:    queue to initialise; NULL is tolerated and yields NULL
 * @rfn:  the driver's request function, stored in q->request_fn
 * @lock: optional lock; when non-NULL it replaces q->queue_lock
 *
 * Returns @q on success and NULL on failure. On the failure paths reached
 * through the "fail" label, the flush queue allocated here is freed again.
 */
struct request_queue *
blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
       spinlock_t *lock)
{
  if (!q)
    return NULL;

  /* nothing else has been set up yet, so a failure here needs no cleanup */
  q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0);
  if (!q->fq)
    return NULL;

  if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
    goto fail;

  q->request_fn    = rfn;
  q->prep_rq_fn    = NULL;
  q->unprep_rq_fn    = NULL;
  q->queue_flags    |= QUEUE_FLAG_DEFAULT;

  /* a caller-supplied lock overrides the queue's current lock pointer */
  if (lock)
    q->queue_lock    = lock;

  /*
   * Queues created through this path always get blk_queue_bio as their
   * make_request function.
   */
  blk_queue_make_request(q, blk_queue_bio);

  q->sg_reserved_size = INT_MAX;

  /* elevator_init() runs with q->sysfs_lock held */
  mutex_lock(&q->sysfs_lock);

  if (elevator_init(q, NULL)) {
    mutex_unlock(&q->sysfs_lock);
    goto fail;
  }
  mutex_unlock(&q->sysfs_lock);
  return q;

fail:
  blk_free_flush_queue(q->fq);
  return NULL;
}

这里就可以看出两种模式的区分了，很明显。至于blk_queue_make_request里面，当然是一个队列loop类的实现，这里就不求甚解了。

这里如果是自己blk_queue_make_request就是没有任何调度和蓄洪可言了。就直接的到驱动了。

如果用blk_queue_bio转一下就会有蓄洪，和电梯。下面我看下blk_queue_bio

/*
 * make_request function for queues set up via blk_init_allocated_queue().
 *
 * The bio is either merged into an existing request (on the current task's
 * plug list or in the elevator), or a new request is built from it. A new
 * request is appended to the task's plug list when the task has a plug;
 * otherwise it is added to the queue and the queue is run at once.
 *
 * Always returns BLK_QC_T_NONE.
 */
static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
{
  const bool sync = !!(bio->bi_rw & REQ_SYNC);
  struct blk_plug *plug;
  int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
  struct request *req;
  unsigned int request_count = 0;

  /* both helpers take &bio, so they may replace the bio pointer */
  blk_queue_bounce(q, &bio);

  blk_queue_split(q, &bio, q->bio_split);

  /* integrity preparation failed: complete the bio with -EIO */
  if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
    bio->bi_error = -EIO;
    bio_endio(bio);
    return BLK_QC_T_NONE;
  }

  /*
   * FLUSH/FUA bios skip every merge attempt; the queue lock is taken here
   * because the get_rq path is entered with it held.
   */
  if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
    spin_lock_irq(q->queue_lock);
    where = ELEVATOR_INSERT_FLUSH;
    goto get_rq;
  }
//a plug-list merge is attempted only when the queue is not flagged nomerges
  if (!blk_queue_nomerges(q)) {
    if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
      return BLK_QC_T_NONE;
  } else
    request_count = blk_plug_queued_count(q);//count the requests for this queue on the current plug list

  spin_lock_irq(q->queue_lock);

  el_ret = elv_merge(q, &req, bio);//ask the elevator whether the bio can be back- or front-merged into an existing request
  if (el_ret == ELEVATOR_BACK_MERGE) {
    if (bio_attempt_back_merge(q, req, bio)) {
      elv_bio_merged(q, req, bio);
      if (!attempt_back_merge(q, req))
        elv_merged_request(q, req, el_ret);
      goto out_unlock;
    }
  } else if (el_ret == ELEVATOR_FRONT_MERGE) {
    if (bio_attempt_front_merge(q, req, bio)) {
      elv_bio_merged(q, req, bio);
      if (!attempt_front_merge(q, req))
        elv_merged_request(q, req, el_ret);
      goto out_unlock;
    }
  }

get_rq:
  rw_flags = bio_data_dir(bio);
  if (sync)
    rw_flags |= REQ_SYNC;

  /*
   * NOTE(review): the failure path below jumps straight to the unlock while
   * the success path re-takes the lock in the else-branch further down, so
   * get_request() presumably returns with the queue lock dropped on success
   * and held on failure - confirm against get_request().
   */
  req = get_request(q, rw_flags, bio, GFP_NOIO);
  if (IS_ERR(req)) {
    bio->bi_error = PTR_ERR(req);
    bio_endio(bio);
    goto out_unlock;
  }

  init_request_from_bio(req, bio);

  if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
    req->cpu = raw_smp_processor_id();

  plug = current->plug;
  if (plug) {
    if (!request_count)
      trace_block_plug(q);
    else {
      if (request_count >= BLK_MAX_REQUEST_COUNT) {//too many requests already plugged: drain them via blk_flush_plug_list
        blk_flush_plug_list(plug, false);
        trace_block_plug(q);
      }
    }
    list_add_tail(&req->queuelist, &plug->list);//the new request goes to the tail of the plug list, whether or not a flush just happened
    blk_account_io_start(req, true);
  } else {
    /* no plug: add the request to the queue and run the queue right away */
    spin_lock_irq(q->queue_lock);
    add_acct_request(q, req, where);
    __blk_run_queue(q);
    /* the merge and allocation-failure paths jump here with the lock held */
out_unlock:
    spin_unlock_irq(q->queue_lock);
  }

  return BLK_QC_T_NONE;
}

这里其实还不是很清楚蓄洪和泄洪，只是看到了一些框架。

1、这个request_count 个数的统计

unsigned int blk_plug_queued_count(struct request_queue *q)
{
...
  plug = current->plug;
...

  if (q->mq_ops)
    plug_list = &plug->mq_list;
  else
    plug_list = &plug->list;

  list_for_each_entry(rq, plug_list, queuelist) {
    if (rq->q == q)
      ret++;
  }
out:
  return ret;
}

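就是在统计这个list里的个数。乍一看这个list只是用来计数的，为什么不直接用一个int去做呢？注意循环里有 rq->q == q 的判断：plug的list上可能挂着属于不同request_queue的rq，这里只统计属于当前q的那些，所以单靠一个int计数器是不够的；而且这个list本身就是后面泄洪时要逐个取出的rq链表。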

2、blk_flush_plug_list进行泄洪我们还要继续看

/*
 * Drain a plug: hand every request collected on @plug to its queue.
 *
 * @plug:          the plug to drain
 * @from_schedule: passed on unchanged to the plug callbacks, to the blk-mq
 *                 flush and to queue_unplugged()
 */
void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
  struct request_queue *q;
  unsigned long flags;
  struct request *rq;
  LIST_HEAD(list);
  unsigned int depth;

  flush_plug_callbacks(plug, from_schedule);

  /* blk-mq requests live on their own list and are flushed separately */
  if (!list_empty(&plug->mq_list))
    blk_mq_flush_plug_list(plug, from_schedule);

  if (list_empty(&plug->list))
    return;

  /* move everything onto a private list; plug->list is left empty */
  list_splice_init(&plug->list, &list);

  /*
   * NOTE(review): plug_rq_cmp is not shown here; the loop below only
   * switches queues when rq->q changes, so the sort presumably groups
   * requests by queue - confirm.
   */
  list_sort(NULL, &list, plug_rq_cmp);

  q = NULL;
  depth = 0;
  /* the plain spin_lock() below runs with local interrupts disabled */
  local_irq_save(flags);
  while (!list_empty(&list)) {
    rq = list_entry_rq(list.next);
    list_del_init(&rq->queuelist);
    BUG_ON(!rq->q);
    if (rq->q != q) {
      /* moving to another queue: unplug the previous one, lock the new one */
      if (q)
        queue_unplugged(q, depth, from_schedule);
      q = rq->q;
      depth = 0;
      spin_lock(q->queue_lock);
    }
    /* the queue is dying: end the request with -ENODEV instead of queueing it */
    if (unlikely(blk_queue_dying(q))) {
      __blk_end_request_all(rq, -ENODEV);
      continue;
    }
    if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))
      __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
    else
      __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);

    depth++;
  }
  /* unplug the last queue that was being filled */
  if (q)
    queue_unplugged(q, depth, from_schedule);
        local_irq_restore(flags);
}

就是将list的各个元素拿出来加入到电梯队列里面去，电梯怎么取呢？
在这里插入图片描述

这个是电梯的接口了，我们下次再关注这些电梯的算法接口。

现在问题是这些**__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH)**; rq是从list里面取出来的，是哪里加进去的呢？

我们在上面好像没有看到。

io的plug过程：blk_flush_plug_list的情况

在上面中我们看到

if (plug) {
    if (!request_count)
      trace_block_plug(q);
    else {
      if (request_count >= BLK_MAX_REQUEST_COUNT) {//如果这个队列的个数太多了，就通过下面的blk_flush_plug_list进行泄洪
        blk_flush_plug_list(plug, false);
        trace_block_plug(q);
      }
    }

这个是在blk_queue_bio 中，也就是在提交请求的情况下，会调用blk_flush_plug_list 来unplug操作。
今天看看另外一种unplug操作，
调度时进行unplug(异步方式)
当发生内核调度时，当前进程sleep前，先将当前task的plug列表中的请求flush到派发队列中，并进行unplug。
主要代码流程如下：

schedule->

    sched_submit_work ->

        blk_schedule_flush_plug()->

            blk_flush_plug_list(plug, true) ->注意:
     这里传入的from_schedule参数为true，表示将触发异步unplug，
     即唤醒kblockd工作队列来进行unplug操作。
     后续的kblockd的唤醒周期在块设备驱动中设置，比如scsi中设置为3ms。

queue_unplugged-> blk_run_queue_async

queue_unplugged():

/*
 * Drain a plug: hand every request collected on @plug to its queue.
 *
 * @plug:          the plug to drain
 * @from_schedule: passed on unchanged to the plug callbacks, to the blk-mq
 *                 flush and to queue_unplugged()
 */
void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
  struct request_queue *q;
  unsigned long flags;
  struct request *rq;
  LIST_HEAD(list);
  unsigned int depth;

  flush_plug_callbacks(plug, from_schedule);

  if (!list_empty(&plug->mq_list))
    blk_mq_flush_plug_list(plug, from_schedule);

  if (list_empty(&plug->list))
    return;

  /* move everything onto a private list; plug->list is left empty */
  list_splice_init(&plug->list, &list);

  list_sort(NULL, &list, plug_rq_cmp);

  q = NULL;
  depth = 0;
  local_irq_save(flags);
  while (!list_empty(&list)) {
    /*
     * list holds the requests that are to be issued; they are taken off
     * one at a time, starting from the head.
     */
    rq = list_entry_rq(list.next);
    list_del_init(&rq->queuelist);
    BUG_ON(!rq->q);
    if (rq->q != q) {
      /*
       * This drops the queue lock
       */
      if (q)
        queue_unplugged(q, depth, from_schedule);
      q = rq->q;
      depth = 0;
      spin_lock(q->queue_lock);
    }

    /*
     * Short-circuit if @q is dead
     */
    if (unlikely(blk_queue_dying(q))) {
      __blk_end_request_all(rq, -ENODEV);
      continue;
    }

    /*
     * rq is already accounted, so use raw insert
     */
    if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))
      __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
    else
      __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);

    depth++;
  }

  /*
   * Unplug the last queue that was being filled; this drops the queue lock
   */
  if (q)
    queue_unplugged(q, depth, from_schedule);

  local_irq_restore(flags);
}

/*
 * Kick @q after requests have been added to it, then release its lock.
 *
 * With @from_schedule set, the queue run is handed to
 * blk_run_queue_async(); otherwise the queue is run directly through
 * __blk_run_queue(). In both cases q->queue_lock is unlocked before
 * returning. @depth is not used by this version of the function.
 */
static void queue_unplugged(struct request_queue *q, unsigned int depth,
          bool from_schedule)
  __releases(q->queue_lock)
{
  if (!from_schedule)
    __blk_run_queue(q);
  else
    blk_run_queue_async(q);

  spin_unlock(q->queue_lock);
}

根据from_schedule 进行同步操作或者异步

/*
 * Schedule an asynchronous run of @q.
 *
 * Does nothing for a stopped or dead queue. Otherwise q->delay_work is
 * (re)queued on kblockd_workqueue with a delay of 0.
 */
void blk_run_queue_async(struct request_queue *q)
{
  if (unlikely(blk_queue_stopped(q) || blk_queue_dead(q)))
    return;

  mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
}

这里面就调入一个异步时钟（延迟工作 delayed work），到时间了它还是会调用到__blk_run_queue，进行request的操作的。

这里面异步时钟的逻辑，它里面有一套，大概可以比喻成handle 的消息处理机制，这一套我们先不讨论。

我们先看下这个异步时钟是在哪里设置的，

blk_init_queue–》blk_init_queue_node–》blk_alloc_queue_node

就是在初始化的时候做的，

blk_init_queue–》blk_init_queue_node–》blk_alloc_queue_node

就是在初始化的时候做的，

/*
 * Excerpt of the queue allocation routine; the dotted line marks code that
 * was left out of this listing (which is why no allocation and no return
 * statement appear here).
 *
 * The part shown sets up two timers and the delayed work item that
 * blk_run_queue_async() later queues.
 */
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
  struct request_queue *q;
  int err;
.....
  /* both timers receive the queue pointer, cast to unsigned long, as their argument */
  setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
        laptop_mode_timer_fn, (unsigned long) q);
  setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);


  /* blk_delay_work is the handler that runs when q->delay_work fires */
  INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
}

中间的时钟消息逻辑我们先不看，我们知道，到时间以后，它会执行blk_delay_work

/*
 * Handler for a queue's delay_work item.
 *
 * Recovers the owning request_queue from the embedded work structure and
 * runs the queue with q->queue_lock held.
 */
static void blk_delay_work(struct work_struct *work)
{
  struct request_queue *q;

  /* work is the .work member of q->delay_work; step back to the queue */
  q = container_of(work, struct request_queue, delay_work.work);
  spin_lock_irq(q->queue_lock);
  __blk_run_queue(q);
  spin_unlock_irq(q->queue_lock);
}

OK，最终还是到 __blk_run_queue，在进行下一步分析前我们先处理下逻辑。

就是一个list里面，有很多个bio请求，这些bio请求转换成驱动能认识的就是rq，什么时候对这个rq进行遍历，

一个就是实际请求的时候，同步策略，另外一个就是schedule的时候异步。

达到的条件是要达到MAX，我就开始遍历。每个rq。

rq的—>q,是由驱动设置进来的，q有可能有很多个驱动，如磁盘，usb或者其他block驱动

所以这里可以细化的，就是可以做一个list分类，每个驱动达到MAX，再来清理

那么我们作为学习，假设就是一个q来说，最终由 __blk_run_queue，送入到那个地方进行运行，而驱动就使用elv_next_request，

拿取rq。这里似乎还有点差距。我们再来看看__blk_run_queue，应该就没什么缝隙了。

/*
 * Run @q unless it has been stopped.
 *
 * A stopped queue is left alone; any other queue is passed on to
 * __blk_run_queue_uncond().
 */
void __blk_run_queue(struct request_queue *q)
{
  if (likely(!blk_queue_stopped(q)))
    __blk_run_queue_uncond(q);
}

/*
 * Invoke the driver's request function for @q without checking whether the
 * queue is stopped. A dead queue is still skipped.
 */
inline void __blk_run_queue_uncond(struct request_queue *q)
{
  if (unlikely(blk_queue_dead(q)))
    return;
  /* request_fn_active is raised for exactly the duration of the request_fn call */
  q->request_fn_active++;
  q->request_fn(q);
  q->request_fn_active--;
}

OK，最终调用的是request_fn，就是在
在这里插入图片描述
这边设置进来的东西，上面是我们的demo代码。

ok，调用到了驱动的代码，没有问题了，最终由这个elv_next_request去拿q里面电梯排好的请求。

这就意味着，电梯算法处理的是rq，它是怎么进入的
在这里插入图片描述
进入也是没有问题的，

初始化也应该没有问题，这样的话，我大概就是可以去看，elv的代码了。

io的plug过程：queuelist的问题

在前面的梳理中一直对queuelist有一个来源的疑问，今天结合noop的电梯调度算法进行一次梳理。

也是看了noop后，才回来思考的。

/*
 * Submit a bio to the make_request function of its device's queue.
 *
 * Each task has at most one active submission loop, tracked through
 * current->bio_list. While that loop runs, any further
 * generic_make_request() call made by the same task only appends its bio
 * to the list and returns; the loop picks those bios up one after another.
 *
 * Returns the value of the last make_request_fn call made by this
 * invocation, or BLK_QC_T_NONE when the bio was rejected by the checks,
 * merely queued, or failed.
 */
blk_qc_t generic_make_request(struct bio *bio)
{
  struct bio_list bio_list_on_stack;
  blk_qc_t ret = BLK_QC_T_NONE;
  if (!generic_make_request_checks(bio))
    goto out;
  /* a submission loop is already active on this task: just queue the bio */
  if (current->bio_list) {
    bio_list_add(current->bio_list, bio);
    goto out;
  }
  /* no loop active yet: this call becomes the loop, using an on-stack list */
  bio_list_init(&bio_list_on_stack);
  current->bio_list = &bio_list_on_stack;
  do {
    struct request_queue *q = bdev_get_queue(bio->bi_bdev);
    if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) {
      ret = q->make_request_fn(q, bio);
      blk_queue_exit(q);
      /* fetch the next queued bio; the while condition stops the loop on NULL */
      bio = bio_list_pop(current->bio_list);
    } else {
      /* could not enter the queue: fail this bio and move on to the next */
      struct bio *bio_next = bio_list_pop(current->bio_list);
      bio_io_error(bio);
      bio = bio_next;
    }
  } while (bio);
  current->bio_list = NULL; /* deactivate */

out:
  return ret;
}

这里还是有些疑问的，关于bio_list的，这样的写法还是第一次看到，搞不清楚它的逻辑。

我们就假设，这个do_while就是对这个bio_list进行清空遍历操作。

然后就进行make_request_fn，

我们说了驱动层的两种，在这里如果是直接的那种，相当于直接调用了，

如果是第二种则会进入电梯处理函数。

/*
 * Turn a bio into a request for @q, merging where possible.
 *
 * A bio that merges into an existing request (on the plug list or in the
 * elevator) needs no new request. Otherwise a request is built from the
 * bio and either appended to the current task's plug list or, when the
 * task has no plug, added to the queue, which is then run immediately.
 *
 * Always returns BLK_QC_T_NONE.
 */
static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
{
  const bool sync = !!(bio->bi_rw & REQ_SYNC);
  struct blk_plug *plug;
  int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
  struct request *req;
  unsigned int request_count = 0;
  blk_queue_bounce(q, &bio);
  blk_queue_split(q, &bio, q->bio_split);
  /* integrity preparation failed: complete the bio with -EIO */
  if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
    bio->bi_error = -EIO;
    bio_endio(bio);
    return BLK_QC_T_NONE;
  }

  /* FLUSH/FUA bios skip merging and enter get_rq with the queue lock held */
  if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
    spin_lock_irq(q->queue_lock);
    where = ELEVATOR_INSERT_FLUSH;
    goto get_rq;
  }

  /*
   * Either try to merge into a request on the plug list, or (for nomerges
   * queues) only count the requests already plugged.
   */
  if (!blk_queue_nomerges(q)) {
    if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
      return BLK_QC_T_NONE;
  } else
    request_count = blk_plug_queued_count(q);

  spin_lock_irq(q->queue_lock);

  /* next, try to merge into a request the elevator already holds */
  el_ret = elv_merge(q, &req, bio);
  if (el_ret == ELEVATOR_BACK_MERGE) {
    if (bio_attempt_back_merge(q, req, bio)) {
      elv_bio_merged(q, req, bio);
      if (!attempt_back_merge(q, req))
        elv_merged_request(q, req, el_ret);
      goto out_unlock;
    }
  } else if (el_ret == ELEVATOR_FRONT_MERGE) {
    if (bio_attempt_front_merge(q, req, bio)) {
      elv_bio_merged(q, req, bio);
      if (!attempt_front_merge(q, req))
        elv_merged_request(q, req, el_ret);
      goto out_unlock;
    }
  }

get_rq:
  rw_flags = bio_data_dir(bio);
  if (sync)
    rw_flags |= REQ_SYNC;

  /*
   * NOTE(review): failure jumps straight to the unlock, success re-takes
   * the lock in the else-branch below, so get_request() presumably returns
   * unlocked on success and locked on failure - confirm.
   */
  req = get_request(q, rw_flags, bio, GFP_NOIO);
  if (IS_ERR(req)) {
    bio->bi_error = PTR_ERR(req);
    bio_endio(bio);
    goto out_unlock;
  }
  init_request_from_bio(req, bio);

  if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
    req->cpu = raw_smp_processor_id();

  plug = current->plug;
  if (plug) {
    if (!request_count)
      trace_block_plug(q);
    else {
      /* the plug already holds the maximum for this queue: drain it first */
      if (request_count >= BLK_MAX_REQUEST_COUNT) {
        blk_flush_plug_list(plug, false);
        trace_block_plug(q);
      }
    }
    /* req->queuelist is the link that chains the request onto plug->list */
    list_add_tail(&req->queuelist, &plug->list);
    blk_account_io_start(req, true);
  } else {
    /* no plug: add the request to the queue and run the queue right away */
    spin_lock_irq(q->queue_lock);
    add_acct_request(q, req, where);
    __blk_run_queue(q);
    /* the merge and allocation-failure paths jump here with the lock held */
out_unlock:
    spin_unlock_irq(q->queue_lock);
  }
  return BLK_QC_T_NONE;
}

这里如果count >= MAX了就先进行flush操作；不管有没有flush，最后都是通过req->queuelist把req挂到plug->list的尾部。
之前一直在纠结这个queuelist的元素是从哪里加入进去的，
其实是自己进入了一个误区。

queuelist其实就是连接各个rq的一个链表而已，内核list经常是这样子的，自己却忘记了。

在这里插入图片描述
只是这么一个结构。

好吧，我们现在假设已经达到了MAX，就要进行清除操作。

/*
 * Drain a plug: hand every request collected on @plug to its queue.
 *
 * @plug:          the plug to drain
 * @from_schedule: passed on unchanged to the plug callbacks, to the blk-mq
 *                 flush and to queue_unplugged()
 */
void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
  struct request_queue *q;
  unsigned long flags;
  struct request *rq;
  LIST_HEAD(list);
  unsigned int depth;

  flush_plug_callbacks(plug, from_schedule);

  if (!list_empty(&plug->mq_list))
    blk_mq_flush_plug_list(plug, from_schedule);

  if (list_empty(&plug->list))
    return;

  /* move everything onto a private list; plug->list is left empty */
  list_splice_init(&plug->list, &list);

  list_sort(NULL, &list, plug_rq_cmp);

  q = NULL;
  depth = 0;

  local_irq_save(flags);
  while (!list_empty(&list)) {
    /* detach the request at the head of the private list */
    rq = list_entry_rq(list.next);
    list_del_init(&rq->queuelist);
    BUG_ON(!rq->q);
    if (rq->q != q) {
      /* moving to another queue: unplug the previous one, lock the new one */
      if (q)
        queue_unplugged(q, depth, from_schedule);
      q = rq->q;
      depth = 0;
      spin_lock(q->queue_lock);
    }

    /* the queue is dying: end the request with -ENODEV instead of queueing it */
    if (unlikely(blk_queue_dying(q))) {
      __blk_end_request_all(rq, -ENODEV);
      continue;
    }

    if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))
      __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
    else
      __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);

    depth++;
  }

  /*
   * The last queue is still locked at this point: unplug it (which
   * releases its lock), then re-enable local interrupts.
   */
  if (q)
    queue_unplugged(q, depth, from_schedule);

  local_irq_restore(flags);
}

plug->list 拿到while中进行循环，直到是empty。

然后拿出每一个rq，然后进行list_del_init(&rq->queuelist);

就是如图。
在这里插入图片描述
就是从链表中剥夺它的位置，当然有可能是头结点或者其他情况。

然后queue_unplugged 是告诉驱动你可以调用elv_next_request/blk_peek_request 到电梯里去拿数据了

而后面的**__elv_add_request** 将数据加入电梯。

现在我们用noop的算法过程来连接上驱动和电梯，和蓄洪plug。

先继续从__elv_add_request往下看

/*
 * Insert @rq into @q at the position selected by @where.
 *
 * @where is one of the ELEVATOR_INSERT_* values and may be adjusted before
 * it is acted on (see below). Depending on the final value, the request
 * goes directly onto q->queue_head, into the elevator via its
 * elevator_add_req_fn hook, or to blk_insert_flush(). An unknown value is
 * reported with printk and triggers BUG().
 */
void __elv_add_request(struct request_queue *q, struct request *rq, int where)
{
  trace_block_rq_insert(q, rq);

  blk_pm_add_request(q, rq);

  rq->q = q;

  if (rq->cmd_flags & REQ_SOFTBARRIER) {
    /* for a filesystem barrier request, remember its end sector and the request itself */
    if (rq->cmd_type == REQ_TYPE_FS) {
      q->end_sector = rq_end_sector(rq);
      q->boundary_rq = rq;
    }
  } else if (!(rq->cmd_flags & REQ_ELVPRIV) &&
        (where == ELEVATOR_INSERT_SORT ||
         where == ELEVATOR_INSERT_SORT_MERGE))
    /* a request without REQ_ELVPRIV is not sorted; it is appended at the back instead */
    where = ELEVATOR_INSERT_BACK;

  switch (where) {
  case ELEVATOR_INSERT_REQUEUE:
  case ELEVATOR_INSERT_FRONT:
    /* straight to the head of the dispatch list */
    rq->cmd_flags |= REQ_SOFTBARRIER;
    list_add(&rq->queuelist, &q->queue_head);
    break;

  case ELEVATOR_INSERT_BACK:
    /* drain the elevator, append to the dispatch list and run the queue */
    rq->cmd_flags |= REQ_SOFTBARRIER;
    elv_drain_elevator(q);
    list_add_tail(&rq->queuelist, &q->queue_head);
    __blk_run_queue(q);
    break;

  case ELEVATOR_INSERT_SORT_MERGE:
    /* merged into an existing request: nothing left to insert */
    if (elv_attempt_insert_merge(q, rq))
      break;
    /* fall through: no merge happened, insert as a sorted request */
  case ELEVATOR_INSERT_SORT:
    BUG_ON(rq->cmd_type != REQ_TYPE_FS);
    rq->cmd_flags |= REQ_SORTED;
    q->nr_sorted++;
    if (rq_mergeable(rq)) {
      elv_rqhash_add(q, rq);
      if (!q->last_merge)
        q->last_merge = rq;
    }
    /* hand the request to the elevator's own add hook */
    q->elevator->type->ops.elevator_add_req_fn(q, rq);
    break;

  case ELEVATOR_INSERT_FLUSH:
    rq->cmd_flags |= REQ_SOFTBARRIER;
    blk_insert_flush(rq);
    break;
  default:
    printk(KERN_ERR "%s: bad insertion point %d\n",
           __func__, where);
    BUG();
  }
}

这边有好多个case。其实对于noop来说，意义都不大，noop来说它最终就是将数据放入到q->queue_head队列就好了。

这个队列是驱动初始化时申请的。

我们看下最终的ops

/*
 * Descriptor of the "noop" elevator: its name, its owning module and the
 * table of hooks that implement it. The hooks used in this walkthrough are
 * elevator_add_req_fn (noop_add_request) and elevator_dispatch_fn
 * (noop_dispatch).
 */
static struct elevator_type elevator_noop = {
  .ops = {
    .elevator_merge_req_fn    = noop_merged_requests,
    .elevator_dispatch_fn    = noop_dispatch,
    .elevator_add_req_fn    = noop_add_request,
    .elevator_former_req_fn    = noop_former_request,
    .elevator_latter_req_fn    = noop_latter_request,
    .elevator_init_fn    = noop_init_queue,
    .elevator_exit_fn    = noop_exit_queue,
  },
  .elevator_name = "noop",
  .elevator_owner = THIS_MODULE,
};

/*
 * elevator_add_req_fn hook of the noop elevator: append @rq to the tail of
 * the elevator's private list (nd->queue), which is reached through
 * q->elevator->elevator_data.
 */
static void noop_add_request(struct request_queue *q, struct request *rq)
{
  struct noop_data *nd = q->elevator->elevator_data;

  list_add_tail(&rq->queuelist, &nd->queue);
}

还是一样的。最终就是放在nd->queue。这个nd和上面的q是不是一个东西呢？应该是吧，等我，看下电梯章节的elevator_init

我们现在来看下拿数据的过程elv_next_request/blk_peek_request

之前的版本一直都是用elv_next_request，后面的好像比较常规用blk_peek_request但是最终还是调用elv_next_request。

/*
 * Return the request at the head of q->queue_head, asking the elevator to
 * dispatch one when that list is empty. Returns NULL when nothing can be
 * handed out.
 *
 * The loop does not wait: every pass either returns, or continues only
 * after the elevator's dispatch hook reported a non-zero result.
 */
static inline struct request *__elv_next_request(struct request_queue *q)
{
  struct request *rq;
  struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);

  while (1) {
    /* something is already on the dispatch list: return its head (it is not removed here) */
    if (!list_empty(&q->queue_head)) {
      rq = list_entry_rq(q->queue_head.next);
      return rq;
    }

    /* the flush indices differ and flushes are not queueable: note the delay and give up */
    if (fq->flush_pending_idx != fq->flush_running_idx &&
        !queue_flush_queueable(q)) {
      fq->flush_queue_delayed = 1;
      return NULL;
    }
    /* queue in bypass mode, or the dispatch hook returned 0: nothing to return */
    if (unlikely(blk_queue_bypass(q)) ||
        !q->elevator->type->ops.elevator_dispatch_fn(q, 0))
      return NULL;
  }
}

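这while(1)，有点奇怪啊，会不会造成驱动的死等待？其实不会：每一轮循环要么q->queue_head非空、直接返回队首的rq，要么因为flush被延迟、队列处于bypass状态或者elevator_dispatch_fn返回0（电梯里已经没有可派发的rq）而返回NULL；只有dispatch成功之后才会再循环一次。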

后面就是从q->queue_head中拿rq了。

/*
 * elevator_dispatch_fn hook of the noop elevator.
 *
 * Takes the first request off the elevator's private list (nd->queue) and
 * passes it to elv_dispatch_sort(). Returns 1 when a request was
 * dispatched and 0 when the private list was empty. @force is not used.
 */
static int noop_dispatch(struct request_queue *q, int force)
{
  struct noop_data *nd = q->elevator->elevator_data;
  struct request *rq;

  rq = list_first_entry_or_null(&nd->queue, struct request, queuelist);
  if (!rq)
    return 0;

  list_del_init(&rq->queuelist);
  elv_dispatch_sort(q, rq);
  return 1;
}

而这dispatch，了解下，从nd中拿出rq，放到elv_dispatch_sort。

/*
 * Move @rq onto the dispatch list q->queue_head.
 *
 * Excerpt: the body of the search loop is elided (dotted line), so how the
 * insertion point is chosen is not visible here; boundary and stop_flags
 * are only used in that elided part.
 */
void elv_dispatch_sort(struct request_queue *q, struct request *rq)
{
  sector_t boundary;
  struct list_head *entry;
  int stop_flags;
  /* rq leaves the elevator: drop it as merge candidate and from the merge hash */
  if (q->last_merge == rq)
    q->last_merge = NULL;
  elv_rqhash_del(q, rq);
  q->nr_sorted--;
  boundary = q->end_sector;
  stop_flags = REQ_SOFTBARRIER | REQ_STARTED;
  /* walk the dispatch list from tail to head looking for the insertion point */
  list_for_each_prev(entry, &q->queue_head) {
    struct request *pos = list_entry_rq(entry);
..............
  }
  /* insert rq right after the entry the walk stopped at */
  list_add(&rq->queuelist, entry);
}

这里就很清楚了，将上面nd拿出来的rq，放到q里面。

哦，那上面的疑问就清楚了，前面的nd和q不是同一个，意思说前面的case，有可能强制插入到头部的情况，就是抢占，感觉挺科学的。

沈万三djh

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
操作系统学习-3.Linux文件系统学习3-io的plug过程

io的plug过程io的plug过程：启动篇io的plug过程：request请求io的plug过程：blk_init_queue1、这个request_count 个数的统计2、blk_flush_plug_list进行泄洪我们还要继续看io的plug过程：blk_flush_plug_list的情况io的plug过程：queuelist的问题io的plug过程：启动篇include/linux/blkdev.hstruct blk_plug { struct list_head list; /
复制链接

扫一扫