Linux那些事儿之我是Block层(9)scsi命令的前世今生(三)

最新推荐文章于 2023-08-20 08:41:05 发布

victorzhangl

最新推荐文章于 2023-08-20 08:41:05 发布

阅读量1k

点赞数

分类专栏：转fudan_abc之Linux那些事儿之我是Block层文章标签： linux timer command cmd delay struct

转fudan_abc之Linux那些事儿之我是Block层专栏收录该内容

14 篇文章 0 订阅

订阅专栏

下一个更为重要的函数是 scsi_dispatch_cmd, 来自 drivers/scsi/scsi.c:

459 /*

460 * Function: scsi_dispatch_command

461 *

462 * Purpose: Dispatch a command to the low-level driver.

463 *

464 * Arguments: cmd - command block we are dispatching.

465 *

466 * Notes:

467 */

468 int scsi_dispatch_cmd(struct scsi_cmnd *cmd)

469 {

470 struct Scsi_Host *host = cmd->device->host;

471 unsigned long flags = 0;

472 unsigned long timeout;

473 int rtn = 0;

474

475 /* check if the device is still usable */

476 if (unlikely(cmd->device->sdev_state == SDEV_DEL)) {

477 /* in SDEV_DEL we error all commands. DID_NO_CONNECT

478 * returns an immediate error upwards, and signals

479 * that the device is no longer present */

480 cmd->result = DID_NO_CONNECT << 16;

481 atomic_inc(&cmd->device->iorequest_cnt);

482 __scsi_done(cmd);

483 /* return 0 (because the command has been processed) */

484 goto out;

485 }

486

487 /* Check to see if the scsi lld put this device into state SDEV_BLOCK. */

488 if (unlikely(cmd->device->sdev_state == SDEV_BLOCK)) {

489 /*

490 * in SDEV_BLOCK, the command is just put back on the device

491 * queue. The suspend state has already blocked the queue so

492 * future requests should not occur until the device

493 * transitions out of the suspend state.

494 */

495 scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);

496

497 SCSI_LOG_MLQUEUE(3, printk("queuecommand : device blocked \n"));

498

499 /*

500 * NOTE: rtn is still zero here because we don't need the

501 * queue to be plugged on return (it's already stopped)

502 */

503 goto out;

504 }

505

506 /*

507 * If SCSI-2 or lower, store the LUN value in cmnd.

508 */

509 if (cmd->device->scsi_level <= SCSI_2 &&

510 cmd->device->scsi_level != SCSI_UNKNOWN) {

511 cmd->cmnd[1] = (cmd->cmnd[1] & 0x1f) |

512 (cmd->device->lun << 5 & 0xe0);

513 }

514

515 /*

516 * We will wait MIN_RESET_DELAY clock ticks after the last reset so

517 * we can avoid the drive not being ready.

518 */

519 timeout = host->last_reset + MIN_RESET_DELAY;

520

521 if (host->resetting && time_before(jiffies, timeout)) {

522 int ticks_remaining = timeout - jiffies;

523 /*

524 * NOTE: This may be executed from within an interrupt

525 * handler! This is bad, but for now, it'll do. The irq

526 * level of the interrupt handler has been masked out by the

527 * platform dependent interrupt handling code already, so the

528 * sti() here will not cause another call to the SCSI host's

529 * interrupt handler (assuming there is one irq-level per

530 * host).

531 */

532 while (--ticks_remaining >= 0)

533 mdelay(1 + 999 / HZ);

534 host->resetting = 0;

535 }

536

537 /*

538 * AK: unlikely race here: for some reason the timer could

539 * expire before the serial number is set up below.

540 */

541 scsi_add_timer(cmd, cmd->timeout_per_command, scsi_times_out);

542

543 scsi_log_send(cmd);

544

545 /*

546 * We will use a queued command if possible, otherwise we will

547 * emulate the queuing and calling of completion function ourselves.

548 */

549 atomic_inc(&cmd->device->iorequest_cnt);

550

551 /*

552 * Before we queue this command, check if the command

553 * length exceeds what the host adapter can handle.

554 */

555 if (CDB_SIZE(cmd) > cmd->device->host->max_cmd_len) {

556 SCSI_LOG_MLQUEUE(3,

557 printk("queuecommand : command too long.\n"));

558 cmd->result = (DID_ABORT << 16);

559

560 scsi_done(cmd);

561 goto out;

562 }

563

564 spin_lock_irqsave(host->host_lock, flags);

565 scsi_cmd_get_serial(host, cmd);

566

567 if (unlikely(host->shost_state == SHOST_DEL)) {

568 cmd->result = (DID_NO_CONNECT << 16);

569 scsi_done(cmd);

570 } else {

571 rtn = host->hostt->queuecommand(cmd, scsi_done);

572 }

573 spin_unlock_irqrestore(host->host_lock, flags);

574 if (rtn) {

575 if (scsi_delete_timer(cmd)) {

576 atomic_inc(&cmd->device->iodone_cnt);

577 scsi_queue_insert(cmd,

578 (rtn == SCSI_MLQUEUE_DEVICE_BUSY) ?

579 rtn : SCSI_MLQUEUE_HOST_BUSY);

580 }

581 SCSI_LOG_MLQUEUE(3,

582 printk("queuecommand : request rejected\n"));

583 }

584

585 out:

586 SCSI_LOG_MLQUEUE(3, printk("leaving scsi_dispatch_cmnd()\n"));

587 return rtn;

588 }

一路走来的兄弟一定会一眼就看出这里我们最期待的一行代码就是 571 那个 queuecommand() 的调用 . 因为这之后我们就知道该发生什么了 . 比如对于 U 盘驱动来说 , 命令就从这里接过去开始执行 . 而对于实际的 scsi 控制器 , 其对应的驱动中的 queuecommand 也会被调用 , 剩下的事情我们就不用操心了 . 正常情况下 queuecommand 返回 0. 于是紧接着 scsi_dispatch_cmd 也返回 0. 这样就算是执行了一条 scsi 命令了 .

而 scsi_request_fn() 是否结束还得看 while 循环的条件是否满足 , 而这就得看 blk_queue_plugged() 的脸色了 . 那么我们从字面上来分析 , 什么叫 queue plugged? 我那盗版金山词霸告诉我 plugged 就是塞紧的意思 , 你说队列塞紧的是什么意思 ? 比如说 , 北四环上上下班高峰期 , 许许多多的车辆排成一队又一队 , 但是可能半天都前进不了 , 这就叫塞紧 , 或者说堵车 , 也叫塞车 . 为此咱们使用一个 flag 来标志堵车与否 , 来自 include/linux/blkdev.h:

523 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)

改变这个 flag 的函数有两个 , 一个是设置 , 一个是取消 .

负责设置的是 blk_plug_device.

1542 /*

1543 * "plug" the device if there are no outstanding requests: this will

1544 * force the transfer to start only after we have put all the requests

1545 * on the list.

1546 *

1547 * This is called with interrupts off and no requests on the queue and

1548 * with the queue lock held.

1549 */

1550 void blk_plug_device(request_queue_t *q)

1551 {

1552 WARN_ON(!irqs_disabled());

1553

1554 /*

1555 * don't plug a stopped queue, it must be paired with blk_start_queue()

1556 * which will restart the queueing

1557 */

1558 if (blk_queue_stopped(q))

1559 return;

1560

1561 if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {

1562 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);

1563 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);

1564 }

1565 }

负责取消的是 blk_remove_plug().

1569 /*

1570 * remove the queue from the plugged list, if present. called with

1571 * queue lock held and interrupts disabled.

1572 */

1573 int blk_remove_plug(request_queue_t *q)

1574 {

1575 WARN_ON(!irqs_disabled());

1576

1577 if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))

1578 return 0;

1579

1580 del_timer(&q->unplug_timer);

1581 return 1;

1582 }

而调用前者的地方不少 , 比如我们见到的 __elv_add_request, 其第四个参数 int plug 就可以控制是否调用 blk_plug_device(), 而当我们在 blk_execute_rq_nowait() 中调用 __elv_add_request() 的时候传递的 plug 就是 1.

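另一方面 , 调用 blk_remove_plug 的地方也有多处 . 其中 __generic_unplug_device() 就是之一 . 所以在咱们这个上下文里 , 虽然 blk_execute_rq_nowait() 通过 __elv_add_request() 设置了这个 flag, 但紧接着调用的 __generic_unplug_device() 又通过 blk_remove_plug() 把它清除了 , 实际上就相当于并没有设置这个 flag, 因此 scsi_request_fn() 就会被执行 .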

那么编写这两个函数究竟是为了什么呢 ? 这年头 , 有人做贼 , 我可以理解是为了劫富济贫 , 有人杀人 , 我可以理解是为了伸张正义 , 甚至有女人红杏出墙 , 我还可以理解是为了繁荣经济 . 然而 , 很长一段时间我都没办法理解有人编写这两个函数是为了什么 ?

后来我想 , 不妨这样理解 , 假设你经常开车经过长安街 , 你会发现经常有戒严的现象发生 , 比如某位领导人要出行 , 比如某位领导人要来访 , 而你可以把 blk_plug_device() 想象成戒严 , 把 blk_remove_plug 想象成开放 . 车流要想行进 , 前提条件是没有戒严 , 换言之 , 没有设卡 , 而 QUEUE_FLAG_PLUGGED 这个 flag 就相当于 “卡”, 设了它队列就不能前进了 , 没有设才有可能前进 . 之所以需要设卡 , 是因为确实有这个需求 , 有时候确实不想让队列前进 .

那么这里我们还看到两个函数被调用了 ,mod_timer 和 del_timer, 这是干嘛使的 ? 还记得 kblockd 么 ? 最早咱们创建了那个工作队列 kblockd_workqueue, 现在是它该出场的时间了 . 让我们把镜头拉回到函数 blk_init_queue_node(). 这个函数我们曾经看过 , 所以这里只贴出其中跟我们这里密切相关的几行 :

1922 q->request_fn = rfn;

1923 q->prep_rq_fn = NULL;

1924 q->unplug_fn = generic_unplug_device;

1925 q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);

1926 q->queue_lock = lock;

1927

1928 blk_queue_segment_boundary(q, 0xffffffff);

1929

1930 blk_queue_make_request(q, __make_request);

首先 q->unplug_fn 被赋上了 generic_unplug_device. 这一点很重要 , 稍后会用到 .

然后来看 blk_queue_make_request(). 这个函数当时咱们并没有讲过 . 来自 block/ll_rw_blk.c:

180 /**

181 * blk_queue_make_request - define an alternate make_request function for a device

182 * @q: the request queue for the device to be affected

183 * @mfn: the alternate make_request function

184 *

185 * Description:

186 * The normal way for &struct bios to be passed to a device

187 * driver is for them to be collected into requests on a request

188 * queue, and then to allow the device driver to select requests

189 * off that queue when it is ready. This works well for many block

190 * devices. However some block devices (typically virtual devices

191 * such as md or lvm) do not benefit from the processing on the

192 * request queue, and are served best by having the requests passed

193 * directly to them. This can be achieved by providing a function

194 * to blk_queue_make_request().

195 *

196 * Caveat:

197 * The driver that does this *must* be able to deal appropriately

198 * with buffers in "highmemory". This can be accomplished by either calling

199 * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling

200 * blk_queue_bounce() to create a buffer in normal memory.

201 **/

202 void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)

203 {

204 /*

205 * set defaults

206 */

207 q->nr_requests = BLKDEV_MAX_RQ;

208 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);

209 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);

210 q->make_request_fn = mfn;

211 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;

212 q->backing_dev_info.state = 0;

213 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;

214 blk_queue_max_sectors(q, SAFE_MAX_SECTORS);

215 blk_queue_hardsect_size(q, 512);

216 blk_queue_dma_alignment(q, 511);

217 blk_queue_congestion_threshold(q);

218 q->nr_batching = BLK_BATCH_REQ;

219

220 q->unplug_thresh = 4; /* hmm */

221 q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */

222 if (q->unplug_delay == 0)

223 q->unplug_delay = 1;

224

225 INIT_WORK(&q->unplug_work, blk_unplug_work);

226

227 q->unplug_timer.function = blk_unplug_timeout;

228 q->unplug_timer.data = (unsigned long)q;

229

230 /*

231 * by default assume old behaviour and bounce for any highmem page

232 */

233 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);

234 }

这里重点关注几个 “unplug” 为名字的成员 . 尤其是 INIT_WORK, 它使得一旦 unplug_work 这项工作被执行 ,blk_unplug_work 这个函数就会被执行 . 而 unplug_timer 这么一赋值 , 我们就知道 , 一旦设了闹钟 , 一旦闹钟时间到了 ,blk_unplug_timeout 这个函数就会被执行 . 并且因为这里设置了 unplug_delay 为 3ms, 使得闹钟的 timeout 就是 3ms, 一旦激活闹钟 ,3ms 之后 blk_unplug_timeout 就会被执行 . 这个函数来自 block/ll_rw_blk.c:

1646 static void blk_unplug_timeout(unsigned long data)

1647 {

1648 request_queue_t *q = (request_queue_t *)data;

1649

1650 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,

1651 q->rq.count[READ] + q->rq.count[WRITE]);

1652

1653 kblockd_schedule_work(&q->unplug_work);

1654 }

可以看到 , 其实就是执行 kblockd_schedule_work, 换言之 , 真正被调用的函数就是 blk_unplug_work().

1636 static void blk_unplug_work(struct work_struct *work)

1637 {

1638 request_queue_t *q = container_of(work, request_queue_t, unplug_work);

1639

1640 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,

1641 q->rq.count[READ] + q->rq.count[WRITE]);

1642

1643 q->unplug_fn(q);

1644 }

而刚才我们说了 ,unplug_fn 被赋上了 generic_unplug_device. 所以真正要执行的是 generic_unplug_device. 而这个函数又长成什么样呢 ?

1601 /**

1602 * generic_unplug_device - fire a request queue

1603 * @q: The &request_queue_t in question

1604 *

1605 * Description:

1606 * Linux uses plugging to build bigger requests queues before letting

1607 * the device have at them. If a queue is plugged, the I/O scheduler

1608 * is still adding and merging requests on the queue. Once the queue

1609 * gets unplugged, the request_fn defined for the queue is invoked and

1610 * transfers started.

1611 **/

1612 void generic_unplug_device(request_queue_t *q)

1613 {

1614 spin_lock_irq(q->queue_lock);

1615 __generic_unplug_device(q);

1616 spin_unlock_irq(q->queue_lock);

1617 }

哦 , 扭扭捏捏大半天 , 其实就是调用 __generic_unplug_device. 而回过头去看这个函数 , 我们知道 , 它也无非就是调用了两个函数 ,blk_remove_plug 和 request_fn. 这下子我们基本上就明白了 . 总结一下就是 :

1. blk_plug_device() 负责戒严 .

2. blk_remove_plug() 负责解禁 .

3. 但是戒严这东西吧 , 也是有时间限制的 , 毕竟长安街就算有重大活动也是短时间的 , 一年中毕竟大多数时间还是得保证道路畅通 . 所以在戒严的时候 , 设了一个定时器 ,unplug_timer, ( 即 mod_timer ), 一旦时间到了就自动执行 blk_remove_plug 去解禁 .

4. 而在解禁的时候就不要忘记把这个定时器给关掉 .( 即 del_timer)

5. 解禁之后调用 request_fn() 开始处理队列中的下一个请求 , 或者说车流开始恢复前行 .

Ok, 这样我们就算是明白这两个戒严与解禁的函数了 . 最后 , 题外话 , 关于 unplug 和 plug, 我觉得更贴切的单词是 activate 和 deactivate, 或者说激活与冻结 , 或者简单的说 , 开与关 .

victorzhangl

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Linux那些事儿之我是Block层(9)scsi命令的前世今生(三)

 下一个更为重要的函数是scsi_dispatch_cmd,来自drivers/scsi/scsi.c: 459 /* 460 * Function: scsi_dispatch_command 461 * 462 * Purpose: Dispatch a command to the low-level driver. 463 *<br /
复制链接

扫一扫

专栏目录