1. 前言
本专题我们开始学习SCSI子系统的相关内容。本专题主要参考了《存储技术原理分析》、ULA、ULK的相关内容。本专题主要以硬件UFS为例,记录SCSI子系统的框架流程。
前面主要讲述了UFS的初始化过程:通过ufshcd_pltfrm_init获取IO资源、中断号,通过scsi_host_alloc为Scsi_Host及私有数据空间hba分配空间,同时通过解析DTS中的属性值初始化hba相关成员,最后通过ufshcd_init对ufs controller进行初始化,并通过ufshcd_async_scan对连接的ufs device进行扫描。扫描时主要先探测ufs device是否连接,如果探测到ufs device则检测ufs device是否已经ready,获取host和device都能支持的传输速率,最后扫描well known lun和normal lun。
在如上的初始化完毕之后,就可以对ufs设备进行io操作了,上层的IO操作将最终转换为对底层UFS的命令处理,本节将介绍UFS命令处理的相关流程。
kernel版本:5.10
平台:arm64
注:
为方便阅读,正文标题采用分级结构标识,每一级用一个"-"表示,如:两级为"|- -",三级为"|- - -"
2. scsi_queue_rq
参考 block多队列分析 - 3. 读文件过程 的分析,我们知道通过文件系统读取块设备上的文件时,经历了如下的流程(以ext4, nullb为例):
vfs_read -> new_sync_read -> ext4_file_read_iter -> generic_file_read_iter -> generic_file_buffered_read -> page_cache_async_readahead -> page_cache_async_ra -> ondemand_readahead -> do_page_cache_ra -> page_cache_ra_unbounded -> read_pages -> blk_finish_plug -> blk_flush_plug_list -> blk_mq_flush_plug_list -> blk_mq_sched_insert_requests -> blk_mq_run_hw_queue -> __blk_mq_delay_run_hw_queue -> blk_mq_run_work_fn -> __blk_mq_run_hw_queue -> blk_mq_dispatch_rq_list -> q->mq_ops->queue_rq
对于此处的q->mq_ops->queue_rq,通过ufs_qcom_probe -> ufshcd_init -> scsi_add_host -> scsi_mq_setup_tags可知:
tag_set->ops = &scsi_mq_ops;
而在ufs_qcom_probe -> ufshcd_init -> blk_mq_init_queue -> blk_mq_init_allocated_queue中:
q->mq_ops = set->ops;
由此可知,q->mq_ops就是scsi_mq_ops,q->mq_ops->queue_rq就是scsi_queue_rq
static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,const struct blk_mq_queue_data *bd)
|--struct request *req = bd->rq;
| struct request_queue *q = req->q;
| struct scsi_device *sdev = q->queuedata;
| struct Scsi_Host *shost = sdev->host;
| struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req)
|--scsi_set_resid(cmd, 0);
|--memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
|--cmd->scsi_done = scsi_mq_done;
|--blk_mq_start_request(req);
| |--blk_add_timer(rq);
| //Dispatch a command to the low-level driver
|--reason = scsi_dispatch_cmd(cmd)
|--struct Scsi_Host *host = cmd->device->host;
|--if (cmd->device->lun_in_cdb)
| cmd->cmnd[1] = (cmd->cmnd[1] & 0x1f) | (cmd->device->lun << 5 & 0xe0);
|--host->hostt->queuecommand(host, cmd);
当scsi_dispatch_cmd进行命令分发的时候,最终会调用到host->hostt->queuecommand(host, cmd)回调函数。这个回调函数的参数host为struct Scsi_Host,cmd为struct scsi_cmnd,它将传递给更下层的ufs层去执行。UFS层对应的queuecommand回调函数为ufshcd_queuecommand,它定义在static struct scsi_host_template ufshcd_driver_template模板中,通过ufshcd_alloc_host->scsi_host_alloc赋值给shost->hostt
|- -ufshcd_queuecommand
static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
|--struct ufshcd_lrb *lrbp;
| hba = shost_priv(host);
| tag = cmd->request->tag;
|--err = ufshcd_hold(hba, true);
|--lrbp = &hba->lrb[tag];
| lrbp->cmd = cmd;
| lrbp->sense_bufflen = UFS_SENSE_SIZE;
| lrbp->sense_buffer = cmd->sense_buffer;
| lrbp->task_tag = tag;
| lrbp->lun = ufshcd_scsi_to_upiu_lun(cmd->device->lun);
| lrbp->intr_cmd = !ufshcd_is_intr_aggr_allowed(hba) ? true : false;
|--ufshcd_prepare_lrbp_crypto(cmd->request, lrbp);
|--ufshcd_comp_scsi_upiu(hba, lrbp)
|--ufshcd_map_sg(hba, lrbp)
|--ufshcd_send_command(hba, tag);
|--scsi_dma_unmap(lrbp->cmd);
|--if (!err)
cmd->scsi_done(cmd);
-
ufshcd_comp_scsi_upiu(hba, lrbp)
主要根据scsi的命令来组合出ufs的upiu。在此之前会用scsi command初始化struct ufshcd_lrb的相关信息,包括cmd、sense_buffer的长度、来自scsi cmd的sense_buffer的地址、task tag、lun number等,因此也就可以通过ufshcd_lrb来组合出ufs upiu了。
(1)通过调用ufshcd_prepare_req_desc_hdr来准备upiu header;
(2)通过调用ufshcd_prepare_utp_scsi_cmd_upiu来组装upiu正文
-
ufshcd_map_sg(hba, lrbp)
(1) prd_table = (struct ufshcd_sg_entry *)lrbp->ucd_prdt_ptr
(2) 将scsi cmd传递的segment地址、大小等初始化给prd_table
-
ufshcd_send_command(hba, tag)
主要通过ufshcd_writel(hba, 1 << task_tag, REG_UTP_TRANSFER_REQ_DOOR_BELL),告知host task_tag对应的upiu已经ready,可以发送给device
3. ufshcd_intr
UFS在初始化的时候通过调用devm_request_irq来申请中断,中断处理函数为ufshcd_intr。有几种情况可以触发中断:UIC层命令完成、UFS command upiu或ufs query upiu完成,以及ufs task management upiu完成均可触发中断,如上的UFS command upiu执行完成也会触发中断。
static irqreturn_t ufshcd_intr(int irq, void *__hba)
|--u32 intr_status, enabled_intr_status = 0;
| irqreturn_t retval = IRQ_NONE;
| struct ufs_hba *hba = __hba;
| int retries = hba->nutrs;
|--intr_status = ufshcd_readl(hba, REG_INTERRUPT_STATUS);
|--while (intr_status && retries--)
enabled_intr_status = intr_status & ufshcd_readl(hba, REG_INTERRUPT_ENABLE);
if (intr_status)
ufshcd_writel(hba, intr_status, REG_INTERRUPT_STATUS);
if (enabled_intr_status)
retval |= ufshcd_sl_intr(hba, enabled_intr_status);
intr_status = ufshcd_readl(hba, REG_INTERRUPT_STATUS);
实际的中断例程处理位于ufshcd_sl_intr
//Interrupt service routine
static irqreturn_t ufshcd_sl_intr(struct ufs_hba *hba, u32 intr_status)
|--irqreturn_t retval = IRQ_NONE;
|--if (ufshcd_is_auto_hibern8_error(hba, intr_status))
| hba->errors |= (UFSHCD_UIC_HIBERN8_MASK & intr_status);
|--if (hba->errors)
| retval |= ufshcd_check_errors(hba);
|--if (intr_status & UFSHCD_UIC_MASK)
| retval |= ufshcd_uic_cmd_compl(hba, intr_status);
|--if (intr_status & UTP_TASK_REQ_COMPL)
| retval |= ufshcd_tmc_handler(hba);
|--if (intr_status & UTP_TRANSFER_REQ_COMPL)
retval |= ufshcd_transfer_req_compl(hba);
此处以transfer request为例说明中断的处理过程
//handle SCSI and query command completion
static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
|--tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
| //outstanding_reqs表示正在处理的bit标志
|--completed_reqs = tr_doorbell ^ hba->outstanding_reqs;
|--if (completed_reqs)
| __ufshcd_transfer_req_compl(hba, completed_reqs);
| return IRQ_HANDLED;
| else
| return IRQ_NONE;
//handle SCSI and query command completion
static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, unsigned long completed_reqs)
|--struct ufshcd_lrb *lrbp;
| struct scsi_cmnd *cmd;
|--for_each_set_bit(index, &completed_reqs, hba->nutrs)
| lrbp = &hba->lrb[index];
| lrbp->compl_time_stamp = ktime_get();
| cmd = lrbp->cmd;
| if (cmd)
| result = ufshcd_transfer_rsp_status(hba, lrbp);
| cmd->result = result;
| cmd->scsi_done(cmd);
| else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE ||
| lrbp->command_type == UTP_CMD_TYPE_UFS_STORAGE)
| if (hba->dev_cmd.complete)
| complete(hba->dev_cmd.complete);
|
| if (ufshcd_is_clkscaling_supported(hba))
| hba->clk_scaling.active_reqs--;
|--hba->outstanding_reqs ^= completed_reqs
|--ufshcd_clk_scaling_update_busy(hba)
-
当检测到状态为UTP_TRANSFER_REQ_COMPL时会执行ufshcd_transfer_req_compl
-
cmd->scsi_done
通过调用cmd->scsi_done来raise软中断。通过前面的scsi_queue_rq分析可知,此处的cmd->scsi_done回调就是scsi_mq_done
static void scsi_mq_done(struct scsi_cmnd *cmd)
|--blk_mq_complete_request(cmd->request);
|--if (!blk_mq_complete_request_remote(rq))
rq->q->mq_ops->complete(rq);
|--scsi_softirq_done(rq)
rq->q->mq_ops->complete的初始化可以参考前面scsi_queue_rq一节中对q->mq_ops的分析,可知rq->q->mq_ops->complete就是scsi_softirq_done
static void scsi_softirq_done(struct request *rq)
|--struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
| INIT_LIST_HEAD(&cmd->eh_entry);
| atomic_inc(&cmd->device->iodone_cnt);
| disposition = scsi_decide_disposition(cmd);
|--switch (disposition) {
case SUCCESS:
scsi_finish_command(cmd);
break;
case NEEDS_RETRY:
scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY);
break;
case ADD_TO_MLQUEUE:
scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
break;
default:
scsi_eh_scmd_add(cmd);
break;
}
scsi_decide_disposition:对命令进行处置,主要根据命令执行的状态分别作出不同的处理,返回成功,失败或重新加入队列;此处将以返回成功为例。
//cleanup and pass command back to upper layer
void scsi_finish_command(struct scsi_cmnd *cmd)
|--struct scsi_device *sdev = cmd->device;
| struct scsi_target *starget = scsi_target(sdev);
| struct Scsi_Host *shost = sdev->host;
| struct scsi_driver *drv;
| unsigned int good_bytes;
|--if (SCSI_SENSE_VALID(cmd))
| cmd->result |= (DRIVER_SENSE << 24);
|--good_bytes = scsi_bufflen(cmd);
|--scsi_io_completion(cmd, good_bytes);
void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
|--int result = cmd->result;
| struct request_queue *q = cmd->device->request_queue;
| struct request *req = cmd->request;
| blk_status_t blk_stat = BLK_STS_OK
|--if (likely(blk_rq_bytes(req) > 0 || blk_stat == BLK_STS_OK))
| if (likely(!scsi_end_request(req, blk_stat, good_bytes)))
| return;
|--if (likely(result == 0))
scsi_io_completion_reprep(cmd, q);
else
scsi_io_completion_action(cmd, result);
static bool scsi_end_request(struct request *req, blk_status_t error,unsigned int bytes)
|--struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
| struct scsi_device *sdev = cmd->device;
| struct request_queue *q = sdev->request_queue;
|--if (blk_update_request(req, error, bytes))
| return true;
|--scsi_mq_uninit_cmd(cmd);
|--percpu_ref_get(&q->q_usage_counter);
|--__blk_mq_end_request(req, error)
|--scsi_run_queue_async(sdev)
bool blk_update_request(struct request *req, blk_status_t error,unsigned int nr_bytes)
|--while (req->bio)
struct bio *bio = req->bio;
unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
if (bio_bytes == bio->bi_iter.bi_size)
req->bio = bio->bi_next;
bio_clear_flag(bio, BIO_TRACE_COMPLETION)
req_bio_endio(req, bio, bio_bytes, error)
total_bytes += bio_bytes;
nr_bytes -= bio_bytes;
if (!nr_bytes)
break
static void req_bio_endio(struct request *rq, struct bio *bio,unsigned int nbytes, blk_status_t error)
|--if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
bio_endio(bio);
|--if (bio->bi_disk)
| rq_qos_done_bio(bio->bi_disk->queue, bio);
|--blk_throtl_bio_endio(bio)
|--bio_uninit(bio);
|--if (bio->bi_end_io)
bio->bi_end_io(bio)
bio->bi_end_io回调来唤醒当前阻塞的进程