1.使用请求队列
块设备驱动在使用请求队列的场景下,会用blk_init_queue()
初始化request_queue
,而函数的第一个参数就是请求处理函数的指针。request_queue
会作为参数传递给我们在调用blk_init_queue()时指定的请求处理函数,块设备驱动请求处理函数的原型为:
static void xxx_req(struct request_queue *q);
这个函数不能由驱动自己调用,只有当内核认为是时候让驱动处理对设备的读写等操作时,它才调用这个函数。该函数的主要工作就是发起与request
对应的块设备I/O动作(但是具体的I/O工作不一定要在该函数内同步完成)。
块设备驱动请求函数例程
/*
 * Request-processing function registered via blk_init_queue().
 * Invoked by the block layer when there is work queued; never called
 * by the driver itself. It either drains and fails the queue (card
 * removed) or kicks the io_work worker to service requests.
 */
static void msb_submit_req(struct request_queue *q)
{
	struct memstick_dev *card = q->queuedata;
	struct msb_data *msb = memstick_get_drvdata(card);
	struct request *req = NULL;

	dbg_verbose("Submit request");

	if (msb->card_dead) { //flag:1
		dbg("Refusing requests on removed card");
		WARN_ON(!msb->io_queue_stopped);
		/* Card is gone: drain the queue, failing every request. */
		while ((req = blk_fetch_request(q)) != NULL) //flag:2
			__blk_end_request_all(req, BLK_STS_IOERR);
		return;
	}

	/* A request is already in flight; io_work will fetch the next one. */
	if (msb->req)
		return;

	if (!msb->io_queue_stopped)
		queue_work(msb->io_queue, &msb->io_work); //flag:3
}
上述代码flag:2
处使用blk_fetch_request()
获得队列中第一个未完成的请求,但是由于flag:1
处msb->card_dead
成立,实际上我们处理不了该请求,所以就直接通过__blk_end_request_all(req, BLK_STS_IOERR)
返回错误了。
正常情况下,通过flag:3
处queue_work(msb->io_queue,&msb->io_work)
启动工作队列执行msb_io_work(struct work_struct *work)
这个函数。
msb_io_work()完成请求处理
/*
 * Work-queue handler that drains the request queue: performs a pending
 * cache flush if requested, otherwise fetches requests one at a time,
 * maps each request to a scatterlist, performs the read or write, and
 * completes the request -- or fails it one page at a time on error.
 */
static void msb_io_work(struct work_struct *work)
{
	struct msb_data *msb = container_of(work, struct msb_data, io_work);
	int page, error, len;
	sector_t lba;
	unsigned long flags;
	struct scatterlist *sg = msb->prealloc_sg;

	dbg_verbose("IO: work started");

	while (1) {
		spin_lock_irqsave(&msb->q_lock, flags);

		/* A pending cache flush takes priority over new requests. */
		if (msb->need_flush_cache) {
			msb->need_flush_cache = false;
			spin_unlock_irqrestore(&msb->q_lock, flags);
			msb_cache_flush(msb);
			continue;
		}

		/* Fetch the next request if none is currently in flight. */
		if (!msb->req) {
			msb->req = blk_fetch_request(msb->queue);
			if (!msb->req) {
				/* Queue empty: worker exits until re-queued. */
				dbg_verbose("IO: no more requests exiting");
				spin_unlock_irqrestore(&msb->q_lock, flags);
				return;
			}
		}
		spin_unlock_irqrestore(&msb->q_lock, flags);

		/* If card was removed meanwhile */
		if (!msb->req)
			return;

		/* process the request */
		dbg_verbose("IO: processing new request");
		blk_rq_map_sg(msb->queue, msb->req, sg);

		/* Convert the 512-byte sector position into (block lba, page). */
		lba = blk_rq_pos(msb->req);
		sector_div(lba, msb->page_size / 512);
		page = sector_div(lba, msb->pages_in_block);

		if (rq_data_dir(msb->req) == READ)
			error = msb_do_read_request(msb, lba, page, sg,
				blk_rq_bytes(msb->req), &len);
		else
			error = msb_do_write_request(msb, lba, page, sg,
				blk_rq_bytes(msb->req), &len);

		spin_lock_irqsave(&msb->q_lock, flags);

		/* Complete the bytes that were transferred successfully;
		 * __blk_end_request() returns false once the request is done. */
		if (len)
			if (!__blk_end_request(msb->req, BLK_STS_OK, len))
				msb->req = NULL;

		/* On error, fail one page worth of the remaining request. */
		if (error && msb->req) {
			blk_status_t ret = errno_to_blk_status(error);
			dbg_verbose("IO: ending one sector of the request with error");
			if (!__blk_end_request(msb->req, ret, msb->page_size))
				msb->req = NULL;
		}

		if (msb->req)
			dbg_verbose("IO: request still pending");

		spin_unlock_irqrestore(&msb->q_lock, flags);
	}
}
static DEFINE_IDR(msb_disk_idr); /* set of used disk numbers */
static DEFINE_MUTEX(msb_disk_lock); /* protects against races in open/release */
/*
 * Block-device open hook: bump the usage count, but only while the
 * driver data and card are still present. Always reports success.
 */
static int msb_bd_open(struct block_device *bdev, fmode_t mode)
{
	struct msb_data *msb = bdev->bd_disk->private_data;

	dbg_verbose("block device open");

	mutex_lock(&msb_disk_lock);
	if (msb != NULL && msb->card != NULL)
		msb->usage_count++;
	mutex_unlock(&msb_disk_lock);

	return 0;
}
在读写无错误的情况下,msb_io_work()中调用的__blk_end_request(msb->req, BLK_STS_OK, len)
实际上告诉上层该请求处理完成;如果读写有错误,则调用__blk_end_request(msb->req, ret, msb->page_size)
,把出错的原因(由errno_to_blk_status(error)转换得到的ret)作为第二个参数传入上层。
msb_io_work()中调用的blk_rq_map_sg()
函数实现于block/blk-merge.c
文件,它(在__blk_bios_map_sg()中)遍历请求包含的所有bio
以及其中的所有片段,将所有与请求相关的页组成一个scatter/gather
的列表。
/*
* map a request to scatterlist, return number of sg entries setup. Caller
* must make sure sg can hold rq->nr_phys_segments entries
*/
/*
 * map a request to scatterlist, return number of sg entries setup. Caller
 * must make sure sg can hold rq->nr_phys_segments entries
 */
int blk_rq_map_sg(struct request_queue *q, struct request *rq,
		  struct scatterlist *sglist)
{
	struct scatterlist *sg = NULL;
	int nsegs = 0;

	/* Requests with a special payload carry a single pre-built bvec. */
	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
		nsegs = __blk_bvec_map_sg(q, rq->special_vec, sglist, &sg);
	else if (rq->bio && bio_op(rq->bio) == REQ_OP_WRITE_SAME)
		/* WRITE_SAME repeats one buffer; map only that first bvec. */
		nsegs = __blk_bvec_map_sg(q, bio_iovec(rq->bio), sglist, &sg);
	else if (rq->bio)
		/* Common path: walk every bio and segment of the request. */
		nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);

	/* Extend the last segment so the transfer length meets the
	 * controller's DMA padding requirement (q->dma_pad_mask). */
	if (unlikely(rq->rq_flags & RQF_COPY_USER) &&
	    (blk_rq_bytes(rq) & q->dma_pad_mask)) {
		unsigned int pad_len =
			(q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
		sg->length += pad_len;
		rq->extra_len += pad_len;
	}

	/* Append the queue's drain buffer as an extra sg entry when the
	 * hardware requires it; zero it first for writes. */
	if (q->dma_drain_size && q->dma_drain_needed(rq)) {
		if (op_is_write(req_op(rq)))
			memset(q->dma_drain_buffer, 0, q->dma_drain_size);
		sg_unmark_end(sg);
		sg = sg_next(sg);
		sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
			    q->dma_drain_size,
			    ((unsigned long)q->dma_drain_buffer) &
			    (PAGE_SIZE - 1));
		nsegs++;
		rq->extra_len += q->dma_drain_size;
	}

	/* Terminate the scatterlist at the last entry actually used. */
	if (sg)
		sg_mark_end(sg);

	/*
	 * Something must have been wrong if the figured number of
	 * segment is bigger than number of req's physical segments
	 */
	WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));

	return nsegs;
}
一般情况下,若外设支持scatter/gather
模式的DMA操作,紧接着,它就会执行pci_map_sg()
或者dma_map_sg()
来进行上述scatter/gather
列表的DMA映射,之后进行硬件的访问。