Linux SYNC-block

The previous post analyzed the sync system call flow in the fs layer; this one follows the bios down through the block layer.

BIO

The pages flushed down by the fs layer are packed into bios, with address-contiguous pages merged together, and submitted via submit_bio:

void submit_bio(int rw, struct bio *bio)
{
	bio->bi_rw |= rw;

	/*
	 * If it's a regular read/write or a barrier with data attached,
	 * go through the normal accounting stuff before submission.
	 */
	if (bio_has_data(bio)) {
		unsigned int count;

		if (unlikely(rw & REQ_WRITE_SAME))
			count = bdev_logical_block_size(bio->bi_bdev) >> 9;
		else
			count = bio_sectors(bio);

		if (rw & WRITE) {
			count_vm_events(PGPGOUT, count);
		} else {
			task_io_account_read(bio->bi_size);
			count_vm_events(PGPGIN, count);
		}

		if (unlikely(block_dump)) {
			char b[BDEVNAME_SIZE];
			printk(KERN_INFO "%s(%d): %s block %Lu on %s (%u sectors)\n",
			current->comm, task_pid_nr(current),
				(rw & WRITE) ? "WRITE" : "READ",
				(unsigned long long)bio->bi_sector * 512 / 4096,
				bdevname(bio->bi_bdev, b),
				count);
		}

		io_bio += (count / 8);	/* author's instrumentation: total pages submitted (8 sectors per 4 KiB page) */
	}
	
	generic_make_request(bio);
}
EXPORT_SYMBOL(submit_bio);
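
For orientation, here is a minimal sketch of what a caller of submit_bio looks like, assuming the pre-3.14 bio layout used in the listing above (bi_sector, bi_size); submit_one_page, my_end_io and their arguments are illustrative, not code from the kernel:

/* Hypothetical caller of submit_bio, pre-3.14 style (bi_sector, bi_size). */
static void my_end_io(struct bio *bio, int err)
{
	/* called from completion context once the device finishes the write */
	bio_put(bio);
}

static void submit_one_page(struct block_device *bdev, struct page *page,
			    sector_t sector)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 1);	/* room for one bio_vec */

	bio->bi_bdev = bdev;
	bio->bi_sector = sector;			/* in 512-byte sectors */
	bio->bi_end_io = my_end_io;
	bio_add_page(bio, page, PAGE_SIZE, 0);		/* grows bi_size by 4096 */

	submit_bio(WRITE, bio);				/* rw is OR-ed into bi_rw */
}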

A submitted bio reaches the I/O scheduler via generic_make_request, where bios are merged and eventually formed into requests. Each request is later assembled into a SCSI command (cmd) by the callback scsi_request_fn, and the cmd is sent down to the SCSI driver layer to be written to disk. Abridged, generic_make_request boils down to the loop below (the check that simply queues the bio on current->bio_list when we are already inside generic_make_request is omitted):

void generic_make_request(struct bio *bio)
{
	struct bio_list bio_list_on_stack;

	bio_list_init(&bio_list_on_stack);
	current->bio_list = &bio_list_on_stack;
	do {
		struct request_queue *q = bdev_get_queue(bio->bi_bdev);

		q->make_request_fn(q, bio);

		bio = bio_list_pop(current->bio_list);
	} while (bio);
	current->bio_list = NULL; /* deactivate */
}
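
Note the role of bio_list_on_stack: stacked drivers such as md/dm may call generic_make_request again from inside their make_request_fn, and in that case the elided check just adds the bio to current->bio_list and returns; the loop here then pops and handles those child bios one by one, turning recursion into iteration with bounded stack depth. A standalone C analogy of that pattern (all names illustrative):

#include <stdio.h>

/* Userspace analogy of current->bio_list: recursion becomes iteration. */
struct work { struct work *next; int depth; };

static struct work *worklist;			/* plays current->bio_list */

static void push(struct work *w) { w->next = worklist; worklist = w; }

static struct work *pop(void)
{
	struct work *w = worklist;

	if (w)
		worklist = w->next;
	return w;
}

/* plays make_request_fn: a "stacked driver" that splits off child work */
static void handle(struct work *w)
{
	static struct work child[4];
	static int used;

	printf("handling depth %d\n", w->depth);
	if (w->depth < 3) {
		child[used].depth = w->depth + 1;
		push(&child[used++]);		/* defer instead of recursing */
	}
}

int main(void)
{
	struct work first = { .next = NULL, .depth = 0 };
	struct work *w = &first;

	do {					/* the generic_make_request loop */
		handle(w);
		w = pop();
	} while (w);
	return 0;
}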

What runs in that loop is the queue's make_request_fn. Where does make_request_fn get assigned? See below:

struct request_queue *
blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
			 spinlock_t *lock)
{
	blk_queue_make_request(q, blk_queue_bio);

So blk_queue_bio is installed as the make_request_fn for request-based drivers. The request_queue carries several important callbacks (a sketch of a bio-based driver that installs its own make_request_fn follows the list):

	request_fn_proc		*request_fn;	/* drains requests off the queue; scsi_request_fn here */
	make_request_fn		*make_request_fn;
	prep_rq_fn		*prep_rq_fn;
	unprep_rq_fn		*unprep_rq_fn;
	merge_bvec_fn		*merge_bvec_fn;
	softirq_done_fn		*softirq_done_fn;
	rq_timed_out_fn		*rq_timed_out_fn;
	dma_drain_needed_fn	*dma_drain_needed;
	lld_busy_fn		*lld_busy_fn;
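
For contrast, a bio-based driver (brd, md/dm) does not use blk_queue_bio or the elevator at all: it allocates a bare queue and installs its own make_request_fn. A minimal sketch against the 3.x API (my_dev and my_create_queue are assumptions):

/* Sketch of a bio-based driver: no elevator, no struct request. */
struct my_dev {				/* hypothetical driver state */
	struct request_queue *queue;
};

static void my_make_request(struct request_queue *q, struct bio *bio)
{
	/* a real driver would walk the bio's segments and move data here */
	bio_endio(bio, 0);		/* complete the bio with success */
}

static int my_create_queue(struct my_dev *dev)
{
	dev->queue = blk_alloc_queue(GFP_KERNEL);
	if (!dev->queue)
		return -ENOMEM;
	/* install our handler instead of the default blk_queue_bio path */
	blk_queue_make_request(dev->queue, my_make_request);
	return 0;
}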

Now look at what blk_queue_bio does. Its main job is merging the bio into a request, and there are three places the bio can land:

void blk_queue_bio(struct request_queue *q, struct bio *bio)
{
	struct request *req;
	unsigned int request_count = 0;
	int el_ret, rw_flags;

	if (attempt_plug_merge(q, bio, &request_count))
		return;		/* 1. merged into the current task's plug list */

	el_ret = elv_merge(q, &req, bio);	/* 2. try a back or front merge in the elevator */
	if (el_ret == ELEVATOR_BACK_MERGE) {
		if (bio_attempt_back_merge(q, req, bio)) {
			elv_bio_merged(q, req, bio);
			if (!attempt_back_merge(q, req))
				elv_merged_request(q, req, el_ret);
			goto out_unlock;
		}
	} else if (el_ret == ELEVATOR_FRONT_MERGE) {
		if (bio_attempt_front_merge(q, req, bio)) {
			elv_bio_merged(q, req, bio);
			if (!attempt_front_merge(q, req))
				elv_merged_request(q, req, el_ret);
			goto out_unlock;
		}
	}

	req = get_request(q, rw_flags, bio, GFP_NOIO);	/* 3. no merge possible: allocate a new request */
	if (unlikely(!req)) {
		bio_endio(bio, -ENODEV);	/* @q is dead */
		goto out_unlock;
	}
	init_request_from_bio(req, bio);
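
The plug that attempt_plug_merge targets is set up by the submitting task itself: batching submissions between blk_start_plug and blk_finish_plug lets consecutive bios merge in the task-local list without taking the queue lock each time, and blk_finish_plug is one of the unplug points discussed next. A sketch, reusing the hypothetical submit_one_page from earlier:

/* Batching writes under a plug (3.x API); 8 sectors per 4 KiB page. */
static void write_pages(struct block_device *bdev, struct page **pages,
			int nr_pages, sector_t sector)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);
	for (i = 0; i < nr_pages; i++)
		submit_one_page(bdev, pages[i], sector + i * 8);
	blk_finish_plug(&plug);	/* unplug: merged requests reach the queue */
}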

Once the bio has been merged, or its new request added to the scheduler, an unplug happens when the conditions are met, and SCSI is told to issue commands and write the data out; the call made is q->request_fn(q):

inline void __blk_run_queue_uncond(struct request_queue *q)
{
	if (unlikely(blk_queue_dead(q)))
		return;

	/*
	 * Some request_fn implementations, e.g. scsi_request_fn(), unlock
	 * the queue lock internally. As a result multiple threads may be
	 * running such a request function concurrently. Keep track of the
	 * number of active request_fn invocations such that blk_drain_queue()
	 * can wait until all these request_fn calls have finished.
	 */
	q->request_fn_active++;
	q->request_fn(q);
	q->request_fn_active--;
}

Where is request_fn assigned? In the same place as make_request_fn above:

scsi_alloc_queue
	__scsi_alloc_queue(sdev->host, scsi_request_fn);
		blk_init_queue(request_fn, NULL);
			blk_init_allocated_queue(uninit_q, rfn, lock);					
				q->request_fn		= rfn;	

Now see what scsi_request_fn does. The important function is blk_peek_request, which takes the request out and has it assembled into a cmd that SCSI understands; finally the cmd is handed to SCSI to do the work:

static void scsi_request_fn(struct request_queue *q)
{
	struct request *req;
	struct scsi_cmnd *cmd;

	for (;;) {
		int rtn;
		/*
		 * get next queueable request.  We do this early to make sure
		 * that the request is fully prepared even if we cannot 
		 * accept it.
		 */
		req = blk_peek_request(q);

		cmd = req->special;
		scsi_init_cmd_errh(cmd);
		rtn = scsi_dispatch_cmd(cmd);

		io_page += (scsi_bufflen(cmd) / 4096);	/* author's instrumentation: pages handed to SCSI */
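
Both instrumentation counters rest on the same arithmetic: a 4 KiB page spans eight 512-byte sectors, so io_bio adds count/8 pages per bio, and io_page adds scsi_bufflen(cmd)/4096 pages per command. A standalone check of the conversion:

#include <stdio.h>

#define SECTOR_SIZE	512
#define PAGE_SZ		4096		/* stands in for PAGE_SIZE */

int main(void)
{
	unsigned int sectors = 32;			/* e.g. bio_sectors(bio) */
	unsigned int buflen = sectors * SECTOR_SIZE;	/* e.g. scsi_bufflen(cmd) */

	/* both prints show 4 pages for a 32-sector (16 KiB) transfer */
	printf("pages via sectors: %u\n", sectors / (PAGE_SZ / SECTOR_SIZE));
	printf("pages via buflen:  %u\n", buflen / PAGE_SZ);
	return 0;
}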

Now focus on prep_rq_fn, the function that assembles a request into a cmd:

struct request *blk_peek_request(struct request_queue *q)
{
	struct request *rq;
	int ret;

	while ((rq = __elv_next_request(q)) != NULL) {

		rq = blk_pm_peek_request(q, rq);

		if (!q->prep_rq_fn)
			break;

		ret = q->prep_rq_fn(q, rq);
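
blk_peek_request acts on the value prep_rq_fn returns: BLKPREP_OK lets the request continue toward dispatch, BLKPREP_DEFER leaves it queued to be retried later, and BLKPREP_KILL fails it. A minimal, hypothetical prep function showing the contract (my_build_cmd is an assumption):

/* Hypothetical prep_rq_fn illustrating the return-value contract. */
static int my_prep_fn(struct request_queue *q, struct request *rq)
{
	if (rq->cmd_type != REQ_TYPE_FS)
		return BLKPREP_KILL;	/* fail: we only handle fs requests */

	rq->special = my_build_cmd(rq);	/* hypothetical cmd constructor */
	if (!rq->special)
		return BLKPREP_DEFER;	/* out of resources, retry later */

	return BLKPREP_OK;		/* ready for dispatch */
}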

Where is prep_rq_fn assigned? In drivers/scsi/sd.c:

sd_probe
	async_schedule_domain(sd_probe_async, sdkp, &scsi_sd_probe_domain);
		blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
			q->prep_rq_fn = pfn;

Here is the prep_rq_fn implementation; the variable declarations at the top already show this is the request-to-SCSI conversion:

static int sd_prep_fn(struct request_queue *q, struct request *rq)
{
	struct scsi_cmnd *SCpnt;
	struct scsi_device *sdp = q->queuedata;
	struct gendisk *disk = rq->rq_disk;
	struct scsi_disk *sdkp;

This function looks fairly involved; I'll continue writing once I've studied it more...

Analysis of the 3.18.29 kernel code

drivers/scsi/sd.c

static struct scsi_driver sd_template = {
	.owner			= THIS_MODULE,
	.gendrv = {
		.name		= "sd",
		.probe		= sd_probe,
		.remove		= sd_remove,
		.shutdown	= sd_shutdown,
		.pm		= &sd_pm_ops,
	},
	.rescan			= sd_rescan,
	.init_command		= sd_init_command,
	.uninit_command		= sd_uninit_command,
	.done			= sd_done,
	.eh_action		= sd_eh_action,
};
static int sd_init_command(struct scsi_cmnd *cmd)
{
	struct request *rq = cmd->request;

	if (rq->cmd_flags & REQ_DISCARD)
		return sd_setup_discard_cmnd(cmd);
	else if (rq->cmd_flags & REQ_WRITE_SAME)
		return sd_setup_write_same_cmnd(cmd);
	else if (rq->cmd_flags & REQ_FLUSH)
		return sd_setup_flush_cmnd(cmd);
	else
		return sd_setup_read_write_cmnd(cmd);
}
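
Since this series is about sync, the REQ_FLUSH branch deserves a look: sd_setup_flush_cmnd turns the flush request into a SYNCHRONIZE CACHE (10) command with no data transfer, which is what finally asks the disk to empty its volatile write cache. In 3.18 it looks roughly like this (quoted from drivers/scsi/sd.c from memory, lightly abridged; worth checking against your tree):

static int sd_setup_flush_cmnd(struct scsi_cmnd *cmd)
{
	struct request *rq = cmd->request;

	/* flush requests don't perform I/O, zero the S/G table */
	memset(&cmd->sdb, 0, sizeof(cmd->sdb));

	cmd->cmnd[0] = SYNCHRONIZE_CACHE;	/* 10-byte CDB */
	cmd->cmd_len = 10;
	cmd->transfersize = 0;
	cmd->allowed = SD_MAX_RETRIES;

	rq->timeout = rq->q->rq_timeout * SD_FLUSH_TIMEOUT_MULTIPLIER;
	return BLKPREP_OK;
}

The read/write path is sd_setup_read_write_cmnd: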
static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
{
	struct request *rq = SCpnt->request;
	struct scsi_device *sdp = SCpnt->device;
	struct gendisk *disk = rq->rq_disk;
	struct scsi_disk *sdkp;
	sector_t block = blk_rq_pos(rq);
	sector_t threshold;
	unsigned int this_count = blk_rq_sectors(rq);
	unsigned int dif, dix;
	int ret;
	unsigned char protect;

	/*
	 * Some SD card readers can't handle multi-sector accesses which touch
	 * the last one or two hardware sectors.  Split accesses as needed.
	 */
	threshold = get_capacity(disk) - SD_LAST_BUGGY_SECTORS *
		(sdp->sector_size / 512);

	if (unlikely(sdp->last_sector_bug && block + this_count > threshold)) {
		if (block < threshold) {
			/* Access up to the threshold but not beyond */
			this_count = threshold - block;
		} else {
			/* Access only a single hardware sector */
			this_count = sdp->sector_size / 512;
		}
	}
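
A worked example of that split, with assumed numbers (the SD_LAST_BUGGY_SECTORS value of 8 is taken from drivers/scsi/sd.h):

#include <stdio.h>

#define SD_LAST_BUGGY_SECTORS	8	/* assumed, from drivers/scsi/sd.h */

int main(void)
{
	unsigned long long capacity = 1000000;	/* disk size, 512-byte sectors */
	unsigned long long block = 999968;	/* request start, 512-byte units */
	unsigned int sector_size = 4096;	/* hardware sector size */
	unsigned int this_count = 64;		/* request length in sectors */
	unsigned long long threshold;

	threshold = capacity - SD_LAST_BUGGY_SECTORS * (sector_size / 512);

	if (block + this_count > threshold) {
		if (block < threshold)
			this_count = threshold - block;	/* trim at threshold */
		else
			this_count = sector_size / 512;	/* one hardware sector */
	}
	/* prints: threshold=999936 this_count=8 */
	printf("threshold=%llu this_count=%u\n", threshold, this_count);
	return 0;
}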

drivers/scsi/scsi_lib.c

struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
{
	blk_queue_prep_rq(q, scsi_prep_fn);
static int scsi_prep_fn(struct request_queue *q, struct request *req)
{
	struct scsi_device *sdev = q->queuedata;
	struct scsi_cmnd *cmd;
	int ret;

	ret = scsi_prep_state_check(sdev, req);
	if (ret != BLKPREP_OK)
		goto out;

	cmd = scsi_get_cmd_from_req(sdev, req);
	if (unlikely(!cmd)) {
		ret = BLKPREP_DEFER;
		goto out;
	}

	ret = scsi_setup_cmnd(sdev, req);
out:
	return scsi_prep_return(q, req, ret);
}
static int scsi_setup_cmnd(struct scsi_device *sdev, struct request *req)
{
	struct scsi_cmnd *cmd = req->special;

	switch (req->cmd_type) {
	case REQ_TYPE_FS:
		return scsi_setup_fs_cmnd(sdev, req);
static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
{
	struct scsi_cmnd *cmd = req->special;
	return scsi_cmd_to_driver(cmd)->init_command(cmd);
}
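
Putting the 3.18 pieces shown above together, the prepare path from request to cmd is:

scsi_prep_fn
	scsi_get_cmd_from_req
	scsi_setup_cmnd
		scsi_setup_fs_cmnd			(REQ_TYPE_FS)
			scsi_cmd_to_driver(cmd)->init_command(cmd)	i.e. sd_init_command
				sd_setup_read_write_cmnd / sd_setup_flush_cmnd / ...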

So how does SCSI process the cmd, and how is a request for a single sector handled?

static void scsi_request_fn(struct request_queue *q)
	__releases(q->queue_lock)
	__acquires(q->queue_lock)
{
	struct request *req;
	struct scsi_cmnd *cmd;

	for (;;) {
		int rtn;
		/*
		 * get next queueable request.  We do this early to make sure
		 * that the request is fully prepared even if we cannot
		 * accept it.
		 */
		req = blk_peek_request(q);
		/*
		 * Dispatch the command to the low-level driver.
		 */
		cmd->scsi_done = scsi_done;
		rtn = scsi_dispatch_cmd(cmd);
int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
{
	rtn = host->hostt->queuecommand(host, cmd);

Our device is a USB-to-SD-card reader, so which queuecommand does this hit? For USB mass storage, the host template's queuecommand is implemented in drivers/usb/storage/scsiglue.c.

For mmc, the queue is set up in drivers/mmc/card/queue.c:203:    mq->queue = blk_init_queue(mmc_request_fn, lock);

The softirq bottom half of command completion: http://blog.chinaunix.net/uid-15456765-id-3203506.html

 
