BDI-1

kernel-2.6.32以后脏数据的下刷彻底取消了pdflush,而是将此部分功能添加到BDI机制中,并且是为每个设备创建了一个名为“flush-设备主次设备号”的线程,用于脏数据的下刷。

backing_dev_info结构体

struct backing_dev_info {
	struct list_head bdi_list;
	unsigned long ra_pages;	/* max readahead in PAGE_CACHE_SIZE units */
	unsigned long state;	/* Always use atomic bitops on this */
	unsigned int capabilities; /* Device capabilities */
	congested_fn *congested_fn; /* Function pointer if device is md/dm */
	void *congested_data;	/* Pointer to aux data for congested func */

	char *name;

	struct percpu_counter bdi_stat[NR_BDI_STAT_ITEMS];

	unsigned long bw_time_stamp;	/* last time write bw is updated */
	unsigned long dirtied_stamp;
	unsigned long written_stamp;	/* pages written at bw_time_stamp */
	unsigned long write_bandwidth;	/* the estimated write bandwidth */
	unsigned long avg_write_bandwidth; /* further smoothed write bw */

	/*
	 * The base dirty throttle rate, re-calculated on every 200ms.
	 * All the bdi tasks' dirty rate will be curbed under it.
	 * @dirty_ratelimit tracks the estimated @balanced_dirty_ratelimit
	 * in small steps and is much more smooth/stable than the latter.
	 */
	unsigned long dirty_ratelimit;
	unsigned long balanced_dirty_ratelimit;

	struct fprop_local_percpu completions;
	int dirty_exceeded;

	unsigned int min_ratio;
	unsigned int max_ratio, max_prop_frac;

	struct bdi_writeback wb;  /* default writeback info for this bdi */
	spinlock_t wb_lock;	  /* protects work_list */

	struct list_head work_list;

	struct device *dev;

	struct timer_list laptop_mode_wb_timer;

#ifdef CONFIG_DEBUG_FS
	struct dentry *debug_dir;
	struct dentry *debug_stats;
#endif
};

对应的state状态:

enum bdi_state {
	BDI_wb_alloc,		/* Default embedded wb allocated */ 表示该设备上已经申请出一个下刷任务
	BDI_async_congested,	/* The async (write) queue is getting full */
	BDI_sync_congested,	/* The sync queue is getting full */
	BDI_REGISTERED,		/* bdi_register() was done */ 表示该设备已经创建了flush线程
	BDI_writeback_running,	/* Writeback is in progress */
	BDI_unused,		/* Available bits start here */
};

用于sys接口计数统计的数值:

enum bdi_stat_item {
	BDI_RECLAIMABLE,
	BDI_WRITEBACK,
	BDI_DIRTIED,
	BDI_WRITTEN,
	NR_BDI_STAT_ITEMS
};

初始化函数blk_alloc_queue_node,初始化ra_pages、state、capabilities、unplug_io_fn、unplug_io_data、name,bdi_init函数初始化其他的值。

struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
	struct request_queue *q;
	int err;

	q = kmem_cache_alloc_node(blk_requestq_cachep,
				gfp_mask | __GFP_ZERO, node_id);
	if (!q)
		return NULL;

	q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
	if (q->id < 0)
		goto fail_q;

	q->backing_dev_info.ra_pages =
			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
	q->backing_dev_info.state = 0;
	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
	q->backing_dev_info.name = "block";
	q->node = node_id;

	err = bdi_init(&q->backing_dev_info);

	setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
		    laptop_mode_timer_fn, (unsigned long) q); 设置5秒超时执行下刷函数
	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);

backing_dev_info结构中描述每个设备的一些bdi状态信息,BDI中提供了一个sys接口,在/sys/kernel/debug/bdi目录下,每个设备以设备号区分,可以查看每个设备与bdi有关的状态等。

下面是CentOS的信息:

海思3520的内核3.10.y没有此信息,但是看内核中是由开CONFIG_DEBUG_FS宏的,也没有对应的打印,为啥?

内核启动

1.  创建名为sync_supers的线程,此线程由定时器来唤醒,此外无其他唤醒模式。每5s钟被唤醒执行一次函数sync_supers,用来下刷系统super_blocks链表中所有的元数据块信息。没看到代码实现啊?

2、定义一个默认的结构backing_dev_info,同时会创建一个线程bdi-default。

struct backing_dev_info default_backing_dev_info = {
	.name		= "default",
	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
	.state		= 0,
	.capabilities	= BDI_CAP_MAP_COPY,
};
EXPORT_SYMBOL_GPL(default_backing_dev_info);
static int __init default_bdi_init(void)
{
	int err;

	bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE |
					      WQ_UNBOUND | WQ_SYSFS, 0);
	if (!bdi_wq)
		return -ENOMEM;

	err = bdi_init(&default_backing_dev_info);
	if (!err)
		bdi_register(&default_backing_dev_info, NULL, "default");
	err = bdi_init(&noop_backing_dev_info);

	return err;
}
subsys_initcall(default_bdi_init);

3、有新的设备分区添加时,add_disk会为每个设备定义一个结构backing_dev_info,然后将此结构挂到bdi_list链表尾;

void add_disk(struct gendisk *disk)
{
	struct backing_dev_info *bdi;
	/* Register BDI before referencing it from bdev */
	bdi = &disk->queue->backing_dev_info;
	bdi_register_dev(bdi, disk_devt(disk));
int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
{
	return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev));
}
EXPORT_SYMBOL(bdi_register_dev);
int bdi_register(struct backing_dev_info *bdi, struct device *parent,
		const char *fmt, ...)
{
	va_list args;
	struct device *dev;

	if (bdi->dev)	/* The driver needs to use separate queues per device */
		return 0;

	va_start(args, fmt);
	dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
	va_end(args);
	if (IS_ERR(dev))
		return PTR_ERR(dev);

	bdi->dev = dev;

	bdi_debug_register(bdi, dev_name(dev));
	set_bit(BDI_REGISTERED, &bdi->state);

	spin_lock_bh(&bdi_lock);
	list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
	spin_unlock_bh(&bdi_lock);

	trace_writeback_bdi_register(bdi);
	return 0;
}
EXPORT_SYMBOL(bdi_register);

下刷动作

wb_writeback_work用来描述一次下刷任务的信息,如该次下刷任务以何种模式下刷,下刷多少数据等,多用在唤醒下刷线程之前定义。

struct wb_writeback_work {
	long nr_pages;
	struct super_block *sb;
	unsigned long *older_than_this;
	enum writeback_sync_modes sync_mode;
	unsigned int tagged_writepages:1;
	unsigned int for_kupdate:1;
	unsigned int range_cyclic:1;
	unsigned int for_background:1;
	enum wb_reason reason;		/* why was writeback initiated? */

	struct list_head list;		/* pending work list */
	struct completion *done;	/* set if the caller waits */
};

触发下刷的操作时,可以将此work挂到一个工作队列中,由下刷线程来完成任务;也可以用此结构的信息,指导结构writeback_control中的值,然后直接调用下刷函数,完成工作任务。

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值