bcache 是 lk 中为块设备(block device)提供的一种缓存机制:它使用 DRAM 临时缓存设备中的内容,以提高对块设备的读写速度。
使用bcache 来代表一个block的缓存
/* State of one block cache instance placed in front of a block device. */
struct bcache {
/* device that cache misses are read from (bio_read) and dirty blocks are written to (bio_write) */
bdev_t *dev;
/* size in bytes of every cached block; also the size of each block's data buffer */
size_t block_size;
/* number of entries in blocks[] */
int count;
/* counters updated by the cache code: hits, misses, depth, reads, writes */
struct bcache_stats stats;
/* blocks that currently hold no cached data */
struct list_node free_list;
/* blocks holding cached data; a lookup hit moves the block to the tail */
struct list_node lru_list;
/* array of count block descriptors allocated by bcache_create() */
struct bcache_block *blocks;
};
其中的dev代表是具体的block设备,例如usb,emmc等。
free_list 和 lru_list 分别保存空闲的 bcache_block 和已缓存内容的 bcache_block;lru_list 按最近使用的顺序排列,最近访问的 block 位于链表尾部。
bcache_block 代表具体缓存的内容
在Bcache.h中提供了5个接口给外界使用
/* Create a cache of block_count blocks, each block_size bytes, in front of dev. */
bcache_t bcache_create(bdev_t *dev, size_t block_size, int block_count);
/* Free the per-block buffers and the cache object; prints a warning for blocks still marked dirty. */
void bcache_destroy(bcache_t);
/* Copy the contents of the given block into the caller's buffer; returns 0 on success, -1 on error. */
int bcache_read_block(bcache_t, void *, uint block);
// get and put a pointer directly to the block
/* Store a pointer to the cached data of the block and take a reference on it; 0 on success, -1 on error. */
int bcache_get_block(bcache_t, void **, uint block);
/* Drop one reference previously taken with bcache_get_block(). */
int bcache_put_block(bcache_t, uint block);
也就是说,使用者只需要这5个接口就可以使用bcache这个feature。
我们来分别看一下。
/*
 * Create a block cache in front of a block device.
 *
 * dev          device that cache misses are read from
 * block_size   size in bytes of every cached block
 * block_count  number of blocks the cache can hold
 *
 * Returns an opaque cache handle, or NULL if block_count is negative or an
 * allocation fails. Everything allocated so far is released on failure.
 */
bcache_t bcache_create(bdev_t *dev, size_t block_size, int block_count)
{
    struct bcache *cache;
    int i;

    if (block_count < 0)
        return NULL;

    /* allocate the cache object first, then initialize its fields */
    cache = malloc(sizeof(*cache));
    if (cache == NULL)
        return NULL;

    cache->dev = dev;
    cache->block_size = block_size;
    cache->count = block_count;
    memset(&cache->stats, 0, sizeof(cache->stats));

    list_initialize(&cache->free_list);
    list_initialize(&cache->lru_list);

    /* malloc(0) may legitimately return NULL, so only a non-empty request can fail */
    cache->blocks = malloc(sizeof(struct bcache_block) * block_count);
    if (cache->blocks == NULL && block_count > 0) {
        free(cache);
        return NULL;
    }

    for (i = 0; i < block_count; i++) {
        cache->blocks[i].ref_count = 0;
        cache->blocks[i].is_dirty = false;
        cache->blocks[i].ptr = malloc(block_size);
        if (cache->blocks[i].ptr == NULL && block_size > 0) {
            /* unwind the data buffers allocated by earlier iterations */
            while (i-- > 0)
                free(cache->blocks[i].ptr);
            free(cache->blocks);
            free(cache);
            return NULL;
        }

        // add to the free list
        list_add_head(&cache->free_list, &cache->blocks[i].node);
    }

    return (bcache_t)cache;
}
这个接口主要是创建一个新的缓存:dev 表示具体的设备,block_size 和 block_count 决定缓存的总大小(block_size * block_count),一般情况下远小于 block 设备的大小。举个例子,usb 设备的大小是 4GB,但不可能通过 bcache_create 创建一个同样是 4GB 的缓存,因为 bcache 是用内存来缓存内容的,系统一般没有这么多内存;而经常访问的内容可能只有几 MB,所以缓存几 MB 就可以了。
/*
 * Tear down a cache created by bcache_create().
 *
 * Frees every block buffer, the block descriptor array and the cache object.
 * Nothing is written back to the device here: a block that is still marked
 * dirty only produces a warning.
 */
void bcache_destroy(bcache_t _cache)
{
    struct bcache *cache = _cache;
    int i;

    for (i = 0; i < cache->count; i++) {
        DEBUG_ASSERT(cache->blocks[i].ref_count == 0);

        if (cache->blocks[i].is_dirty)
            printf("warning: freeing dirty block %u\n",
                   cache->blocks[i].blocknum);

        free(cache->blocks[i].ptr);
    }

    /* the descriptor array is a separate allocation from the cache object */
    free(cache->blocks);
    free(cache);
}
bcache_destroy和bcache_create执行的操作相反,释放掉bcache_create申请的memory。code很简单就不解释了
不过释放memory的时候会判断是否缓存已经flush到设备中了,如果没有的话,会给出警告信息。
if (cache->blocks[i].is_dirty)
printf("warning: freeing dirty block %u\n",
cache->blocks[i].blocknum);
看第三个函数bcache_read_block,这个函数用于从设备中读取内容,并缓存到dram中,并将DRAM中缓存copy到buf中
/*
 * Copy one block into a caller-supplied buffer.
 *
 * The block is looked up in the cache and read from the device on a miss;
 * cache->block_size bytes are then copied into buf.
 *
 * Returns 0 on success, -1 if the block could not be obtained.
 */
int bcache_read_block(bcache_t _cache, void *buf, uint blocknum)
{
    struct bcache *bc = _cache;

    LTRACEF("buf %p, blocknum %u\n", buf, blocknum);

    /* locate the cached block first */
    struct bcache_block *entry = find_or_fill_block(bc, blocknum);
    if (entry != NULL) {
        /* hand the cached contents to the caller */
        memcpy(buf, entry->ptr, bc->block_size);
        return 0;
    }

    /* error */
    return -1;
}
继续看find_or_fill_block的实现
/*
 * Return the cache block holding blocknum, reading it from the device on a miss.
 *
 * Returns NULL when no block can be allocated (alloc_block() failed) or when
 * the device read reports an error.
 */
static struct bcache_block *find_or_fill_block(struct bcache *cache, uint blocknum)
{
    int err;

    LTRACEF("block %u\n", blocknum);

    /* if the block is already cached, return it directly */
    struct bcache_block *block = find_block(cache, blocknum);
    if (block == NULL) {
        LTRACEF("wasn't allocated\n");

        /* allocate a new block and fill it from the device with bio_read */
        block = alloc_block(cache);
        if (block == NULL) {
            /* alloc_block found nothing reusable or could not flush a dirty block */
            return NULL;
        }

        LTRACEF("wasn't allocated, new block %p\n", block);

        block->blocknum = blocknum;
        err = bio_read(cache->dev, block->ptr, (off_t)blocknum * cache->block_size, cache->block_size);
        if (err < 0) {
            /*
             * free the block, return an error.
             * alloc_block() linked it into lru_list, so it must be unlinked
             * before it is put on free_list, otherwise both lists are corrupted.
             */
            list_delete(&block->node);
            list_add_tail(&cache->free_list, &block->node);
            return NULL;
        }

        cache->stats.reads++;
    }

    return block;
}
我们先看是如何在缓存中查找bcache_block的
/*
 * Look up blocknum among the blocks on the LRU list.
 *
 * On a hit the block is moved to the tail of lru_list, the hit counter is
 * bumped and the number of entries inspected is added to stats.depth.
 * On a miss the miss counter is bumped and NULL is returned.
 */
static struct bcache_block *find_block(struct bcache *cache, uint blocknum)
{
    struct bcache_block *entry = NULL;
    uint32_t visited = 0;

    LTRACEF("num %u\n", blocknum);

    list_for_every_entry(&cache->lru_list, entry, struct bcache_block, node) {
        LTRACEF("looking at entry %p, num %u\n", entry, entry->blocknum);
        visited++;

        if (entry->blocknum != blocknum)
            continue;

        /* hit: re-queue at the tail so it counts as most recently used */
        list_delete(&entry->node);
        list_add_tail(&cache->lru_list, &entry->node);

        cache->stats.hits++;
        cache->stats.depth += visited;
        return entry;
    }

    cache->stats.misses++;
    return NULL;
}
原来是通过bcache_block 中的blocknum来判断的。如果相等则认为找到
/* Descriptor of one cached block. */
struct bcache_block {
/* links the block into either the cache's free_list or its lru_list */
struct list_node node;
/* device block number cached here; assigned in find_or_fill_block() on a miss */
bnum_t blocknum;
/* raised by bcache_get_block(), lowered by bcache_put_block(); alloc_block() only reuses blocks at 0 */
int ref_count;
/* set by bcache_mark_block_dirty(), cleared by flush_block() after a successful write */
bool is_dirty;
/* data buffer of block_size bytes allocated in bcache_create() */
void *ptr;
};
那blocknum是在哪里赋值的呢?
答案是在find_or_fill_block中:缓存未命中时,新分配的block会被赋值 block->blocknum = blocknum。bcache_read_block和bcache_get_block都会调用它,我们后面再分析。
继续看返回NULL的case
如果返回NULL的话,说明要找的内容不在dram缓存中。因此先调用alloc_block申请一个bcache_block
/*
 * Pick a cache block that can receive new contents.
 *
 * A block from free_list is preferred. Otherwise lru_list is walked in list
 * order and the first block with ref_count == 0 is reused, after writing it
 * back with flush_block() if it is dirty. The chosen block ends up at the
 * tail of lru_list.
 *
 * Returns NULL if no block has ref_count == 0 or if the write-back fails.
 */
static struct bcache_block *alloc_block(struct bcache *cache)
{
    struct bcache_block *candidate;

    /* pop one off the free list if it's present */
    candidate = list_remove_head_type(&cache->free_list, struct bcache_block, node);
    if (candidate != NULL) {
        candidate->ref_count = 0;
        list_add_tail(&cache->lru_list, &candidate->node);
        LTRACEF("found block %p on free list\n", candidate);
        return candidate;
    }

    /* walk the lru, looking for a block nobody holds a reference to */
    list_for_every_entry(&cache->lru_list, candidate, struct bcache_block, node) {
        LTRACEF("looking at %p, num %u\n", candidate, candidate->blocknum);

        if (candidate->ref_count != 0)
            continue;

        /* pending changes must reach the device before the buffer is reused */
        if (candidate->is_dirty && flush_block(cache, candidate))
            return NULL;

        // add it to the tail of the lru
        list_delete(&candidate->node);
        list_add_tail(&cache->lru_list, &candidate->node);
        return candidate;
    }

    return NULL;
}
这个函数先在free_list中查找是否有空闲的bcache_block,如果有的话直接返回;如果没有的话,就在lru_list中查找一个ref_count为0的block来复用(如果它是dirty的,会先通过flush_block写回设备)。如果还是没有,或者写回失败,则返回NULL,说明所有block都还在被引用,或者脏数据无法写回设备。
找到bcache_block后就调用bio_read来从dev读取内容缓存到dram中
/*
 * Read up to len bytes starting at byte offset from a block device.
 *
 * Returns -1 for a negative offset and 0 when the offset is at or beyond
 * dev->size or len is 0. A request that would run past dev->size is
 * shortened to end there. Otherwise the result of dev->read() is returned.
 */
ssize_t bio_read(bdev_t *dev, void *buf, off_t offset, size_t len)
{
    LTRACEF("dev '%s', buf %p, offset %lld, len %zd\n", dev->name, buf, offset, len);

    DEBUG_ASSERT(dev->ref > 0);

    /* range check */
    if (offset < 0)
        return -1;

    /* nothing to transfer: start is past the device end, or empty request */
    if (offset >= dev->size || len == 0)
        return 0;

    /* clamp the request so it stops at the device end */
    size_t count = len;
    if (offset + count > dev->size)
        count = dev->size - offset;

    return dev->read(dev, buf, offset, count);
}
最终还是要调用dev->read函数
继续看第四个函数bcache_get_block
/*
 * Give the caller direct access to the cached data of a block.
 *
 * The block is found in the cache or read from the device, its reference
 * count is incremented, and *ptr is set to the block's data buffer.
 *
 * Returns 0 on success, -1 if the block could not be obtained.
 */
int bcache_get_block(bcache_t _cache, void **ptr, uint blocknum)
{
    struct bcache *bc = _cache;

    LTRACEF("ptr %p, blocknum %u\n", ptr, blocknum);

    DEBUG_ASSERT(ptr);

    struct bcache_block *entry = find_or_fill_block(bc, blocknum);
    if (entry != NULL) {
        /* increment the ref count to keep it from being freed */
        entry->ref_count++;
        *ptr = entry->ptr;
        return 0;
    }

    /* error */
    return -1;
}
先调用find_or_fill_block找到bcache_block,增加ref_count,然后通过指针返回block->ptr,而block->ptr指向的就是用户想要得到的dev中的内容。这块内容具体是在find_or_fill_block中通过bio_read填充的
err = bio_read(cache->dev, block->ptr, (off_t)blocknum * cache->block_size, cache->block_size);
bcache_put_block做的事情仅仅是找到bcache_block ,然后ref_count减一。
/*
 * Drop one reference on a block previously obtained with bcache_get_block().
 *
 * Note that the lookup goes through find_block(), so it also moves the block
 * to the tail of the LRU list and updates the hit/miss counters.
 *
 * Returns 0 on success, -1 if the block is not in the cache or has no
 * outstanding reference.
 */
int bcache_put_block(bcache_t _cache, uint blocknum)
{
    struct bcache *cache = _cache;

    LTRACEF("blocknum %u\n", blocknum);

    struct bcache_block *block = find_block(cache, blocknum);

    /* be pretty hard on the caller for now */
    DEBUG_ASSERT(block);
    DEBUG_ASSERT(block->ref_count > 0);

    /* when asserts are compiled out, fail cleanly instead of dereferencing NULL or underflowing */
    if (block == NULL || block->ref_count <= 0)
        return -1;

    block->ref_count--;

    return 0;
}
还有一个bcache_flush函数会将缓存中的内容写回到dev中
/*
 * Write every dirty block on the LRU list back to the device.
 *
 * Stops at the first block whose flush_block() call fails and returns that
 * error; returns 0 when all dirty blocks were written.
 */
int bcache_flush(bcache_t priv)
{
    struct bcache *bc = priv;
    struct bcache_block *entry;

    list_for_every_entry(&bc->lru_list, entry, struct bcache_block, node) {
        if (!entry->is_dirty)
            continue;

        int status = flush_block(bc, entry);
        if (status)
            return status;
    }

    return 0;
}
如果block->is_dirty==true,说明缓存中的内容需要写回到dev中
/*
 * Write one cached block back to the device with bio_write().
 *
 * On success the block's dirty flag is cleared and the write counter is
 * incremented. Returns 0 on success or the negative bio_write() result.
 */
static int flush_block(struct bcache *cache, struct bcache_block *block)
{
    int status = bio_write(cache->dev, block->ptr,
                           (off_t)block->blocknum * cache->block_size,
                           cache->block_size);
    if (status < 0)
        return status;

    block->is_dirty = false;
    cache->stats.writes++;
    return 0;
}
调用bio_write写dev
可以通过bcache_mark_block_dirty来设置block->is_dirty = true,然后flush的时候会将缓存写回到dev
/*
 * Flag a cached block as modified so that a later flush writes it back.
 *
 * Returns 0 on success, -1 if the block is not currently in the cache.
 */
int bcache_mark_block_dirty(bcache_t priv, uint blocknum)
{
    struct bcache *bc = priv;
    struct bcache_block *entry = find_block(bc, blocknum);

    if (entry == NULL)
        return -1;

    entry->is_dirty = true;
    return 0;
}
使用bcache 来代表一个block的缓存
/* State of one block cache instance placed in front of a block device. */
struct bcache {
/* device that cache misses are read from (bio_read) and dirty blocks are written to (bio_write) */
bdev_t *dev;
/* size in bytes of every cached block; also the size of each block's data buffer */
size_t block_size;
/* number of entries in blocks[] */
int count;
/* counters updated by the cache code: hits, misses, depth, reads, writes */
struct bcache_stats stats;
/* blocks that currently hold no cached data */
struct list_node free_list;
/* blocks holding cached data; a lookup hit moves the block to the tail */
struct list_node lru_list;
/* array of count block descriptors allocated by bcache_create() */
struct bcache_block *blocks;
};
其中的dev代表是具体的block设备,例如usb,emmc等。
free_list 和 lru_list 分别保存空闲的 bcache_block 和已缓存内容的 bcache_block;lru_list 按最近使用的顺序排列,最近访问的 block 位于链表尾部。
bcache_block 代表具体缓存的内容
在Bcache.h中提供了5个接口给外界使用
/* Create a cache of block_count blocks, each block_size bytes, in front of dev. */
bcache_t bcache_create(bdev_t *dev, size_t block_size, int block_count);
/* Free the per-block buffers and the cache object; prints a warning for blocks still marked dirty. */
void bcache_destroy(bcache_t);
/* Copy the contents of the given block into the caller's buffer; returns 0 on success, -1 on error. */
int bcache_read_block(bcache_t, void *, uint block);
// get and put a pointer directly to the block
/* Store a pointer to the cached data of the block and take a reference on it; 0 on success, -1 on error. */
int bcache_get_block(bcache_t, void **, uint block);
/* Drop one reference previously taken with bcache_get_block(). */
int bcache_put_block(bcache_t, uint block);
也就是说,使用者只需要这5个接口就可以使用bcache这个feature。
我们来分别看一下。
/*
 * Create a block cache in front of a block device.
 *
 * dev          device that cache misses are read from
 * block_size   size in bytes of every cached block
 * block_count  number of blocks the cache can hold
 *
 * Returns an opaque cache handle, or NULL if block_count is negative or an
 * allocation fails. Everything allocated so far is released on failure.
 */
bcache_t bcache_create(bdev_t *dev, size_t block_size, int block_count)
{
    struct bcache *cache;
    int i;

    if (block_count < 0)
        return NULL;

    /* allocate the cache object first, then initialize its fields */
    cache = malloc(sizeof(*cache));
    if (cache == NULL)
        return NULL;

    cache->dev = dev;
    cache->block_size = block_size;
    cache->count = block_count;
    memset(&cache->stats, 0, sizeof(cache->stats));

    list_initialize(&cache->free_list);
    list_initialize(&cache->lru_list);

    /* malloc(0) may legitimately return NULL, so only a non-empty request can fail */
    cache->blocks = malloc(sizeof(struct bcache_block) * block_count);
    if (cache->blocks == NULL && block_count > 0) {
        free(cache);
        return NULL;
    }

    for (i = 0; i < block_count; i++) {
        cache->blocks[i].ref_count = 0;
        cache->blocks[i].is_dirty = false;
        cache->blocks[i].ptr = malloc(block_size);
        if (cache->blocks[i].ptr == NULL && block_size > 0) {
            /* unwind the data buffers allocated by earlier iterations */
            while (i-- > 0)
                free(cache->blocks[i].ptr);
            free(cache->blocks);
            free(cache);
            return NULL;
        }

        // add to the free list
        list_add_head(&cache->free_list, &cache->blocks[i].node);
    }

    return (bcache_t)cache;
}
这个接口主要是创建一个新的缓存:dev 表示具体的设备,block_size 和 block_count 决定缓存的总大小(block_size * block_count),一般情况下远小于 block 设备的大小。举个例子,usb 设备的大小是 4GB,但不可能通过 bcache_create 创建一个同样是 4GB 的缓存,因为 bcache 是用内存来缓存内容的,系统一般没有这么多内存;而经常访问的内容可能只有几 MB,所以缓存几 MB 就可以了。
/*
 * Tear down a cache created by bcache_create().
 *
 * Frees every block buffer, the block descriptor array and the cache object.
 * Nothing is written back to the device here: a block that is still marked
 * dirty only produces a warning.
 */
void bcache_destroy(bcache_t _cache)
{
    struct bcache *cache = _cache;
    int i;

    for (i = 0; i < cache->count; i++) {
        DEBUG_ASSERT(cache->blocks[i].ref_count == 0);

        if (cache->blocks[i].is_dirty)
            printf("warning: freeing dirty block %u\n",
                   cache->blocks[i].blocknum);

        free(cache->blocks[i].ptr);
    }

    /* the descriptor array is a separate allocation from the cache object */
    free(cache->blocks);
    free(cache);
}
bcache_destroy和bcache_create执行的操作相反,释放掉bcache_create申请的memory。code很简单就不解释了
不过释放memory的时候会判断是否缓存已经flush到设备中了,如果没有的话,会给出警告信息。
if (cache->blocks[i].is_dirty)
printf("warning: freeing dirty block %u\n",
cache->blocks[i].blocknum);
看第三个函数bcache_read_block,这个函数用于从设备中读取内容,并缓存到dram中,并将DRAM中缓存copy到buf中
/*
 * Copy one block into a caller-supplied buffer.
 *
 * The block is looked up in the cache and read from the device on a miss;
 * cache->block_size bytes are then copied into buf.
 *
 * Returns 0 on success, -1 if the block could not be obtained.
 */
int bcache_read_block(bcache_t _cache, void *buf, uint blocknum)
{
    struct bcache *bc = _cache;

    LTRACEF("buf %p, blocknum %u\n", buf, blocknum);

    /* locate the cached block first */
    struct bcache_block *entry = find_or_fill_block(bc, blocknum);
    if (entry != NULL) {
        /* hand the cached contents to the caller */
        memcpy(buf, entry->ptr, bc->block_size);
        return 0;
    }

    /* error */
    return -1;
}
继续看find_or_fill_block的实现
/*
 * Return the cache block holding blocknum, reading it from the device on a miss.
 *
 * Returns NULL when no block can be allocated (alloc_block() failed) or when
 * the device read reports an error.
 */
static struct bcache_block *find_or_fill_block(struct bcache *cache, uint blocknum)
{
    int err;

    LTRACEF("block %u\n", blocknum);

    /* if the block is already cached, return it directly */
    struct bcache_block *block = find_block(cache, blocknum);
    if (block == NULL) {
        LTRACEF("wasn't allocated\n");

        /* allocate a new block and fill it from the device with bio_read */
        block = alloc_block(cache);
        if (block == NULL) {
            /* alloc_block found nothing reusable or could not flush a dirty block */
            return NULL;
        }

        LTRACEF("wasn't allocated, new block %p\n", block);

        block->blocknum = blocknum;
        err = bio_read(cache->dev, block->ptr, (off_t)blocknum * cache->block_size, cache->block_size);
        if (err < 0) {
            /*
             * free the block, return an error.
             * alloc_block() linked it into lru_list, so it must be unlinked
             * before it is put on free_list, otherwise both lists are corrupted.
             */
            list_delete(&block->node);
            list_add_tail(&cache->free_list, &block->node);
            return NULL;
        }

        cache->stats.reads++;
    }

    return block;
}
我们先看是如何在缓存中查找bcache_block的
/*
 * Look up blocknum among the blocks on the LRU list.
 *
 * On a hit the block is moved to the tail of lru_list, the hit counter is
 * bumped and the number of entries inspected is added to stats.depth.
 * On a miss the miss counter is bumped and NULL is returned.
 */
static struct bcache_block *find_block(struct bcache *cache, uint blocknum)
{
    struct bcache_block *entry = NULL;
    uint32_t visited = 0;

    LTRACEF("num %u\n", blocknum);

    list_for_every_entry(&cache->lru_list, entry, struct bcache_block, node) {
        LTRACEF("looking at entry %p, num %u\n", entry, entry->blocknum);
        visited++;

        if (entry->blocknum != blocknum)
            continue;

        /* hit: re-queue at the tail so it counts as most recently used */
        list_delete(&entry->node);
        list_add_tail(&cache->lru_list, &entry->node);

        cache->stats.hits++;
        cache->stats.depth += visited;
        return entry;
    }

    cache->stats.misses++;
    return NULL;
}
原来是通过bcache_block 中的blocknum来判断的。如果相等则认为找到
/* Descriptor of one cached block. */
struct bcache_block {
/* links the block into either the cache's free_list or its lru_list */
struct list_node node;
/* device block number cached here; assigned in find_or_fill_block() on a miss */
bnum_t blocknum;
/* raised by bcache_get_block(), lowered by bcache_put_block(); alloc_block() only reuses blocks at 0 */
int ref_count;
/* set by bcache_mark_block_dirty(), cleared by flush_block() after a successful write */
bool is_dirty;
/* data buffer of block_size bytes allocated in bcache_create() */
void *ptr;
};
那blocknum是在哪里赋值的呢?
答案是在find_or_fill_block中:缓存未命中时,新分配的block会被赋值 block->blocknum = blocknum。bcache_read_block和bcache_get_block都会调用它,我们后面再分析。
继续看返回NULL的case
如果返回NULL的话,说明要找的内容不在dram缓存中。因此先调用alloc_block申请一个bcache_block
/*
 * Pick a cache block that can receive new contents.
 *
 * A block from free_list is preferred. Otherwise lru_list is walked in list
 * order and the first block with ref_count == 0 is reused, after writing it
 * back with flush_block() if it is dirty. The chosen block ends up at the
 * tail of lru_list.
 *
 * Returns NULL if no block has ref_count == 0 or if the write-back fails.
 */
static struct bcache_block *alloc_block(struct bcache *cache)
{
    struct bcache_block *candidate;

    /* pop one off the free list if it's present */
    candidate = list_remove_head_type(&cache->free_list, struct bcache_block, node);
    if (candidate != NULL) {
        candidate->ref_count = 0;
        list_add_tail(&cache->lru_list, &candidate->node);
        LTRACEF("found block %p on free list\n", candidate);
        return candidate;
    }

    /* walk the lru, looking for a block nobody holds a reference to */
    list_for_every_entry(&cache->lru_list, candidate, struct bcache_block, node) {
        LTRACEF("looking at %p, num %u\n", candidate, candidate->blocknum);

        if (candidate->ref_count != 0)
            continue;

        /* pending changes must reach the device before the buffer is reused */
        if (candidate->is_dirty && flush_block(cache, candidate))
            return NULL;

        // add it to the tail of the lru
        list_delete(&candidate->node);
        list_add_tail(&cache->lru_list, &candidate->node);
        return candidate;
    }

    return NULL;
}
这个函数先在free_list中查找是否有空闲的bcache_block,如果有的话直接返回;如果没有的话,就在lru_list中查找一个ref_count为0的block来复用(如果它是dirty的,会先通过flush_block写回设备)。如果还是没有,或者写回失败,则返回NULL,说明所有block都还在被引用,或者脏数据无法写回设备。
找到bcache_block后就调用bio_read来从dev读取内容缓存到dram中
/*
 * Read up to len bytes starting at byte offset from a block device.
 *
 * Returns -1 for a negative offset and 0 when the offset is at or beyond
 * dev->size or len is 0. A request that would run past dev->size is
 * shortened to end there. Otherwise the result of dev->read() is returned.
 */
ssize_t bio_read(bdev_t *dev, void *buf, off_t offset, size_t len)
{
    LTRACEF("dev '%s', buf %p, offset %lld, len %zd\n", dev->name, buf, offset, len);

    DEBUG_ASSERT(dev->ref > 0);

    /* range check */
    if (offset < 0)
        return -1;

    /* nothing to transfer: start is past the device end, or empty request */
    if (offset >= dev->size || len == 0)
        return 0;

    /* clamp the request so it stops at the device end */
    size_t count = len;
    if (offset + count > dev->size)
        count = dev->size - offset;

    return dev->read(dev, buf, offset, count);
}
最终还是要调用dev->read函数
继续看第四个函数bcache_get_block
/*
 * Give the caller direct access to the cached data of a block.
 *
 * The block is found in the cache or read from the device, its reference
 * count is incremented, and *ptr is set to the block's data buffer.
 *
 * Returns 0 on success, -1 if the block could not be obtained.
 */
int bcache_get_block(bcache_t _cache, void **ptr, uint blocknum)
{
    struct bcache *bc = _cache;

    LTRACEF("ptr %p, blocknum %u\n", ptr, blocknum);

    DEBUG_ASSERT(ptr);

    struct bcache_block *entry = find_or_fill_block(bc, blocknum);
    if (entry != NULL) {
        /* increment the ref count to keep it from being freed */
        entry->ref_count++;
        *ptr = entry->ptr;
        return 0;
    }

    /* error */
    return -1;
}
先调用find_or_fill_block找到bcache_block,增加ref_count,然后通过指针返回block->ptr,而block->ptr指向的就是用户想要得到的dev中的内容。这块内容具体是在find_or_fill_block中通过bio_read填充的
err = bio_read(cache->dev, block->ptr, (off_t)blocknum * cache->block_size, cache->block_size);
bcache_put_block做的事情仅仅是找到bcache_block ,然后ref_count减一。
/*
 * Drop one reference on a block previously obtained with bcache_get_block().
 *
 * Note that the lookup goes through find_block(), so it also moves the block
 * to the tail of the LRU list and updates the hit/miss counters.
 *
 * Returns 0 on success, -1 if the block is not in the cache or has no
 * outstanding reference.
 */
int bcache_put_block(bcache_t _cache, uint blocknum)
{
    struct bcache *cache = _cache;

    LTRACEF("blocknum %u\n", blocknum);

    struct bcache_block *block = find_block(cache, blocknum);

    /* be pretty hard on the caller for now */
    DEBUG_ASSERT(block);
    DEBUG_ASSERT(block->ref_count > 0);

    /* when asserts are compiled out, fail cleanly instead of dereferencing NULL or underflowing */
    if (block == NULL || block->ref_count <= 0)
        return -1;

    block->ref_count--;

    return 0;
}
还有一个bcache_flush函数会将缓存中的内容写回到dev中
/*
 * Write every dirty block on the LRU list back to the device.
 *
 * Stops at the first block whose flush_block() call fails and returns that
 * error; returns 0 when all dirty blocks were written.
 */
int bcache_flush(bcache_t priv)
{
    struct bcache *bc = priv;
    struct bcache_block *entry;

    list_for_every_entry(&bc->lru_list, entry, struct bcache_block, node) {
        if (!entry->is_dirty)
            continue;

        int status = flush_block(bc, entry);
        if (status)
            return status;
    }

    return 0;
}
如果block->is_dirty==true,说明缓存中的内容需要写回到dev中
/*
 * Write one cached block back to the device with bio_write().
 *
 * On success the block's dirty flag is cleared and the write counter is
 * incremented. Returns 0 on success or the negative bio_write() result.
 */
static int flush_block(struct bcache *cache, struct bcache_block *block)
{
    int status = bio_write(cache->dev, block->ptr,
                           (off_t)block->blocknum * cache->block_size,
                           cache->block_size);
    if (status < 0)
        return status;

    block->is_dirty = false;
    cache->stats.writes++;
    return 0;
}
调用bio_write写dev
可以通过bcache_mark_block_dirty来设置block->is_dirty = true,然后flush的时候会将缓存写回到dev
/*
 * Flag a cached block as modified so that a later flush writes it back.
 *
 * Returns 0 on success, -1 if the block is not currently in the cache.
 */
int bcache_mark_block_dirty(bcache_t priv, uint blocknum)
{
    struct bcache *bc = priv;
    struct bcache_block *entry = find_block(bc, blocknum);

    if (entry == NULL)
        return -1;

    entry->is_dirty = true;
    return 0;
}