【leveldb】Cache（十八）：BlockCache

最新推荐文章于 2022-06-11 00:45:11 发布

奔跑的哇牛

最新推荐文章于 2022-06-11 00:45:11 发布

阅读量997

点赞数 1

分类专栏： leveldb源码阅读总结文章标签： leveldb BlockCache

本文链接：https://blog.csdn.net/h514434485/article/details/107122896

版权

leveldb源码阅读总结专栏收录该内容

24 篇文章 37 订阅

订阅专栏

BlockCache 的整体结构已在《leveldb 内部整体Cache结构说明》一文中介绍过，本文不再重复。
BlockCache中缓存的就是实际的KV数据，也就是DataBlock数据。这里为便于理解，可看如下BlockCache结构图：
在这里插入图片描述

图1

BlockCache：

key： 当前table对应的缓存id（cache_id） + DataBlock在ldb文件中的偏移量（offset）。
Value： 就是真实的BlockData数据。

注：
因为打开的ldb（就是sst）文件中的BlockData都是存放于全局一份的BlockCache中的，
而不同的ldb文件其BlockData的offset可能相同，为了区分不同ldb文件中的BlockData的
offset，所以要给每个ldb文件分配一个唯一的 cache_id，这样key = cache_id + block_offset的组合就是唯一的了。

Table.cc

BlockCache的使用主要封装在Table类中，下面解读Table.cc中的实现。

namespace leveldb {

// Internal state of an open Table.
//
// The destructor releases the filter reader, the filter data buffer and the
// index block. `file` is not released here.
struct Table::Rep {
  ~Rep() {
    delete filter;
    delete[] filter_data;
    delete index_block;
  }

  Options options;
  Status status;
  RandomAccessFile* file; // Handle used to read the table (.ldb/.sst) file.
  // Id obtained from the block cache in Table::Open (0 when no block cache is
  // configured). It belongs to the table as a whole: Table::BlockReader
  // combines it with a block's offset to build that block's cache key.
  uint64_t cache_id;
  FilterBlockReader* filter; // Filter block reader; nullptr if none was loaded.
  // Filter block bytes to delete[] on destruction. Table::ReadFilter sets this
  // only when the bytes it read were heap allocated; otherwise it stays nullptr.
  const char* filter_data;

  // Location of the metaindex block (not the index block) inside the file.
  BlockHandle metaindex_handle;  // Handle to metaindex_block: saved from footer
  Block* index_block;  // Parsed index block, created in Table::Open.
};

// Opens the table stored in `file`, whose length is `size` bytes.
//
// Only the footer and the index block are read up front (plus the filter
// block, via ReadMeta, when a filter policy is configured). Keeping the index
// block in memory lets later lookups locate the one data block that may hold
// a key before doing any further file reads.
//
// On success a newly allocated Table is stored in *table and an OK status is
// returned. On any failure *table is left as nullptr and the error status is
// returned.
Status Table::Open(const Options& options, RandomAccessFile* file,
                   uint64_t size, Table** table) {
  *table = nullptr;

  // A file shorter than one encoded footer cannot be a table.
  if (size < Footer::kEncodedLength) {
    return Status::Corruption("file is too short to be an sstable");
  }

  // The footer occupies the last Footer::kEncodedLength bytes of the file.
  char footer_space[Footer::kEncodedLength];
  Slice footer_input;
  Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength,
                        &footer_input, footer_space);
  if (!s.ok()) {
    return s;
  }

  // Decoding the footer yields the handles of the metaindex block and of the
  // index block.
  Footer footer;
  s = footer.DecodeFrom(&footer_input);
  if (!s.ok()) {
    return s;
  }

  // With paranoid_checks enabled, request checksum verification for the index
  // block read.
  ReadOptions opt;
  if (options.paranoid_checks) {
    opt.verify_checksums = true;
  }

  // Read the index block.
  BlockContents index_block_contents;
  s = ReadBlock(file, opt, footer.index_handle(), &index_block_contents);
  if (!s.ok()) {
    return s;
  }

  // We've successfully read the footer and the index block: we're
  // ready to serve requests.
  Block* parsed_index = new Block(index_block_contents);
  Rep* state = new Table::Rep;
  state->options = options;
  state->file = file;
  state->metaindex_handle = footer.metaindex_handle();
  state->index_block = parsed_index;
  // Each table gets its own id from the block cache; BlockReader prefixes it
  // to a block's offset so cache keys of different tables never collide.
  if (options.block_cache) {
    state->cache_id = options.block_cache->NewId();
  } else {
    state->cache_id = 0;
  }
  state->filter_data = nullptr;
  state->filter = nullptr;

  *table = new Table(state);

  // Load the filter block, if any. Failures there are not reported.
  (*table)->ReadMeta(footer);
  return s;
}

void Table::ReadMeta(const Footer& footer) {
  //过滤策略都没有，那就可以确定没必要读filter block了
  if (rep_->options.filter_policy == nullptr) {
    return;  // Do not need any metadata
  }

  // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
  // it is an empty block.
  ReadOptions opt;
  if (rep_->options.paranoid_checks) {
    opt.verify_checksums = true;
  }

  //根据metaindex_handle读取metaindex block
  BlockContents contents;
  if (!ReadBlock(rep_->file, opt, footer.metaindex_handle(), &contents).ok()) {
    // Do not propagate errors since meta info is not needed for operation
    return;
  }

  //这里是疑惑的地方！！！！！！
  Block* meta = new Block(contents);

  Iterator* iter = meta->NewIterator(BytewiseComparator());
  std::string key = "filter.";
  key.append(rep_->options.filter_policy->Name());
  iter->Seek(key);
  if (iter->Valid() && iter->key() == Slice(key)) {

    //根据metaindex的offset+size去读取filter block
    ReadFilter(iter->value());
  }
  delete iter;
  delete meta;
}

void Table::ReadFilter(const Slice& filter_handle_value) {
  Slice v = filter_handle_value;
  BlockHandle filter_handle;
  if (!filter_handle.DecodeFrom(&v).ok()) {
    return;
  }

  // We might want to unify with ReadBlock() if we start
  // requiring checksum verification in Table::Open.
  ReadOptions opt;
  if (rep_->options.paranoid_checks) {
    opt.verify_checksums = true;
  }

  //读取filter block 数据
  BlockContents block;
  if (!ReadBlock(rep_->file, opt, filter_handle, &block).ok()) {
    return;
  }

  //如果heap_allocated为true表示读取
  //filter block的时候new了内存，后续需要删除
  if (block.heap_allocated) {
    rep_->filter_data = block.data.data();  // Will need to delete later
  }

  //构造一个读取filter block的实例
  rep_->filter = new FilterBlockReader(rep_->options.filter_policy, block.data);
}

// Destroys the table by deleting its Rep.
Table::~Table() {
  delete rep_;
}

static void DeleteBlock(void* arg, void* ignored) {
  delete reinterpret_cast<Block*>(arg);
}

// Deleter passed to Cache::Insert by Table::BlockReader: `value` is the cached
// Block to delete. The key is not needed.
static void DeleteCachedBlock(const Slice& key, void* value) {
  delete reinterpret_cast<Block*>(value);
}

static void ReleaseBlock(void* arg, void* h) {
  Cache* cache = reinterpret_cast<Cache*>(arg);
  Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
  cache->Release(handle);
}

// Convert an index iterator value (i.e., an encoded BlockHandle)
// into an iterator over the contents of the corresponding block.
//
// `arg` is the Table the block belongs to. When the table has a block cache,
// the block is looked up there first under the key
// (table cache_id, block offset); otherwise, or on a cache miss, it is read
// from the file. If no block could be obtained, an error iterator carrying
// the failing status is returned.
Iterator* Table::BlockReader(void* arg, const ReadOptions& options,
                             const Slice& index_value) {
  Table* table = reinterpret_cast<Table*>(arg);
  Cache* block_cache = table->rep_->options.block_cache;

  // We intentionally allow extra stuff in index_value so that we
  // can add more features in the future.
  Slice encoded = index_value;
  BlockHandle handle;
  Status s = handle.DecodeFrom(&encoded);
  if (!s.ok()) {
    return NewErrorIterator(s);
  }

  Block* block = nullptr;
  Cache::Handle* cache_handle = nullptr;
  BlockContents contents;

  if (block_cache == nullptr) {
    // No block cache configured: always read the block from the file.
    s = ReadBlock(table->rep_->file, options, handle, &contents);
    if (s.ok()) {
      block = new Block(contents);
    }
  } else {
    // Cache key = 8 bytes of table cache_id followed by 8 bytes of block
    // offset.
    char key_bytes[16];
    EncodeFixed64(key_bytes, table->rep_->cache_id);
    EncodeFixed64(key_bytes + 8, handle.offset());
    Slice key(key_bytes, sizeof(key_bytes));

    cache_handle = block_cache->Lookup(key);
    if (cache_handle != nullptr) {
      // Cache hit: use the cached block directly.
      block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
    } else {
      // Cache miss: read the block from the file.
      s = ReadBlock(table->rep_->file, options, handle, &contents);
      if (s.ok()) {
        block = new Block(contents);
        // Insert into the cache only when the contents are flagged cachable
        // and the caller's read options ask to fill the cache.
        if (contents.cachable && options.fill_cache) {
          cache_handle = block_cache->Insert(key, block, block->size(),
                                             &DeleteCachedBlock);
        }
      }
    }
  }

  if (block == nullptr) {
    // The read failed; hand the status back through an error iterator.
    return NewErrorIterator(s);
  }

  Iterator* iter = block->NewIterator(table->rep_->options.comparator);
  if (cache_handle != nullptr) {
    // The block is held through a cache handle: release that handle when the
    // iterator runs its cleanups.
    iter->RegisterCleanup(&ReleaseBlock, block_cache, cache_handle);
  } else {
    // The block is not in the cache: delete it when the iterator runs its
    // cleanups.
    iter->RegisterCleanup(&DeleteBlock, block, nullptr);
  }
  return iter;
}

// Returns a two-level iterator over the table: the first level walks the
// index block, and Table::BlockReader turns each index entry into an iterator
// over the block that entry points to.
Iterator* Table::NewIterator(const ReadOptions& options) const {
  Iterator* index_iter =
      rep_->index_block->NewIterator(rep_->options.comparator);
  return NewTwoLevelIterator(index_iter, &Table::BlockReader,
                             const_cast<Table*>(this), options);
}

// Looks up `k` in the table and, if the seek inside the candidate block lands
// on a valid entry, calls handle_result(arg, entry key, entry value).
//
// The index block is consulted first to find the candidate block. When a
// filter is loaded and reports that the key cannot be in that block, the
// block is not read at all.
//
// Returns the block iterator's status if it is not OK, otherwise the index
// iterator's status.
Status Table::InternalGet(const ReadOptions& options, const Slice& k, void* arg,
                          void (*handle_result)(void*, const Slice&,
                                                const Slice&)) {
  Status s;
  Iterator* index_iter =
      rep_->index_block->NewIterator(rep_->options.comparator);
  index_iter->Seek(k);

  if (index_iter->Valid()) {
    // The index entry's value is the encoded handle of the candidate block.
    // Decoding consumes this local copy, so the block read below fetches the
    // value from the iterator again.
    Slice encoded_handle = index_iter->value();
    BlockHandle handle;
    FilterBlockReader* filter = rep_->filter;

    // The filter is consulted only when one is loaded and the handle decodes;
    // in every other case the block is read.
    bool skip_block = false;
    if (filter != nullptr && handle.DecodeFrom(&encoded_handle).ok()) {
      skip_block = !filter->KeyMayMatch(handle.offset(), k);
    }

    if (!skip_block) {
      // The filter (if any) says the key may be present: search the block.
      Iterator* block_iter = BlockReader(this, options, index_iter->value());
      block_iter->Seek(k);
      if (block_iter->Valid()) {
        // Hand the entry the seek landed on to the caller's callback.
        handle_result(arg, block_iter->key(), block_iter->value());
      }
      s = block_iter->status();
      delete block_iter;
    }
  }

  if (s.ok()) {
    s = index_iter->status();
  }
  delete index_iter;
  return s;
}


//预估key的大致偏移位。
//1、在index_block中查找到了就返回index_block中对应的DataBlock的offset。
//2、如果在index_block中查找到了但是无法解码出offset+size,就默认给metaindex_block的offset。
//3、Seek是查到大于等于这个key的值，若未找到，说明这个key比较大，默认给metaindex_block的offset。
uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
  Iterator* index_iter =
      rep_->index_block->NewIterator(rep_->options.comparator);
  index_iter->Seek(key);
  uint64_t result;
  if (index_iter->Valid()) {
    BlockHandle handle;
    Slice input = index_iter->value();
    Status s = handle.DecodeFrom(&input);
    if (s.ok()) {
      result = handle.offset();
    } else {
      // Strange: we can't decode the block handle in the index block.
      // We'll just return the offset of the metaindex block, which is
      // close to the whole file size for this case.
      result = rep_->metaindex_handle.offset();
    }
  } else {
    // key is past the last key in the file.  Approximate the offset
    // by returning the offset of the metaindex block (which is
    // right near the end of the file).
    result = rep_->metaindex_handle.offset();
  }
  delete index_iter;
  return result;
}

}  // namespace leveldb