leveldb源码分析：数据插入续(跳表)

最新推荐文章于 2024-06-07 00:45:43 发布

小屋子大侠

最新推荐文章于 2024-06-07 00:45:43 发布

阅读量337

点赞数

分类专栏：分布式相关　文章标签：leveldb源码

本文链接：https://blog.csdn.net/qq_33339479/article/details/101033718

版权

分布式相关专栏收录该内容

18 篇文章 1 订阅

订阅专栏

leveldb数据的插入-跳表

本文主要是接着上一篇文章，继续深入探索Write函数调用插入之后的流程。

status = WriteBatchInternal::InsertInto(updates, mem_);

InsertInto插入数据函数

namespace {
class MemTableInserter : public WriteBatch::Handler {                     // MemTable插入类
 public:
  SequenceNumber sequence_;
  MemTable* mem_;

  void Put(const Slice& key, const Slice& value) override {               // 添加内容
    mem_->Add(sequence_, kTypeValue, key, value);                         // 添加序列号 插入类型  key  value
    sequence_++;
  }
  void Delete(const Slice& key) override {
    mem_->Add(sequence_, kTypeDeletion, key, Slice());                    // 添加内容 序列号  删除类型  key 空的value
    sequence_++;
  }
};
}  // namespace

// Replays every record of batch b into memtable and returns the status
// produced by WriteBatch::Iterate().
Status WriteBatchInternal::InsertInto(const WriteBatch* b, MemTable* memtable) {
  // Sequence number stored in the batch; the first record is added with it.
  const auto first_sequence = WriteBatchInternal::Sequence(b);

  MemTableInserter inserter;
  inserter.mem_ = memtable;
  inserter.sequence_ = first_sequence;

  // Iterate() calls inserter.Put()/Delete() once per record.
  return b->Iterate(&inserter);
}

可以得知，真正插入数据的操作发生在InsertInto函数中：先通过WriteBatchInternal::Sequence(b)取出该批次的起始序列号并赋给inserter的sequence_属性，再把当前的memtable赋给inserter的mem_属性，最后调用WriteBatch的Iterate方法逐条插入数据。

// Decodes the records stored in rep_ one by one and dispatches each of them to
// handler->Put() or handler->Delete(). Returns a Corruption status when the
// buffer is shorter than the header, a record cannot be decoded, a tag is
// unknown, or the number of decoded records differs from the stored count.
Status WriteBatch::Iterate(Handler* handler) const {
  Slice input(rep_);
  if (input.size() < kHeader) {
    return Status::Corruption("malformed WriteBatch (too small)");
  }
  input.remove_prefix(kHeader);  // Skip the header; records follow it.

  Slice key;
  Slice value;
  int records_seen = 0;
  while (!input.empty()) {
    ++records_seen;

    // Each record starts with a one-byte tag.
    const char tag = input[0];
    input.remove_prefix(1);

    if (tag == kTypeValue) {
      // Put record: length-prefixed key followed by length-prefixed value.
      if (!GetLengthPrefixedSlice(&input, &key) ||
          !GetLengthPrefixedSlice(&input, &value)) {
        return Status::Corruption("bad WriteBatch Put");
      }
      handler->Put(key, value);
    } else if (tag == kTypeDeletion) {
      // Delete record: length-prefixed key only.
      if (!GetLengthPrefixedSlice(&input, &key)) {
        return Status::Corruption("bad WriteBatch Delete");
      }
      handler->Delete(key);
    } else {
      return Status::Corruption("unknown WriteBatch tag");
    }
  }

  // The number of decoded records must match the count stored for the batch.
  if (records_seen != WriteBatchInternal::Count(this)) {
    return Status::Corruption("WriteBatch has wrong count");
  }
  return Status::OK();
}

此时就调用了Iterate方法来插入数据。从执行流程可知：先检查数据长度是否不小于头部大小并跳过头部；随后循环读取每条记录的标志位（tag），根据标志位解析出key（以及value），再调用handler的Put或者Delete方法；最后校验解析出的记录数是否与批次中保存的数量一致。为便于对照，下面再次列出Iterate的代码。

// Decodes the records stored in rep_ one by one and dispatches each of them to
// handler->Put() or handler->Delete(). Returns a Corruption status when the
// buffer is shorter than the header, a record cannot be decoded, a tag is
// unknown, or the number of decoded records differs from the stored count.
Status WriteBatch::Iterate(Handler* handler) const {
  Slice input(rep_);
  if (input.size() < kHeader) {
    return Status::Corruption("malformed WriteBatch (too small)");
  }
  input.remove_prefix(kHeader);  // Skip the header; records follow it.

  Slice key;
  Slice value;
  int records_seen = 0;
  while (!input.empty()) {
    ++records_seen;

    // Each record starts with a one-byte tag.
    const char tag = input[0];
    input.remove_prefix(1);

    if (tag == kTypeValue) {
      // Put record: length-prefixed key followed by length-prefixed value.
      if (!GetLengthPrefixedSlice(&input, &key) ||
          !GetLengthPrefixedSlice(&input, &value)) {
        return Status::Corruption("bad WriteBatch Put");
      }
      handler->Put(key, value);
    } else if (tag == kTypeDeletion) {
      // Delete record: length-prefixed key only.
      if (!GetLengthPrefixedSlice(&input, &key)) {
        return Status::Corruption("bad WriteBatch Delete");
      }
      handler->Delete(key);
    } else {
      return Status::Corruption("unknown WriteBatch tag");
    }
  }

  // The number of decoded records must match the count stored for the batch.
  if (records_seen != WriteBatchInternal::Count(this)) {
    return Status::Corruption("WriteBatch has wrong count");
  }
  return Status::OK();
}

此时执行的handler就是MemTableInserter的实例，并调用该Put和Delete方法；

// Handler callback for a Put record: adds key/value to mem_ as a kTypeValue entry.
void Put(const Slice& key, const Slice& value) override {
    mem_->Add(sequence_, kTypeValue, key, value);  // args: sequence number, entry type, key, value
    sequence_++;                                   // the next record gets the following sequence number
  }
  // Handler callback for a Delete record: adds a kTypeDeletion entry for key with an empty value.
  void Delete(const Slice& key) override {
    mem_->Add(sequence_, kTypeDeletion, key, Slice());  // args: sequence number, deletion type, key, empty value
    sequence_++;                                        // the next record gets the following sequence number
  }

此时就是调用了mem_的Add方法，只不过就是利用了不同的Type来标记是新增数据还是删除数据；此时查看MemTable相关内容

MemTable细节相关

MemTable就是内存中保存的数据，当内存数据规模达到阈值时，就会将内存数据写入到文件中，此时先查看Add方法。

mem_->Add方法

// Encodes one (sequence number, type, key, value) record into a single buffer
// obtained from arena_ and passes that buffer to table_.Insert().
void MemTable::Add(SequenceNumber s, ValueType type, const Slice& key,
                   const Slice& value) {
  // Format of an entry is concatenation of:
  //  key_size     : varint32 of internal_key.size()
  //  key bytes    : char[internal_key.size()]
  //  value_size   : varint32 of value.size()
  //  value bytes  : char[value.size()]
  size_t key_size = key.size();                                               // length of the caller's key
  size_t val_size = value.size();                                             // length of the value
  size_t internal_key_size = key_size + 8;                                    // key plus the 8-byte (s << 8) | type word written below
  const size_t encoded_len = VarintLength(internal_key_size) +
                             internal_key_size + VarintLength(val_size) +
                             val_size;                                        // both length prefixes plus both payloads
  char* buf = arena_.Allocate(encoded_len);                                   // storage for the whole entry comes from arena_
  char* p = EncodeVarint32(buf, internal_key_size);                           // write the internal-key length; p is the cursor for the bytes that follow
  memcpy(p, key.data(), key_size);                                            // copy the key bytes
  p += key_size;
  EncodeFixed64(p, (s << 8) | type);                                          // sequence number shifted left by 8 bits, OR-ed with the type
  p += 8;
  p = EncodeVarint32(p, val_size);                                            // write the value length prefix
  memcpy(p, value.data(), val_size);                                          // copy the value bytes
  assert(p + val_size == buf + encoded_len);                                  // the cursor must end exactly at the end of the buffer
  table_.Insert(buf);                                                         // hand the fully encoded entry to table_
}

此时主要就是将数据转换为buf，并调用table插入。数据格式如下；

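（此处原为示意图）buf中一条记录依次由以下几部分组成：internal_key长度（varint32，值为key长度+8）、key的原始字节、8字节的定长整数（序列号左移8位后与类型按位或）、value长度（varint32）、value的原始字节。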

SkipList跳表

在上一节中，最后调用了table_.Insert函数插入数据，此时的table定义如下；

  // Container type used by the memtable: a SkipList whose keys are const char*
  // and whose ordering is supplied by KeyComparator.
  typedef SkipList<const char*, KeyComparator> Table;

  // The table instance that MemTable::Add() inserts encoded entries into.
  Table table_;

此时定义的就是SkipList的Insert方法，在MemTable初始化过程中；

// Constructs a MemTable: stores the comparator, starts refs_ at 0, and builds
// table_ from comparator_ and a pointer to the arena_ member.
MemTable::MemTable(const InternalKeyComparator& comparator)
    : comparator_(comparator), refs_(0), table_(comparator_, &arena_) {}
    
// DBImpl中初始化MemTable
mem = new MemTable(internal_comparator_)  // internal_comparator_ is passed as the MemTable constructor's comparator argument

此时可知table_的两个构造参数的来源：比较器comparator_来自DBImpl传入的internal_comparator_，内存区域则是MemTable自身的arena_成员（以&arena_的形式传入）。

此时查看SkipList的初始化过程；

// Constructs an empty skip list: keeps the comparator and the arena pointer,
// creates the head node with kMaxHeight levels, and clears every head link.
template <typename Key, class Comparator>
SkipList<Key, Comparator>::SkipList(Comparator cmp, Arena* arena)
    : compare_(cmp),
      arena_(arena),
      head_(NewNode(0 /* any key will do */, kMaxHeight)),  // head node is created with kMaxHeight levels
      max_height_(1),                                       // the list starts with a height of 1
      rnd_(0xdeadbeef) {                                    // rnd_ is initialized with a fixed constant
  // No level of the head node has a successor yet.
  for (int i = 0; i < kMaxHeight; i++) {
    head_->SetNext(i, nullptr);
  }
}

在初始化过程中，会创建一个高度为kMaxHeight（默认是12）的头部节点，并把头部节点每一层的next指针都设置为nullptr，即各层链表初始都为空；同时max_height_被初始化为1。

跳表插入数据

// Inserts key into the skip list. Asserts that no node with an equal key is
// already present.
template <typename Key, class Comparator>
void SkipList<Key, Comparator>::Insert(const Key& key) {
  // TODO(opt): We can use a barrier-free variant of FindGreaterOrEqual()
  // here since Insert() is externally synchronized.
  Node* prev[kMaxHeight];                                                 // prev[i]: node after which the new node is linked on level i
  Node* x = FindGreaterOrEqual(key, prev);                                // lookup only; also fills prev[] for the levels it visits

  // Our data structure does not allow duplicate insertion
  assert(x == nullptr || !Equal(key, x->key));

  int height = RandomHeight();                                            // height chosen for the new node
  if (height > GetMaxHeight()) {                                          // the new node is taller than the current list
    for (int i = GetMaxHeight(); i < height; i++) {                       // levels the search above did not visit
      prev[i] = head_;                                                    // on those levels the new node follows head_ directly
    }
    // It is ok to mutate max_height_ without any synchronization
    // with concurrent readers.  A concurrent reader that observes
    // the new value of max_height_ will see either the old value of
    // new level pointers from head_ (nullptr), or a new value set in
    // the loop below.  In the former case the reader will
    // immediately drop to the next level since nullptr sorts after all
    // keys.  In the latter case the reader will use the new node.
    max_height_.store(height, std::memory_order_relaxed);                 // relaxed atomic store of the new height
  }

  x = NewNode(key, height);                                               // create the node to insert
  for (int i = 0; i < height; i++) {                                      // link x in on each of its levels
    // NoBarrier_SetNext() suffices since we will add a barrier when
    // we publish a pointer to "x" in prev[i].
    x->NoBarrier_SetNext(i, prev[i]->NoBarrier_Next(i));                  // x takes over prev[i]'s current successor
    prev[i]->SetNext(i, x);                                               // then prev[i] is pointed at x
  }
}

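此时的执行过程：首先调用FindGreaterOrEqual在跳表中查找第一个大于或等于该key的节点（该函数只做查找，不会创建节点），同时在prev数组中记录每一层的前驱节点；随后通过RandomHeight得到新节点的随机高度，如果该高度超过当前的最大高度，就把超出部分各层的前驱设置为head_并更新max_height_；最后通过NewNode创建节点，并逐层把它链接到prev[i]之后，这样跳表中的数据始终按key的大小有序排列。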

// Returns true when key orders strictly after the key stored in node n.
// A null n is treated as infinitely large, so nothing comes after it.
template <typename Key, class Comparator>
bool SkipList<Key, Comparator>::KeyIsAfterNode(const Key& key, Node* n) const {
  if (n == nullptr) {
    return false;
  }
  // key is after n exactly when n's key compares less than key.
  return compare_(n->key, key) < 0;
}

// Searches from the highest level in use down to level 0 and returns the first
// level-0 node that key is not after (nullptr when there is no such node).
// When prev is non-null, prev[level] receives the node at which the search
// left each visited level, i.e. the predecessor of the result on that level.
template <typename Key, class Comparator>
typename SkipList<Key, Comparator>::Node*
SkipList<Key, Comparator>::FindGreaterOrEqual(const Key& key,
                                              Node** prev) const {
  Node* x = head_;                                      // start at the head node
  int level = GetMaxHeight() - 1;                       // start at the highest level currently in use
  while (true) {
    Node* next = x->Next(level);                        // successor of x on the current level
    if (KeyIsAfterNode(key, next)) {                    // key still orders after next
      // Keep searching in this list
      x = next;                                         // advance along the current level
    } else {
      if (prev != nullptr) prev[level] = x;             // record the predecessor on this level for the caller
      if (level == 0) {                                 // bottom level reached: next is the result
        return next;
      } else {
        // Switch to next list
        level--;
      }
    }
  }
}

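其中compare_是MemTable中定义的KeyComparator，它内部使用构造MemTable时传入的InternalKeyComparator；在默认的字节序比较器（BytewiseComparator）下，对key的比较最终调用的是Slice的比较函数；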

// Three-way comparison of this slice with b. The bytes of the common prefix
// decide first; when they are equal, the shorter slice orders first.
// Returns the memcmp result when the prefixes differ, otherwise -1, 0 or +1.
inline int Slice::compare(const Slice& b) const {
  const size_t common_len = (b.size_ < size_) ? b.size_ : size_;
  const int prefix_order = memcmp(data_, b.data_, common_len);
  if (prefix_order != 0) {
    return prefix_order;
  }
  // Common prefix is identical: fall back to comparing the lengths.
  if (size_ < b.size_) {
    return -1;
  }
  if (size_ > b.size_) {
    return +1;
  }
  return 0;
}

此时就将数据按照key的比较顺序（先逐字节比较公共前缀，前缀相同时较短的排在前面）有序地插入到了跳表中。有关跳表的基本内容大家可自行查阅。