block结构示意图
sstable中Block 头文件如下:
class Block { public: // Initialize the block with the specified contents. // Takes ownership of data[] and will delete[] it when done. Block(const char* data, size_t size); ~Block(); size_t size() const { return size_; } Iterator* NewIterator(const Comparator* comparator); private: uint32_t NumRestarts() const; const char* data_; size_t size_; uint32_t restart_offset_; // Offset in data_ of restart array // No copying allowed Block(const Block&); void operator=(const Block&); class Iter; };
重启点在上个章节已经介绍过了
"“重启点”是干什么的呢?简单来说就是进行数据压缩,减少存储空间。我们一再强调,Block内容里的KV记录是按照Key大小有序的,这样的话,相邻的两条记录很可能Key部分存在重叠,比如key i=“the car”,Key i+1=“the color”,那么两者存在重叠部分“the c”,为了减少Key的存储量,Key i+1可以只存储和上一条Key不同的部分“olor”,两者的共同部分从Key i中可以获得。记录的Key在Block内容部分就是这么存储的,主要目的是减少存储开销。“重启点”的意思是:在这条记录开始,不再采取只记载不同的Key部分,而是重新记录所有的Key值,假设Key i+1是一个重启点,那么Key里面会完整存储“the color”,而不是采用简略的“olor”方式。但是如果记录条数比较多,随机访问一条记录,需要从头开始一直解析才行,这样也产生很大的开销,所以设置了多个重启点,Block尾部就是指出哪些记录是这些重启点的。 "
//获取BLOCK中的重启点数目 inline uint32_t Block::NumRestarts() const { assert(size_ >= 2*sizeof(uint32_t)); return DecodeFixed32(data_ + size_ - sizeof(uint32_t)); //重启点在block最后8字节(uint32_t)中
}
Block的创建和销毁
Block::Block(const char* data, size_t size) : data_(data), size_(size) { if (size_ < sizeof(uint32_t)) { size_ = 0; // Error marker } else { restart_offset_ = size_ - (1 + NumRestarts()) * sizeof(uint32_t); //重启点数目1个uint32 每个重启点的偏移记录 uint32 合记共(1+NumRestarts())* sizeof(uint32_t) if (restart_offset_ > size_ - sizeof(uint32_t)) { // The size is too small for NumRestarts() and therefore // restart_offset_ wrapped around. size_ = 0; } } } Block::~Block() { delete[] data_; }
Block中每个entry的解码
entry结构如上图的 KeyValuePair
static inline const char* DecodeEntry(const char* p, const char* limit, uint32_t* shared, uint32_t* non_shared, uint32_t* value_length) { if (limit - p < 3) return NULL; //至少包含3个 共享字节 *shared = reinterpret_cast<const unsigned char*>(p)[0]; *non_shared = reinterpret_cast<const unsigned char*>(p)[1]; *value_length = reinterpret_cast<const unsigned char*>(p)[2]; if ((*shared | *non_shared | *value_length) < 128) { // Fast path: all three values are encoded in one byte each
//三个记录的值或操作后 均没有超过128 即最高位为0
p += 3; } else { if ((p = GetVarint32Ptr(p, limit, shared)) == NULL) return NULL; if ((p = GetVarint32Ptr(p, limit, non_shared)) == NULL) return NULL; if ((p = GetVarint32Ptr(p, limit, value_length)) == NULL) return NULL; } if (static_cast<uint32_t>(limit - p) < (*non_shared + *value_length)) { return NULL; } return p; }
Block使用的迭代器
class Block::Iter : public Iterator
基本数据结构
class Block::Iter : public Iterator { private: const Comparator* const comparator_; const char* const data_; // underlying block contents uint32_t const restarts_; // Offset of restart array (list of fixed32) uint32_t const num_restarts_; // Number of uint32_t entries in restart array // current_ is offset in data_ of current entry. >= restarts_ if !Valid uint32_t current_; uint32_t restart_index_; // Index of restart block in which current_ falls std::string key_; Slice value_; Status status_; inline int Compare(const Slice& a, const Slice& b) const { return comparator_->Compare(a, b); } }
// Return the offset in data_ just past the end of the current entry. //下一个记录的起点就是当前记录的末尾偏移 //当前记录加上记录的长度 和 BLOCK的起点的差 就是偏移 inline uint32_t NextEntryOffset() const { return (value_.data() + value_.size()) - data_; } uint32_t GetRestartPoint(uint32_t index) { //data_ + restarts_就是记录各个重启点偏移的数组 //根据重启点index 计算偏移data_ + restarts_ ,里面就是第index个重启点的偏移 assert(index < num_restarts_); return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t)); } void SeekToRestartPoint(uint32_t index) { key_.clear(); restart_index_ = index; // current_ will be fixed by ParseNextKey(); //value结束就是KEY的开始 所以使用value_记录 uint32_t offset = GetRestartPoint(index); value_ = Slice(data_ + offset, 0); }
bool ParseNextKey() { current_ = NextEntryOffset(); //获取下一个entry的偏移 const char* p = data_ + current_; const char* limit = data_ + restarts_; // 所有BLOCK内数据不可能超过restart if (p >= limit) { // No more entries to return. Mark as invalid. current_ = restarts_; restart_index_ = num_restarts_; return false; } // Decode next entry uint32_t shared, non_shared, value_length; //解析获取 key的共享字段长度 非共享字段长度和value的长度 p = DecodeEntry(p, limit, &shared, &non_shared, &value_length); if (p == NULL || key_.size() < shared) { CorruptionError(); return false; } else { key_.resize(shared); //key保存了其他entry的key 但是可以保留共享长度的字符串 key_.append(p, non_shared); //再添加非共享长度的字符串 就是当前KEY内容 value_ = Slice(p + non_shared, value_length); //value 就是略过key的偏移 //编译restart点 确认restart点的偏移是离自己最近的 restart_index_< current_ < (restart_index_ + 1) while (restart_index_ + 1 < num_restarts_ && GetRestartPoint(restart_index_ + 1) < current_) { ++restart_index_; } return true; } } };
参考:
https://www.cnblogs.com/itdef/p/9789620.html