上一节我们已经详细表述了Ssttbale的组成,从逻辑结构和物理结构出发做了了解。 那么我们在这里怎么创建一个Ssttable呢,时序肯定是这样的:对于一个<key,value> 我们先构建他的逻辑结构, 先是构建Block数据部分,然后再是填写meta Block和索引部分 ,等到这些逻辑部分填充完毕我们在将他们写入磁盘Block(根据type启用压缩)
关键的便是Block数据的填充,即record的写入。
本篇我们将要讲述Block的构建和读取(查询) 对于Block的构建是通过class BlockBuilder完成 。类的声明如下
class BlockBuilder {
public:
explicit BlockBuilder(const Options* options);
// Reset the contents as if the BlockBuilder was just constructed.
void Reset(); //重设内容,通常在Finish之后调用已构建新的block
// REQUIRES: Finish() has not been callled since the last call to Reset().
// REQUIRES: key is larger than any previously added key
void Add(const Slice& key, const Slice& value); //添加数据<key,value>
// Finish building the block and return a slice that refers to the
// block contents. The returned slice will remain valid for the
// lifetime of this builder or until Reset() is called.
Slice Finish(); // 结束构建block,并返回指向block内容的指针
// Returns an estimate of the current (uncompressed) size of the block
// we are building.
size_t CurrentSizeEstimate() const; // 返回正在构建block的未压缩大小估计值
// Return true iff no entries have been added since the last Reset()
bool empty() const {
return buffer_.empty();
}
private:
const Options* options_;
std::string buffer_; // Destination buffer //目标Block内容
std::vector<uint32_t> restarts_; // Restart points //重启点
int counter_; // Number of entries emitted since restart //重启点后面的入口数目
bool finished_; // Has Finish() been called? //结束标志
std::string last_key_; // 记录最后添加的key
// No copying allowed
BlockBuilder(const BlockBuilder&);
void operator=(const BlockBuilder&);
接口注释很齐全,那么我们在看看具体实现
下面的代码我们给出详细出注释
BlockBuilder::BlockBuilder(const Options* options)
: options_(options),
restarts_(),
counter_(0),
finished_(false) {
assert(options->block_restart_interval >= 1);
restarts_.push_back(0); // First restart point is at offset 0 //重启点开始设为0
}
下面是重置reset方法,一般用在finish 之后,用于已经构建好的Block.
void BlockBuilder::Reset() {
buffer_.clear();
restarts_.clear();
restarts_.push_back(0); // First restart point is at offset 0
counter_ = 0;
finished_ = false;
last_key_.clear();
}
接下来是Block大小的估算代码也很简单
size_t BlockBuilder::CurrentSizeEstimate() const {
return (buffer_.size() + // Raw data buffer
restarts_.size() * sizeof(uint32_t) + // Restart array
sizeof(uint32_t)); // Restart array length
}
Add方法是重点 采用前缀压缩,给出详细注释如下
void BlockBuilder::Add(const Slice& key, const Slice& value) {
Slice last_key_piece(last_key_);
assert(!finished_);
assert(counter_ <= options_->block_restart_interval);
assert(buffer_.empty() // No values yet?
|| options_->comparator->Compare(key, last_key_piece) > 0);
size_t shared = 0; //计算前缀长度
if (counter_ < options_->block_restart_interval) {
// See how much sharing to do with previous string
const size_t min_length = std::min(last_key_piece.size(), key.size()); //取两者较小值
while ((shared < min_length) && (last_key_piece[shared] == key[shared])) {
shared++; //公共长度自增
}
} else {
// Restart compression
restarts_.push_back(buffer_.size()); //出错处理
counter_ = 0;
}
const size_t non_shared = key.size() - shared; //非共享字符长度
// Add "<shared><non_shared><value_size>" to buffer_
PutVarint32(&buffer_, shared); //写入shared字段
PutVarint32(&buffer_, non_shared); //非共享字段
PutVarint32(&buffer_, value.size());
// Add string delta to buffer_ followed by value
buffer_.append(key.data() + shared, non_shared); //写入非共享资源
buffer_.append(value.data(), value.size()); //写入value值
// Update state
last_key_.resize(shared);
last_key_.append(key.data() + shared, non_shared);
assert(Slice(last_key_) == key);
counter_++; //entries计数
}
以上我们已经全部解析出 Block的构建过程,下面我们重点谈谈Block的读取过程。读取是class Block完成的
class Block {
public:
// Initialize the block with the specified contents.
explicit Block(const BlockContents& contents);
~Block();
size_t size() const { return size_; }
Iterator* NewIterator(const Comparator* comparator); //用来遍历<key,value>
private:
uint32_t NumRestarts() const;
const char* data_; //block数据指针
size_t size_; //block数据大小
uint32_t restart_offset_; // Offset in data_ of restart array //重启点数组偏移
bool owned_; // Block owns data_[]
// No copying allowed
Block(const Block&);
void operator=(const Block&);
class Iter;
Block采用iter来遍历和读取<key,value>
遍历的过程是这样的:
1.找到相关重启点(key之前的一个重启点)
2.在两个重启点指间依次查找<key,value>
查找之前一个重启点的过程是二分法实现的,那么为什么重启点直接的record不能二分查找呢,原因在于record长度不一致
二分查找重启点代码如下
virtual void Seek(const Slice& target) {
// Binary search in restart array to find the last restart point
// with a key < target
uint32_t left = 0;
uint32_t right = num_restarts_ - 1;
while (left < right) {
uint32_t mid = (left + right + 1) / 2;
uint32_t region_offset = GetRestartPoint(mid);
uint32_t shared, non_shared, value_length;
const char* key_ptr = DecodeEntry(data_ + region_offset,
data_ + restarts_,
&shared, &non_shared, &value_length);
if (key_ptr == NULL || (shared != 0)) {
CorruptionError();
return;
}
Slice mid_key(key_ptr, non_shared);
if (Compare(mid_key, target) < 0) {
// Key at "mid" is smaller than "target". Therefore all
// blocks before "mid" are uninteresting.
left = mid;
} else {
// Key at "mid" is >= "target". Therefore all blocks at or
// after "mid" are uninteresting.
right = mid - 1;
}
}
得到重启点后便沿着重启点线性向下查找
while (true) {
if (!ParseNextKey()) return;
if (Compare(key_, target)>= 0) return;
}
到此为止我们便能够查找到目标<key,value>
以上就是Block的构建和读(查询过程)SStable的构建和查找基本上也是在这之上的,下篇说说bloom filter策略。