Comparator
在leveldb中,Comparator是一个抽象类。默认的比较器(BytewiseComparator)按字节的字典序进行比较。
// Abstract interface that defines an ordering over keys (Slices).
// Every operation is pure virtual and must be supplied by a subclass.
class Comparator {
public:
virtual ~Comparator();
// Three-way comparison. Returns value:
// < 0 iff "a" < "b",
// == 0 iff "a" == "b",
// > 0 iff "a" > "b"
virtual int Compare(const Slice& a, const Slice& b) const = 0;
// The name of the comparator.
virtual const char* Name() const = 0;
// Advanced functions: these are used to reduce the space requirements
// for internal data structures like index blocks.
//
// If *start < limit, changes *start to a short string in [start,limit).
// Simple comparator implementations may return with *start unchanged,
// i.e., an implementation of this method that does nothing is correct.
virtual void FindShortestSeparator(
std::string* start,
const Slice& limit) const = 0;
// Changes *key to a short string >= *key.
// Simple comparator implementations may return with *key unchanged,
// i.e., an implementation of this method that does nothing is correct.
virtual void FindShortSuccessor(std::string* key) const = 0;
};
// Returns a builtin comparator that uses lexicographic byte-wise
// ordering. The returned object remains the property of this module:
// callers must not delete it.
extern const Comparator* BytewiseComparator();
}
BytewiseComparatorImpl
实现:
// Comparator implementation that orders keys byte by byte.
class BytewiseComparatorImpl : public Comparator {
 public:
  BytewiseComparatorImpl() { }

  // Identifier string of this comparator.
  virtual const char* Name() const {
    return "leveldb.BytewiseComparator";
  }

  // Three-way comparison of two slices, delegated to Slice::compare.
  virtual int Compare(const Slice& a, const Slice& b) const {
    return a.compare(b);
  }

  // Byte-wise FindShortestSeparator: when *start and limit first differ at
  // some position, and the byte of *start there can be incremented while
  // staying strictly below the corresponding byte of limit, *start is
  // replaced by the common prefix followed by that incremented byte.
  // In every other case *start is left unchanged.
  virtual void FindShortestSeparator(
      std::string* start,
      const Slice& limit) const {
    // Walk past the bytes that *start and limit have in common.
    const size_t shared_bound = std::min(start->size(), limit.size());
    size_t pos = 0;
    for (; pos < shared_bound; ++pos) {
      if ((*start)[pos] != limit[pos]) {
        break;
      }
    }

    if (pos >= shared_bound) {
      // One string is a prefix of the other: do not shorten.
      return;
    }

    // Only bump the differing byte when it is not 0xff and the bumped
    // value is still strictly smaller than limit's byte at this position.
    const uint8_t first_diff = static_cast<uint8_t>((*start)[pos]);
    const bool can_bump =
        first_diff < static_cast<uint8_t>(0xff) &&
        first_diff + 1 < static_cast<uint8_t>(limit[pos]);
    if (!can_bump) {
      return;
    }

    (*start)[pos]++;
    start->resize(pos + 1);
    assert(Compare(*start, limit) < 0);
  }

  // Byte-wise FindShortSuccessor: increments the first byte that is not
  // 0xff and truncates *key right after it.
  virtual void FindShortSuccessor(std::string* key) const {
    const size_t length = key->size();

    // Skip the leading run of 0xff bytes, which cannot be incremented.
    size_t pos = 0;
    while (pos < length &&
           static_cast<uint8_t>((*key)[pos]) == static_cast<uint8_t>(0xff)) {
      ++pos;
    }

    if (pos == length) {
      // *key is a run of 0xffs (or empty). Leave it alone.
      return;
    }

    const uint8_t current = (*key)[pos];
    (*key)[pos] = current + 1;
    key->resize(pos + 1);
  }
};
InternalKeyComparator
Internal key由user key、sequence number和value type组合而成,因此InternalKeyComparator需要一个user_comparator_成员来比较其中的user key部分。
// Comparator for internal keys. It holds a pointer to the comparator
// that is used for the user-key portion of the keys.
class InternalKeyComparator : public Comparator {
 public:
  explicit InternalKeyComparator(const Comparator* c)
      : user_comparator_(c) {
  }

  virtual const char* Name() const;
  virtual int Compare(const Slice& a, const Slice& b) const;
  virtual void FindShortestSeparator(
      std::string* start,
      const Slice& limit) const;
  virtual void FindShortSuccessor(std::string* key) const;

  // Returns the comparator that was passed to the constructor.
  const Comparator* user_comparator() const {
    return user_comparator_;
  }

  // Overload that takes InternalKey objects instead of Slices.
  int Compare(const InternalKey& a, const InternalKey& b) const;

 private:
  // Comparison function for user keys.
  const Comparator* user_comparator_;
};
具体实现:
// Returns the name of this comparator.
const char* InternalKeyComparator::Name() const {
return "leveldb.InternalKeyComparator";
}
// Three-way comparison of two internal keys.
//
// Ordering:
//   1. increasing user key (according to the user-supplied comparator);
//   2. for equal user keys, decreasing value of the 64-bit number stored
//      in the last 8 bytes of the key (sequence number | value type).
// Nothing beyond that trailing number is compared.
int InternalKeyComparator::Compare(const Slice& akey, const Slice& bkey) const {
  const int user_order =
      user_comparator_->Compare(ExtractUserKey(akey), ExtractUserKey(bkey));
  if (user_order != 0) {
    // User keys differ, so they alone decide the result.
    return user_order;
  }

  // Same user key: the larger trailing number sorts first.
  const uint64_t a_tag = DecodeFixed64(akey.data() + akey.size() - 8);
  const uint64_t b_tag = DecodeFixed64(bkey.data() + bkey.size() - 8);
  if (a_tag > b_tag) {
    return -1;
  }
  if (a_tag < b_tag) {
    return +1;
  }
  return 0;
}
// FindShortestSeparator for internal keys.
//
// Asks the user comparator for a short separator between the user-key
// portions of *start and limit. If that yields a user key that is larger
// than the original one, *start is replaced by the new user key followed
// by the packed (kMaxSequenceNumber, kValueTypeForSeek) tag. Otherwise
// *start is left unchanged.
void InternalKeyComparator::FindShortestSeparator(
    std::string* start,
    const Slice& limit) const {
  // Attempt to shorten the user portion of the key.
  Slice user_start = ExtractUserKey(*start);
  Slice user_limit = ExtractUserKey(limit);
  std::string tmp(user_start.data(), user_start.size());
  user_comparator_->FindShortestSeparator(&tmp, user_limit);

  // Compare user key against user key. The user comparator must not be
  // handed *start itself: that is a full internal key with the packed
  // sequence/type tag appended, whereas tmp is a bare user key here.
  if (user_comparator_->Compare(user_start, tmp) < 0) {
    // User key has become larger. Tack on the largest sequence number so
    // the result is the earliest-sorting internal key for that user key.
    PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
    assert(this->Compare(*start, tmp) < 0);
    assert(this->Compare(tmp, limit) < 0);
    start->swap(tmp);
  }
}
//取出internal key的user key字段,根据internal key字段找到并替换key,如果key被替换了,
//就用新的key更新Internal Key,并使用最大的sequence number。否则保持不变。
void InternalKeyComparator::FindShortSuccessor(std::string* key) const {
Slice user_key = ExtractUserKey(*key);
std::string tmp(user_key.data(), user_key.size());
user_comparator_->FindShortSuccessor(&tmp);
if (user_comparator_->Compare(user_key, tmp) < 0) {
// User key has become larger. Tack on the earliest possible
// number to the shortened user key.
PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber,kValueTypeForSeek));
assert(this->Compare(*key, tmp) < 0);
key->swap(tmp);
}
}