原文地址:http://www.blogjava.net/sandy/archive/2012/03/16/leveldb7.html
leveldb 使用 version 来保存数据库的状态。
先看看一个重要的数据结构:sst file 的元信息(meta info)
<db/version_edit.h>
// Metadata describing a single sst (sorted table) file.
struct FileMetaData {
  int refs;              // Reference count
  int allowed_seeks;     // Number of seeks still allowed; when it reaches 0
                         // the file needs to be compacted
  uint64_t number;       // File number
  uint64_t file_size;    // File size
  InternalKey smallest;  // Smallest key in the file
  InternalKey largest;   // Largest key in the file

  // Every scalar member gets a defined initial value. `number` is zeroed as
  // well so that reading it before assignment never yields an indeterminate
  // value. allowed_seeks starts at 1 << 30.
  FileMetaData() : refs(0), allowed_seeks(1 << 30), number(0), file_size(0) {}
};
这里面有一个很有意思的字段: allowed_seeks,代表了这个文件还允许被seek的次数;当它减到0的时候,表示这个文件需要被compaction。如何设置seeks次数呢?用文件大小(字节数)除以16KB(16384),结果不到100的按100算。
int refs; // 引用计数
int allowed_seeks; // 允许的seeks次数
uint64_t number; // 文件编号
uint64_t file_size; // 文件大小
InternalKey smallest; // 最小的key
InternalKey largest; // 最大的key
FileMetaData() : refs( 0 ), allowed_seeks( 1 << 30 ), file_size( 0 ) { }
};
f
->
allowed_seeks
=
(f
->
file_size
/
16384
);
if (f -> allowed_seeks < 100 ) f -> allowed_seeks = 100 ;
原因,请看leveldb的注释:
if (f -> allowed_seeks < 100 ) f -> allowed_seeks = 100 ;
// We arrange to automatically compact this file after a certain number of seeks. Let's assume:
// (1) One seek costs 10ms
// (2) Writing or reading 1MB costs 10ms (100MB/s)
// (3) A compaction of 1MB does 25MB of IO:
// 1MB read from this level
// 10-12MB read from next level (boundaries may be misaligned)
// 10-12MB written to next level
// This implies that 25 seeks cost the same as the compaction
// of 1MB of data. I.e., one seek costs approximately the
// same as the compaction of 40KB of data. We are a little
// conservative and allow approximately one seek for every 16KB
// of data before triggering a compaction.
接下来看Version的定义,version其实就是一系列的SST file的集合。
// (1) One seek costs 10ms
// (2) Writing or reading 1MB costs 10ms (100MB/s)
// (3) A compaction of 1MB does 25MB of IO:
// 1MB read from this level
// 10-12MB read from next level (boundaries may be misaligned)
// 10-12MB written to next level
// This implies that 25 seeks cost the same as the compaction
// of 1MB of data. I.e., one seek costs approximately the
// same as the compaction of 40KB of data. We are a little
// conservative and allow approximately one seek for every 16KB
// of data before triggering a compaction.
// A Version holds the per-level lists of sst files that make up one state of
// the database.
class Version {
 public:
  // Creates the iterators used to traverse this version and appends them to
  // *iters.
  void AddIterators(const ReadOptions&, std::vector<Iterator*>* iters);

  // Seek statistics reported by Get().
  struct GetStats {
    FileMetaData* seek_file;
    int seek_file_level;
  };

  // Looks up key. If it is not found, *stats is updated.
  Status Get(const ReadOptions&, const LookupKey& key, std::string* val,
             GetStats* stats);

  // Applies the statistics from a lookup; the return value tells whether a
  // compaction is needed.
  bool UpdateStats(const GetStats& stats);

  // Reference counting, so that a version is not deleted while still in use.
  void Ref();
  void Unref();

  // Collects into *inputs the files of the given level that are related to
  // the key range [begin, end].
  void GetOverlappingInputs(
      int level,
      const InternalKey* begin,  // NULL means before all keys
      const InternalKey* end,    // NULL means after all keys
      std::vector<FileMetaData*>* inputs);

  // Reports whether the given level overlaps the given user-key range.
  bool OverlapInLevel(int level,
                      const Slice* smallest_user_key,
                      const Slice* largest_user_key);

  // Picks the level into which a memtable output covering the given user-key
  // range should be placed.
  int PickLevelForMemTableOutput(const Slice& smallest_user_key,
                                 const Slice& largest_user_key);

  // Number of files at the given level.
  int NumFiles(int level) const {
    return static_cast<int>(files_[level].size());
  }

  // Return a human readable string that describes this version's contents.
  std::string DebugString() const;

 private:
  friend class Compaction;
  friend class VersionSet;

  class LevelFileNumIterator;
  Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const;

  VersionSet* vset_;  // VersionSet to which this Version belongs
  Version* next_;     // Next version in linked list
  Version* prev_;     // Previous version in linked list
  int refs_;          // Number of live refs to this version

  // The sst files, one list per level.
  std::vector<FileMetaData*> files_[config::kNumLevels];

  // Next file to be compacted, and the level it lives in.
  FileMetaData* file_to_compact_;
  int file_to_compact_level_;

  // Compaction score; a value > 1 means a compaction is needed.
  double compaction_score_;
  int compaction_level_;

  // A new version starts out linked to itself, with no references, no file
  // chosen for compaction, and score/levels set to -1.
  explicit Version(VersionSet* vset)
      : vset_(vset),
        next_(this),
        prev_(this),
        refs_(0),
        file_to_compact_(NULL),
        file_to_compact_level_(-1),
        compaction_score_(-1),
        compaction_level_(-1) {}

  ~Version();

  // No copying allowed
  Version(const Version&);
  void operator=(const Version&);
};
public :
// 生成iterator用于遍历
void AddIterators( const ReadOptions & , std::vector < Iterator *>* iters);
// 根据key来查询,若没有查到,更新GetStats
struct GetStats {
FileMetaData * seek_file;
int seek_file_level;
};
Status Get( const ReadOptions & , const LookupKey & key, std:: string * val,
GetStats * stats);
// 是否需要进行compaction
bool UpdateStats( const GetStats & stats);
// 引用计算,避免在被引用时候删除
void Ref();
void Unref();
// 查询和key range有关的files
void GetOverlappingInputs(
int level,
const InternalKey * begin, // NULL means before all keys
const InternalKey * end, // NULL means after all keys
std::vector < FileMetaData *>* inputs);
// 计算是否level对某个key range是否有overlap
bool OverlapInLevel( int level,
const Slice * smallest_user_key,
const Slice * largest_user_key);
// memtable output应该放到哪个level
int PickLevelForMemTableOutput( const Slice & smallest_user_key,
const Slice & largest_user_key);
// 某个level的文件个数
int NumFiles( int level) const { return files_[level].size(); }
// Return a human readable string that describes this version's contents.
std:: string DebugString() const ;
private :
friend class Compaction;
friend class VersionSet;
class LevelFileNumIterator;
Iterator * NewConcatenatingIterator( const ReadOptions & , int level) const ;
VersionSet * vset_; // VersionSet to which this Version belongs
Version * next_; // Next version in linked list
Version * prev_; // Previous version in linked list
int refs_; // Number of live refs to this version
// sst files
std::vector < FileMetaData *> files_[config::kNumLevels];
// 下一个要被compaction的文件
FileMetaData * file_to_compact_;
int file_to_compact_level_;
// compaction score:>1表示要compaction
double compaction_score_;
int compaction_level_;
explicit Version(VersionSet * vset)
: vset_(vset), next_( this ), prev_( this ), refs_( 0 ),
file_to_compact_(NULL),
file_to_compact_level_( - 1 ),
compaction_score_( - 1 ),
compaction_level_( - 1 ) {
}
~ Version();
// No copying allowed
Version( const Version & );
void operator = ( const Version & );
};
那VersionSet呢?VersionSet 是由一系列 version 组成的一个双向循环链表。
// Excerpt of VersionSet; the "// . . ." markers stand for members omitted
// from this listing.
class VersionSet {
  // . . .
  Env* const env_;
  const std::string dbname_;
  const Options* const options_;
  TableCache* const table_cache_;
  const InternalKeyComparator icmp_;
  uint64_t next_file_number_;
  uint64_t manifest_file_number_;
  uint64_t last_sequence_;
  uint64_t log_number_;

  WritableFile* descriptor_file_;
  log::Writer* descriptor_log_;
  Version dummy_versions_;  // Head of circular doubly-linked list of versions.
  Version* current_;        // == dummy_versions_.prev_

  // Each level has a compact pointer indicating where the next compaction at
  // that level should start, which is used to compact in a round-robin way.
  std::string compact_pointer_[config::kNumLevels];
  // . . .
};
// . . .
Env * const env_;
const std:: string dbname_;
const Options * const options_;
TableCache * const table_cache_;
const InternalKeyComparator icmp_;
uint64_t next_file_number_;
uint64_t manifest_file_number_;
uint64_t last_sequence_;
uint64_t log_number_;
WritableFile * descriptor_file_;
log::Writer * descriptor_log_;
Version dummy_versions_; // Head of circular doubly-linked list of versions.
Version * current_; // == dummy_versions_.prev_
// 每层都有一个compact pointer用于指示下次从哪里开始compact,以用于实现循环compact
std:: string compact_pointer_[config::kNumLevels];
// . . .
}
VersionEdit是version对象的变更记录,用于写入manifest.这样通过原始的version加上一系列的versionedit的记录,就可以恢复到最新状态。
class
VersionEdit {
public :
VersionEdit() { Clear(); }
~ VersionEdit() { }
void Clear();
void SetComparatorName( const Slice & name) {
has_comparator_ = true ;
comparator_ = name.ToString();
}
void SetLogNumber(uint64_t num) {
has_log_number_ = true ;
log_number_ = num;
}
void SetPrevLogNumber(uint64_t num) {
has_prev_log_number_ = true ;
prev_log_number_ = num;
}
void SetNextFile(uint64_t num) {
has_next_file_number_ = true ;
next_file_number_ = num;
}
void SetLastSequence(SequenceNumber seq) {
has_last_sequence_ = true ;
last_sequence_ = seq;
}
void SetCompactPointer( int level, const InternalKey & key) {
compact_pointers_.push_back(std::make_pair(level, key));
}
// 添加meta file
void AddFile( int level, uint64_t file,
uint64_t file_size,
const InternalKey & smallest,
const InternalKey & largest) {
FileMetaData f;
f.number = file;
f.file_size = file_size;
f.smallest = smallest;
f.largest = largest;
new_files_.push_back(std::make_pair(level, f));
}
// 删除特定的文件
void DeleteFile( int level, uint64_t file) {
deleted_files_.insert(std::make_pair(level, file));
}
// 编码,解码:用于写入manifest
void EncodeTo(std:: string * dst) const ;
Status DecodeFrom( const Slice & src);
std:: string DebugString() const ;
private :
friend class VersionSet;
typedef std:: set < std::pair < int , uint64_t > > DeletedFileSet;
std:: string comparator_;
uint64_t log_number_;
uint64_t prev_log_number_;
uint64_t next_file_number_;
SequenceNumber last_sequence_;
bool has_comparator_;
bool has_log_number_;
bool has_prev_log_number_;
bool has_next_file_number_;
bool has_last_sequence_;
std::vector < std::pair < int , InternalKey > > compact_pointers_;
DeletedFileSet deleted_files_;
std::vector < std::pair < int , FileMetaData > > new_files_;
};
public :
VersionEdit() { Clear(); }
~ VersionEdit() { }
void Clear();
void SetComparatorName( const Slice & name) {
has_comparator_ = true ;
comparator_ = name.ToString();
}
void SetLogNumber(uint64_t num) {
has_log_number_ = true ;
log_number_ = num;
}
void SetPrevLogNumber(uint64_t num) {
has_prev_log_number_ = true ;
prev_log_number_ = num;
}
void SetNextFile(uint64_t num) {
has_next_file_number_ = true ;
next_file_number_ = num;
}
void SetLastSequence(SequenceNumber seq) {
has_last_sequence_ = true ;
last_sequence_ = seq;
}
void SetCompactPointer( int level, const InternalKey & key) {
compact_pointers_.push_back(std::make_pair(level, key));
}
// 添加meta file
void AddFile( int level, uint64_t file,
uint64_t file_size,
const InternalKey & smallest,
const InternalKey & largest) {
FileMetaData f;
f.number = file;
f.file_size = file_size;
f.smallest = smallest;
f.largest = largest;
new_files_.push_back(std::make_pair(level, f));
}
// 删除特定的文件
void DeleteFile( int level, uint64_t file) {
deleted_files_.insert(std::make_pair(level, file));
}
// 编码,解码:用于写入manifest
void EncodeTo(std:: string * dst) const ;
Status DecodeFrom( const Slice & src);
std:: string DebugString() const ;
private :
friend class VersionSet;
typedef std:: set < std::pair < int , uint64_t > > DeletedFileSet;
std:: string comparator_;
uint64_t log_number_;
uint64_t prev_log_number_;
uint64_t next_file_number_;
SequenceNumber last_sequence_;
bool has_comparator_;
bool has_log_number_;
bool has_prev_log_number_;
bool has_next_file_number_;
bool has_last_sequence_;
std::vector < std::pair < int , InternalKey > > compact_pointers_;
DeletedFileSet deleted_files_;
std::vector < std::pair < int , FileMetaData > > new_files_;
};