leveldb的后台进程是leveldb当中比较难的部分,到现在还是模模糊糊的,先把自己懂的记录下来,以后再学习
1、leveldb通过DBImpl::MaybeScheduleCompaction调度后台任务(其实是运行在一个后台线程中的任务,并不是单独的进程),对数据进行合并、转储、压缩等处理
2、它首先判断是否已经持有互斥锁
3、判断是否已经启动后台进程,如果已经启动了,就什么都不做
4、判断数据库是否已经被删除,如果已经被删除了,就什么都不做
5、判断是否有后台错误产生,如果有,那么什么都不做
6、判断是否真正需要启用后台进程,如果不需要,就退出
7、创建一个线程,线程函数为DBImpl::BGWork(它内部直接调用DBImpl::BackgroundCall,后者会调用BackgroundCompaction()进行处理)
8、BackgroundCall的最后又会重新调用MaybeScheduleCompaction(),形成了循环,不断的进行处理。
BackgroundCompaction()的流程:
1、断言是否持有锁
2、如果只读的内存table不为空,调用CompactMemTable对内存表进行处理
3、判断是否为手工压缩。所谓手工压缩就是leveldb的使用者调用某些接口,对数据库的数据主动进行压缩。
4、如果是手工压缩,那么根据传进来的最大、最小键值调用CompactRange进行指定压缩的范围;否则后台自己调用PickCompaction函数挑选数据。
5、如果代价很小,那么直接把level层的文件推到level+1层即可。否则需要调用DoCompactionWork进行压缩
6、清理释放内存等
DoCompactionWork函数的流程:
1、获取需要压缩的数据的迭代器,并移动到开头
2、对于每一条需要压缩的数据,进行下面的处理
3、对内存表压缩,把只读内存表转储到硬盘上
4、如果遇到某个键值的时候需要停止当前的输出文件(避免与更高层产生过多重叠),那么就先结束当前的输出文件,再继续处理后面的键值
5、解析内部使用的键
6、判断某个键是要被丢弃(例如删除类型的就可能会被丢弃)
7、创建压缩构建器
8、把键值添加到构建器中
9、如果构建器的大小已经达到阈值,那么就结束当前的输出文件(下一个键值会写入新的输出文件),转到2继续
10、等待压缩的完成统计读取和写入的字节数
11、合并压缩已经完成,那么应该把这种改变反映到当前的版本上,即把版本增量应用到版本上。
// Queue a background compaction task if there is work to do and none is
// already scheduled.  Requires: mutex_ held.
void DBImpl::MaybeScheduleCompaction() {
  mutex_.AssertHeld();
  if (bg_compaction_scheduled_) {
    // A task is already queued; it will re-check for work when it finishes.
    return;
  }
  if (shutting_down_.Acquire_Load()) {
    // DB is being deleted; no more background compactions.
    return;
  }
  if (!bg_error_.ok()) {
    // Already got an error; no more changes.
    return;
  }
  // Work exists if there is an immutable memtable to flush, a pending
  // manual compaction, or the version set wants an automatic compaction.
  const bool has_work = (imm_ != NULL) ||
                        (manual_compaction_ != NULL) ||
                        versions_->NeedsCompaction();
  if (!has_work) {
    return;
  }
  bg_compaction_scheduled_ = true;
  env_->Schedule(&DBImpl::BGWork, this);
}
// Run one round of background compaction.  Called on the background thread
// with mutex_ held.  Priority: flush the immutable memtable first; otherwise
// run either a manual (user-requested) compaction or one picked
// automatically by the version set.
void DBImpl::BackgroundCompaction() {
  mutex_.AssertHeld();
  // The immutable memtable takes priority: flush it and return.  The caller
  // loops back into MaybeScheduleCompaction(), so other work is not lost.
  if (imm_ != NULL) {
    CompactMemTable();
    return;
  }

  Compaction* c;
  // A manual compaction is one requested by the user via the public
  // CompactRange interface.
  bool is_manual = (manual_compaction_ != NULL);
  InternalKey manual_end;
  if (is_manual) {
    ManualCompaction* m = manual_compaction_;
    // Build a compaction over [m->begin, m->end] at m->level.
    c = versions_->CompactRange(m->level, m->begin, m->end);
    // NULL means nothing (left) to compact in the requested range.
    m->done = (c == NULL);
    if (c != NULL) {
      // Largest key of the last input file: where this round will stop.
      manual_end = c->input(0, c->num_input_files(0) - 1)->largest;
    }
    Log(options_.info_log,
        "Manual compaction at level-%d from %s .. %s; will stop at %s\n",
        m->level,
        (m->begin ? m->begin->DebugString().c_str() : "(begin)"),
        (m->end ? m->end->DebugString().c_str() : "(end)"),
        (m->done ? "(end)" : manual_end.DebugString().c_str()));
  }
  else
  {
    // Automatic mode: let the version set pick the level/files to compact.
    c = versions_->PickCompaction();
  }

  Status status;
  if (c == NULL) {
    // Nothing to do
  }
  // Trivial move: the inputs are a single level-n file with no expensive
  // overlap at level n+2 ("grandparents"), so the file can simply be
  // re-pointed at level n+1 instead of being rewritten.  Manual compactions
  // skip this path so their progress bookkeeping stays accurate.
  else if (!is_manual && c->IsTrivialMove())
  {
    // Move file to next level
    assert(c->num_input_files(0) == 1);
    FileMetaData* f = c->input(0, 0);
    // Remove the file from its current level...
    c->edit()->DeleteFile(c->level(), f->number);
    // ...and re-add the same file one level down.
    c->edit()->AddFile(c->level() + 1, f->number, f->file_size,
                       f->smallest, f->largest);
    // Persist and apply the version edit.
    status = versions_->LogAndApply(c->edit(), &mutex_);
    if (!status.ok()) {
      RecordBackgroundError(status);
    }
    VersionSet::LevelSummaryStorage tmp;
    Log(options_.info_log, "Moved #%lld to level-%d %lld bytes %s: %s\n",
        static_cast<unsigned long long>(f->number),
        c->level() + 1,
        static_cast<unsigned long long>(f->file_size),
        status.ToString().c_str(),
        versions_->LevelSummary(&tmp));
  }
  else
  {
    // Full compaction: merge the inputs into new output tables.
    CompactionState* compact = new CompactionState(c);
    status = DoCompactionWork(compact);
    if (!status.ok()) {
      RecordBackgroundError(status);
    }
    // Release compaction state, drop input refs, and prune dead files.
    CleanupCompaction(compact);
    c->ReleaseInputs();
    DeleteObsoleteFiles();
  }
  delete c;

  if (status.ok())
  {
    // Done
  }
  else if (shutting_down_.Acquire_Load())
  {
    // Ignore compaction errors found during shutting down
  }
  else
  {
    Log(options_.info_log,
        "Compaction error: %s", status.ToString().c_str());
  }

  if (is_manual) {
    ManualCompaction* m = manual_compaction_;
    // On error, mark the manual compaction done so its waiter is released.
    if (!status.ok()) {
      m->done = true;
    }
    if (!m->done) {
      // We only compacted part of the requested range.  Update *m
      // to the range that is left to be compacted.
      m->tmp_storage = manual_end;
      m->begin = &m->tmp_storage;
    }
    manual_compaction_ = NULL;
  }
}
// The core of a compaction: iterate over all input entries in sorted order,
// decide which are still live, and write the survivors into a sequence of
// new output tables.  Runs mostly with mutex_ released.
Status DBImpl::DoCompactionWork(CompactionState* compact) {
  const uint64_t start_micros = env_->NowMicros();
  int64_t imm_micros = 0;  // Micros spent doing imm_ compactions

  Log(options_.info_log, "Compacting %d@%d + %d@%d files",
      compact->compaction->num_input_files(0),
      compact->compaction->level(),
      compact->compaction->num_input_files(1),
      compact->compaction->level() + 1);

  assert(versions_->NumLevelFiles(compact->compaction->level()) > 0);
  assert(compact->builder == NULL);
  assert(compact->outfile == NULL);
  // smallest_snapshot: sequence numbers at or below this are invisible to
  // every live snapshot, so shadowed entries older than it may be dropped.
  if (snapshots_.empty()) {
    compact->smallest_snapshot = versions_->LastSequence();
  } else {
    compact->smallest_snapshot = snapshots_.oldest()->number_;
  }

  // Release mutex while we're actually doing the compaction work
  mutex_.Unlock();

  // The input iterator merges every file participating in the compaction.
  Iterator* input = versions_->MakeInputIterator(compact->compaction);
  input->SeekToFirst();
  Status status;
  ParsedInternalKey ikey;
  // Track the most recently seen user key so that older entries for the
  // same key can be detected (entries arrive newest-first per key).
  std::string current_user_key;
  bool has_current_user_key = false;
  SequenceNumber last_sequence_for_key = kMaxSequenceNumber;
  // Process every input entry, unless a shutdown is requested.
  for (; input->Valid() && !shutting_down_.Acquire_Load(); ) {
    // Prioritize immutable compaction work: if a memtable became immutable
    // while we were compacting, flush it first so writers are not stalled.
    if (has_imm_.NoBarrier_Load() != NULL)
    {
      const uint64_t imm_start = env_->NowMicros();
      mutex_.Lock();
      if (imm_ != NULL)
      {
        CompactMemTable();
        bg_cv_.SignalAll();  // Wakeup MakeRoomForWrite() if necessary
      }
      mutex_.Unlock();
      // Time spent flushing the memtable is excluded from compaction stats.
      imm_micros += (env_->NowMicros() - imm_start);
    }

    Slice key = input->key();
    // Cut the current output file before this key if extending it would
    // create too much overlap with the grandparent level.
    if (compact->compaction->ShouldStopBefore(key) &&
        compact->builder != NULL)
    {
      status = FinishCompactionOutputFile(compact, input);
      if (!status.ok())
      {
        break;
      }
    }

    // Handle key/value, add to state, etc.
    bool drop = false;
    if (!ParseInternalKey(key, &ikey)) {
      // Do not hide error keys: reset tracking so the corrupt entry is kept.
      current_user_key.clear();
      has_current_user_key = false;
      last_sequence_for_key = kMaxSequenceNumber;
    }
    else
    {
      if (!has_current_user_key ||
          user_comparator()->Compare(ikey.user_key,Slice(current_user_key)) != 0)
      {
        // First occurrence of this user key
        current_user_key.assign(ikey.user_key.data(), ikey.user_key.size());
        has_current_user_key = true;
        last_sequence_for_key = kMaxSequenceNumber;
      }
      // A previously seen (newer) entry for the same user key already has a
      // sequence number at or below the smallest snapshot, so no snapshot
      // can ever observe this older entry: drop it.
      if (last_sequence_for_key <= compact->smallest_snapshot)
      {
        // Hidden by an newer entry for same user key
        drop = true;    // (A)
      }
      // A deletion marker can itself be dropped once it is invisible to
      // every snapshot AND no higher level could still hold the key it
      // deletes (the conditions spelled out below).
      else if (ikey.type == kTypeDeletion &&
               ikey.sequence <= compact->smallest_snapshot &&
               compact->compaction->IsBaseLevelForKey(ikey.user_key))
      {
        // For this user key:
        // (1) there is no data in higher levels
        // (2) data in lower levels will have larger sequence numbers
        // (3) data in layers that are being compacted here and have
        //     smaller sequence numbers will be dropped in the next
        //     few iterations of this loop (by rule (A) above).
        // Therefore this deletion marker is obsolete and can be dropped.
        drop = true;
      }
      last_sequence_for_key = ikey.sequence;
    }
#if 0
    Log(options_.info_log,
        " Compact: %s, seq %d, type: %d %d, drop: %d, is_base: %d, "
        "%d smallest_snapshot: %d",
        ikey.user_key.ToString().c_str(),
        (int)ikey.sequence, ikey.type, kTypeValue, drop,
        compact->compaction->IsBaseLevelForKey(ikey.user_key),
        (int)last_sequence_for_key, (int)compact->smallest_snapshot);
#endif

    if (!drop) {
      // Open output file if necessary
      if (compact->builder == NULL)
      {
        status = OpenCompactionOutputFile(compact);
        if (!status.ok())
        {
          break;
        }
      }
      // The first key added to an output file is its smallest key; every
      // subsequent key updates the largest.
      if (compact->builder->NumEntries() == 0)
      {
        compact->current_output()->smallest.DecodeFrom(key);
      }
      compact->current_output()->largest.DecodeFrom(key);
      compact->builder->Add(key, input->value());

      // Close output file if it is big enough; the next surviving entry
      // will start a fresh output file.
      if (compact->builder->FileSize() >= compact->compaction->MaxOutputFileSize())
      {
        status = FinishCompactionOutputFile(compact, input);
        if (!status.ok()) {
          break;
        }
      }
    }
    input->Next();
  }

  // If a shutdown started mid-compaction, surface it as an error so the
  // partial results are discarded.
  if (status.ok() && shutting_down_.Acquire_Load()) {
    status = Status::IOError("Deleting DB during compaction");
  }
  // Finish the last, partially filled output file.
  if (status.ok() && compact->builder != NULL) {
    status = FinishCompactionOutputFile(compact, input);
  }
  if (status.ok()) {
    status = input->status();
  }
  delete input;
  input = NULL;

  // Gather stats: elapsed time (minus memtable flushes), bytes read from
  // the inputs, bytes written to the outputs.
  CompactionStats stats;
  stats.micros = env_->NowMicros() - start_micros - imm_micros;
  for (int which = 0; which < 2; which++)
  {
    for (int i = 0; i < compact->compaction->num_input_files(which); i++) {
      stats.bytes_read += compact->compaction->input(which, i)->file_size;
    }
  }
  for (size_t i = 0; i < compact->outputs.size(); i++) {
    stats.bytes_written += compact->outputs[i].file_size;
  }

  mutex_.Lock();
  // Stats are charged to the output level (level + 1).
  stats_[compact->compaction->level() + 1].Add(stats);

  // Install the results: apply the version edit so the new tables replace
  // the compacted inputs in the current version.
  if (status.ok()) {
    status = InstallCompactionResults(compact);
  }
  if (!status.ok()) {
    RecordBackgroundError(status);
  }
  VersionSet::LevelSummaryStorage tmp;
  Log(options_.info_log,
      "compacted to: %s", versions_->LevelSummary(&tmp));
  return status;
}
// 合并压缩内存表格
void DBImpl::CompactMemTable()
{
mutex_.AssertHeld();
assert(imm_ != NULL);
// Save the contents of the memtable as a new Table
// 版本修改
VersionEdit edit;
// 当前的版本
Version* base = versions_->current();
base->Ref();
// 把只读内存表写到硬盘中
Status s = WriteLevel0Table(imm_, &edit, base);
base->Unref();
// 如果数据库已经被删除了,那么出错
if (s.ok() && shutting_down_.Acquire_Load()) {
s = Status::IOError("Deleting DB during memtable compaction");
}
// Replace immutable memtable with the generated Table
// 记录并应用版本修改,是当前版本达到最新
if (s.ok())
{
edit.SetPrevLogNumber(0);
edit.SetLogNumber(logfile_number_); // Earlier logs no longer needed
// 记录并应用
s = versions_->LogAndApply(&edit, &mutex_);
}
// 如果成功,那么只读的内存表可以删除了
if (s.ok()) {
// Commit to the new state
imm_->Unref();
imm_ = NULL;
has_imm_.Release_Store(NULL);
// 删除过期文件
DeleteObsoleteFiles();
} else {
RecordBackgroundError(s);
}
}
// Start the next compaction output table: reserve a file number, register
// an Output slot for it, create the file, and attach a TableBuilder.
Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) {
  assert(compact != NULL);
  assert(compact->builder == NULL);
  uint64_t file_number;
  {
    // Allocate a file number under the mutex and record it in
    // pending_outputs_ so the new file is protected from garbage
    // collection while the compaction is still running.
    mutex_.Lock();
    file_number = versions_->NewFileNumber();
    pending_outputs_.insert(file_number);
    CompactionState::Output output;
    output.number = file_number;
    output.smallest.Clear();
    output.largest.Clear();
    compact->outputs.push_back(output);
    mutex_.Unlock();
  }

  // Create the table file and wrap it in a builder.
  const std::string fname = TableFileName(dbname_, file_number);
  Status status = env_->NewWritableFile(fname, &compact->outfile);
  if (status.ok()) {
    compact->builder = new TableBuilder(options_, compact->outfile);
  }
  return status;
}
// Finalize the compaction output file currently under construction: finish
// (or abandon) the table builder, sync and close the file, then verify the
// new table is readable by opening it through the table cache.
Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
                                          Iterator* input)
{
  assert(compact != NULL);
  assert(compact->outfile != NULL);
  assert(compact->builder != NULL);

  const uint64_t output_number = compact->current_output()->number;
  assert(output_number != 0);

  // An iterator error poisons the whole output file: abandon it below.
  Status status = input->status();
  const uint64_t current_entries = compact->builder->NumEntries();
  if (status.ok()) {
    status = compact->builder->Finish();
  } else {
    compact->builder->Abandon();
  }

  // Record the finished file's size before discarding the builder.
  const uint64_t current_bytes = compact->builder->FileSize();
  compact->current_output()->file_size = current_bytes;
  compact->total_bytes += current_bytes;
  delete compact->builder;
  compact->builder = NULL;

  // Finish and check for file errors: flush to stable storage, then close.
  if (status.ok()) {
    status = compact->outfile->Sync();
  }
  if (status.ok()) {
    status = compact->outfile->Close();
  }
  delete compact->outfile;
  compact->outfile = NULL;

  if (status.ok() && current_entries > 0)
  {
    // Verify that the table is usable by reading it back via the cache.
    Iterator* check = table_cache_->NewIterator(ReadOptions(),
                                                output_number,
                                                current_bytes);
    status = check->status();
    delete check;
    if (status.ok())
    {
      Log(options_.info_log,
          "Generated table #%llu: %lld keys, %lld bytes",
          (unsigned long long) output_number,
          (unsigned long long) current_entries,
          (unsigned long long) current_bytes);
    }
  }
  return status;
}
// Commit a finished compaction to the version set: delete the input files
// from the edit and add every output table at level+1, then log and apply
// the edit.  Requires: mutex_ held.
Status DBImpl::InstallCompactionResults(CompactionState* compact)
{
  mutex_.AssertHeld();
  Log(options_.info_log, "Compacted %d@%d + %d@%d files => %lld bytes",
      compact->compaction->num_input_files(0),
      compact->compaction->level(),
      compact->compaction->num_input_files(1),
      compact->compaction->level() + 1,
      static_cast<long long>(compact->total_bytes));

  // Mark all compaction inputs as deleted in the edit.
  compact->compaction->AddInputDeletions(compact->compaction->edit());
  const int level = compact->compaction->level();
  // Each output table produced by the compaction lands one level down.
  for (size_t idx = 0; idx < compact->outputs.size(); idx++) {
    const CompactionState::Output& output = compact->outputs[idx];
    compact->compaction->edit()->AddFile(level + 1,
                                         output.number, output.file_size,
                                         output.smallest, output.largest);
  }
  // Persist the edit and make the resulting version current.
  return versions_->LogAndApply(compact->compaction->edit(), &mutex_);
}