参考:http://blog.csdn.net/tankles/article/details/7663873
LevelDB 中 log 的作用主要是数据恢复:当 memtable 中的数据还没来得及 flush 进 sstable 文件时,这部分数据同时保存在 log 文件中,所以重启时可以根据 log 文件进行数据恢复。log 文件的主要结构如下所示。
LevelDB 对于一个 log 文件,会把它切割成以 32K 为单位的物理 Block,每次读取都以一个 Block 作为基本单位。下图展示的 log 文件由 3 个 Block 构成,所以从物理布局来讲,一个 log 文件就是由连续的 32K 大小的 Block 构成的。
每 32K 为一个 block,每个 block 中又包含多个 record。每条 record 由 7 字节的头部(4 字节校验和、2 字节长度、1 字节类型)和紧随其后的数据组成,其结构如下。
// Writes records to a log file. The file is laid out as consecutive
// blocks of kBlockSize bytes; a record that does not fit in the space left
// in the current block is split into several physical fragments.
class Writer {
public:
// Create a writer that will append data to "*dest".
// "*dest" must be initially empty.
// "*dest" must remain live while this Writer is in use.
explicit Writer(WritableFile* dest);
~Writer();
// Appends "slice" as one logical record, fragmenting it across blocks
// when necessary. An empty slice still produces a single zero-length
// record. Returns the status of the last physical write attempted.
Status AddRecord(const Slice& slice);
private:
// Destination file that every header, payload and block trailer is
// appended to (see the lifetime requirement on the constructor).
WritableFile* dest_;
// Current offset in block. Reset to 0 when a new block is started and
// advanced by header + payload size after each physical record.
int block_offset_;
// crc32c values for all supported record types. These are
// pre-computed to reduce the overhead of computing the crc of the
// record type stored in the header.
uint32_t type_crc_[kMaxRecordType + 1];
// Writes one physical record (7-byte header followed by "length" bytes
// starting at "ptr") to dest_, requests a flush, and advances
// block_offset_. "length" must fit in two bytes.
Status EmitPhysicalRecord(RecordType type, const char* ptr, size_t length);
// No copying allowed: the copy constructor and assignment operator are
// declared private.
Writer(const Writer&);
void operator=(const Writer&);
};
}
下面仔细看一下追加记录的实现,即 AddRecord 以及它调用的 EmitPhysicalRecord:
// Appends `slice` to the log as one logical record.
//
// The record is cut into physical fragments so that no fragment crosses a
// block boundary. Each fragment is tagged kFullType, kFirstType,
// kMiddleType or kLastType according to its position in the logical record.
// An empty slice still produces exactly one zero-length record.
//
// Returns the status of the last physical write attempted; the loop stops
// at the first fragment whose write fails.
Status Writer::AddRecord(const Slice& slice) {
  const char* ptr = slice.data();
  size_t left = slice.size();

  // Fragment the record if necessary and emit it.  Note that if slice
  // is empty, we still want to iterate once to emit a single
  // zero-length record.
  Status s;
  bool begin = true;
  do {
    // Space remaining in the current block.
    const int leftover = kBlockSize - block_offset_;
    assert(leftover >= 0);
    if (leftover < kHeaderSize) {
      // Fewer than kHeaderSize (7) bytes remain, so not even a header fits:
      // switch to a new block.
      if (leftover > 0) {
        // Fill the trailer (literal below relies on kHeaderSize being 7).
        // NOTE(review): the Status returned by this padding write is not
        // checked.
        assert(kHeaderSize == 7);
        dest_->Append(Slice("\x00\x00\x00\x00\x00\x00", leftover));
      }
      block_offset_ = 0;
    }

    // Invariant: we never leave < kHeaderSize bytes in a block.
    assert(kBlockSize - block_offset_ - kHeaderSize >= 0);

    // Payload bytes that fit in this block after the header.
    const size_t avail = kBlockSize - block_offset_ - kHeaderSize;
    const size_t fragment_length = (left < avail) ? left : avail;

    // Choose the fragment type from where it falls in the logical record:
    // `begin` is true only for the first fragment, `end` only for the one
    // that consumes the remaining bytes.
    RecordType type;
    const bool end = (left == fragment_length);
    if (begin && end) {
      type = kFullType;
    } else if (begin) {
      type = kFirstType;
    } else if (end) {
      type = kLastType;
    } else {
      type = kMiddleType;
    }

    // Write this fragment (header + payload) to dest_.
    s = EmitPhysicalRecord(type, ptr, fragment_length);
    ptr += fragment_length;
    left -= fragment_length;
    begin = false;
  } while (s.ok() && left > 0);
  return s;
}

// Writes a single physical record to dest_: a kHeaderSize-byte header
// followed by the `n` payload bytes starting at `ptr`.
//
// Header layout as written below (7 bytes):
//   buf[0..3]  masked crc32c covering the record type and the payload
//   buf[4..5]  payload length, low byte first
//   buf[6]     record type
//
// `n` must fit in two bytes and the record must fit in the current block.
// Returns the first failing status from the appends / flush, or the flush
// status when everything succeeds.
Status Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n) {
  assert(n <= 0xffff);  // Must fit in two bytes
  assert(block_offset_ + kHeaderSize + n <= kBlockSize);

  // Format the header
  char buf[kHeaderSize];
  buf[4] = static_cast<char>(n & 0xff);  // low 8 bits of the length
  buf[5] = static_cast<char>(n >> 8);    // high 8 bits of the length
  buf[6] = static_cast<char>(t);         // record type

  // Compute the crc of the record type and the payload. type_crc_[t] is the
  // pre-computed crc of the type, so extending it over the payload yields a
  // checksum that covers both.
  uint32_t crc = crc32c::Extend(type_crc_[t], ptr, n);
  crc = crc32c::Mask(crc);  // Adjust for storage
  EncodeFixed32(buf, crc);

  // Write the header and the payload
  Status s = dest_->Append(Slice(buf, kHeaderSize));  // header
  if (s.ok()) {
    s = dest_->Append(Slice(ptr, n));  // payload
    if (s.ok()) {
      s = dest_->Flush();  // flush after every physical record
    }
  }
  // The offset is advanced even when a write above failed.
  block_offset_ += kHeaderSize + n;
  return s;
}
reader 我们可以参考log_reader.cc 和log_reader.h 进行分析