首先分析分析的是RecordType,用于记录log文件的结构
enum RecordType {
// Zero is reserved for preallocated files
kZeroType = 0,
kFullType = 1,
// For fragments
kFirstType = 2,
kMiddleType = 3,
kLastType = 4
};
static const int kMaxRecordType = kLastType;
static const int kBlockSize = 32768;
// Header is checksum (4 bytes), length (2 bytes), type (1 byte).
static const int kHeaderSize = 4 + 2 + 1;
leveldb的log写文件主要是log_writer.h和log_writer.cc两个文件
首先是文件属性的介绍:
WritableFile* dest_; //写文件的对象
int block_offset_; //当前文件的偏移量
日志写对象初始化
Writer::Writer(WritableFile* dest) : dest_(dest), block_offset_(0) {
InitTypeCrc(type_crc_);
}
Writer::Writer(WritableFile* dest, uint64_t dest_length)
: dest_(dest), block_offset_(dest_length % kBlockSize) {
InitTypeCrc(type_crc_);
}
下面是向日志中写内容方法的介绍
Status Writer::AddRecord(const Slice& slice) {
const char* ptr = slice.data();//记录的内容
size_t left = slice.size();//记录的长度
Status s;
bool begin = true;
do {
const int leftover = kBlockSize - block_offset_; //当前文件的偏移量
assert(leftover >= 0);
if (leftover < kHeaderSize) { //判断当前文件剩余空间是否小于Header大小(7)
// Switch to a new block
if (leftover > 0) {
// Fill the trailer (literal below relies on kHeaderSize being 7)
static_assert(kHeaderSize == 7, "");
dest_->Append(Slice("\x00\x00\x00\x00\x00\x00", leftover));
}
block_offset_ = 0;
}
// Invariant: we never leave < kHeaderSize bytes in a block.
assert(kBlockSize - block_offset_ - kHeaderSize >= 0);
const size_t avail = kBlockSize - block_offset_ - kHeaderSize;//计算当前填写内容的空间大小
const size_t fragment_length = (left < avail) ? left : avail;
RecordType type;
const bool end = (left == fragment_length);//判断剩余空间是否能够将所有内容填写完
if (begin && end) { //判断写入的文件内容的type
type = kFullType;
} else if (begin) {
type = kFirstType;
} else if (end) {
type = kLastType;
} else {
type = kMiddleType;
}
s = EmitPhysicalRecord(type, ptr, fragment_length); //调用私有函数进行内容的写入
ptr += fragment_length;
left -= fragment_length;
begin = false;
} while (s.ok() && left > 0);
return s;
}
真正执行写入的方法:
Status Writer::EmitPhysicalRecord(RecordType t, const char* ptr,
size_t length) {
assert(length <= 0xffff); // Must fit in two bytes
assert(block_offset_ + kHeaderSize + length <= kBlockSize);
// Format the header
char buf[kHeaderSize];
buf[4] = static_cast<char>(length & 0xff);
buf[5] = static_cast<char>(length >> 8);
buf[6] = static_cast<char>(t);
// Compute the crc of the record type and the payload.
uint32_t crc = crc32c::Extend(type_crc_[t], ptr, length);
crc = crc32c::Mask(crc); // Adjust for storage
EncodeFixed32(buf, crc);
// Write the header and the payload
Status s = dest_->Append(Slice(buf, kHeaderSize));
if (s.ok()) {
s = dest_->Append(Slice(ptr, length));
if (s.ok()) {
s = dest_->Flush();
}
}
block_offset_ += kHeaderSize + length;
return s;
}
底层日志文件的实现
WritableFile类
class LEVELDB_EXPORT WritableFile {
public:
WritableFile() = default;
WritableFile(const WritableFile&) = delete;
WritableFile& operator=(const WritableFile&) = delete;
virtual ~WritableFile();
virtual Status Append(const Slice& data) = 0; //添加内容
virtual Status Close() = 0;//关闭文件
virtual Status Flush() = 0;//刷新文件
virtual Status Sync() = 0;//同步文件
};
WritableFile类只是作为一个抽象基类,定义了一些纯虚函数作为接口,最终作为父类被PosixWritableFile类继承。
lass PosixWritableFile final : public WritableFile {
public:
PosixWritableFile(std::string filename, int fd)
: pos_(0),
fd_(fd),
is_manifest_(IsManifest(filename)),
filename_(std::move(filename)),
dirname_(Dirname(filename_)) {}
~PosixWritableFile() override {
if (fd_ >= 0) {
// Ignoring any potential errors
Close();
}
}
Status Append(const Slice& data) override {
size_t write_size = data.size();
const char* write_data = data.data();
// Fit as much as possible into buffer.
size_t copy_size = std::min(write_size, kWritableFileBufferSize - pos_);
std::memcpy(buf_ + pos_, write_data, copy_size);
write_data += copy_size;
write_size -= copy_size;
pos_ += copy_size;
if (write_size == 0) {
return Status::OK();
}
// Can't fit in buffer, so need to do at least one write.
Status status = FlushBuffer();
if (!status.ok()) {
return status;
}
// Small writes go to buffer, large writes are written directly.
if (write_size < kWritableFileBufferSize) {
std::memcpy(buf_, write_data, write_size);
pos_ = write_size;
return Status::OK();
}
return WriteUnbuffered(write_data, write_size);
}
Status Close() override {
Status status = FlushBuffer();
const int close_result = ::close(fd_);
if (close_result < 0 && status.ok()) {
status = PosixError(filename_, errno);
}
fd_ = -1;
return status;
}
Status Flush() override { return FlushBuffer(); }
Status Sync() override {
// Ensure new files referred to by the manifest are in the filesystem.
//
// This needs to happen before the manifest file is flushed to disk, to
// avoid crashing in a state where the manifest refers to files that are not
// yet on disk.
Status status = SyncDirIfManifest();
if (!status.ok()) {
return status;
}
status = FlushBuffer();
if (!status.ok()) {
return status;
}
return SyncFd(fd_, filename_);
}
private:
Status FlushBuffer() {
Status status = WriteUnbuffered(buf_, pos_);
pos_ = 0;
return status;
}
Status WriteUnbuffered(const char* data, size_t size) {
while (size > 0) {
ssize_t write_result = ::write(fd_, data, size);
if (write_result < 0) {
if (errno == EINTR) {
continue; // Retry
}
return PosixError(filename_, errno);
}
data += write_result;
size -= write_result;
}
return Status::OK();
}
Status SyncDirIfManifest() {
Status status;
if (!is_manifest_) {
return status;
}
int fd = ::open(dirname_.c_str(), O_RDONLY | kOpenBaseFlags);
if (fd < 0) {
status = PosixError(dirname_, errno);
} else {
status = SyncFd(fd, dirname_);
::close(fd);
}
return status;
}
static Status SyncFd(int fd, const std::string& fd_path) {
if (sync_success) {
return Status::OK();
}
return PosixError(fd_path, errno);
}
// Returns the directory name in a path pointing to a file.
//
// Returns "." if the path does not contain any directory separator.
static std::string Dirname(const std::string& filename) {
std::string::size_type separator_pos = filename.rfind('/');
if (separator_pos == std::string::npos) {
return std::string(".");
}
// The filename component should not contain a path separator. If it does,
// the splitting was done incorrectly.
assert(filename.find('/', separator_pos + 1) == std::string::npos);
return filename.substr(0, separator_pos);
}
// Extracts the file name from a path pointing to a file.
//
// The returned Slice points to |filename|'s data buffer, so it is only valid
// while |filename| is alive and unchanged.
static Slice Basename(const std::string& filename) {
std::string::size_type separator_pos = filename.rfind('/');
if (separator_pos == std::string::npos) {
return Slice(filename);
}
// The filename component should not contain a path separator. If it does,
// the splitting was done incorrectly.
assert(filename.find('/', separator_pos + 1) == std::string::npos);
return Slice(filename.data() + separator_pos + 1,
filename.length() - separator_pos - 1);
}
// True if the given file is a manifest file.
static bool IsManifest(const std::string& filename) {
return Basename(filename).starts_with("MANIFEST");
}
// buf_[0, pos_ - 1] contains data to be written to fd_.
char buf_[kWritableFileBufferSize];
size_t pos_;
int fd_;
const bool is_manifest_; // True if the file's name starts with MANIFEST.
const std::string filename_;
const std::string dirname_; // The directory of filename_.
};