在 filestore 中,针对 btrfs 或 zfs 这样的文件系统提供了 parallel 功能,让日志和数据并行写入。这两个
文件系统最多保留两个 checkpoint 来保证数据可以 rollback,而 xfs 这样的文件系统只能先写日志再写数据。
具体可以从日志的同步函数 sync_entry 中明显看到:
void FileStore::sync_entry()
{
#这个if条件成立的话,说明当前文件系统是支持checkpoint的,这样可以并行写入日志和数据,而不用等日志写完再写数据
if (backend->can_checkpoint()) {
int err = write_op_seq(op_fd, cp);
if (err < 0) {
derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl;
assert(0 == "error during write_op_seq");
}
char s[NAME_MAX];
snprintf(s, sizeof(s), COMMIT_SNAP_ITEM, (long long unsigned)cp);
uint64_t cid = 0;
#新建checkpoint来,当数据错误是执行rollback操作
err = backend->create_checkpoint(s, &cid);
if (err < 0) {
int err = errno;
derr << "snap create '" << s << "' got error " << err << dendl;
assert(err == 0);
}
snaps.push_back(cp);
#开始以实物的方式提供数据和日志
apply_manager.commit_started();
op_tp.unpause();
if (cid > 0) {
dout(20) << " waiting for checkpoint " << cid << " to complete" << dendl;
#等待checkpoint的数据写完
err = backend->sync_checkpoint(cid);
if (err < 0) {
derr << "ioctl WAIT_SYNC got " << cpp_strerror(err) << dendl;
assert(0 == "wait_sync got error");
}
dout(20) << " done waiting for checkpoint " << cid << " to complete" << dendl;
}
} else {
#这个case就是日志和数据要按先后顺序写,针对xfs之类的文件系统,要执行很多日志的sync操作,很明显支持checkpoint的
#文件系统不用执行下面一堆的sync操作
apply_manager.commit_started();
op_tp.unpause();
int err = object_map->sync();
if (err < 0) {
derr << "object_map sync got " << cpp_strerror(err) << dendl;
assert(0 == "object_map sync returned error");
}
err = backend->syncfs();
if (err < 0) {
derr << "syncfs got " << cpp_strerror(err) << dendl;
assert(0 == "syncfs returned error");
}
err = write_op_seq(op_fd, cp);
if (err < 0) {
derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl;
assert(0 == "error during write_op_seq");
}
err = ::fsync(op_fd);
if (err < 0) {
derr << "Error during fsync of op_seq: " << cpp_strerror(err) << dendl;
assert(0 == "error during fsync of op_seq");
}
}
// remove old snaps?
#从这里知道最多支持两个checkpoint,多余的checkpoint 会被删除.
if (backend->can_checkpoint()) {
char s[NAME_MAX];
while (snaps.size() > 2) {
snprintf(s, sizeof(s), COMMIT_SNAP_ITEM, (long long unsigned)snaps.front());
snaps.pop_front();
dout(10) << "removing snap '" << s << "'" << dendl;
int r = backend->destroy_checkpoint(s);
if (r) {
int err = errno;
derr << "unable to destroy snap '" << s << "' got " << cpp_strerror(err) << dendl;
}
}
}
}
在btrfs和zfs文件系统中ceph 并行写日志和数据
最新推荐文章于 2023-07-03 16:43:21 发布