在btrfs和zfs文件系统中ceph 并行写日志和数据

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/tiantao2012/article/details/79965070
在filestore中,针对btrfs或者zfs这样的文件系统提供了parallel的功能,让日志和数据可以并行写入。这两个
文件系统最多保留两个checkpoint来保证数据可以rollback,而xfs这样的文件系统只能先写日志再写数据。
具体可以从日志的同步函数sync_entry中明显看到:
void FileStore::sync_entry()
{

	#这个if条件成立的话,说明当前文件系统是支持checkpoint的,这样可以并行写入日志和数据,而不用等日志写完再写数据
      if (backend->can_checkpoint()) {
	int err = write_op_seq(op_fd, cp);
	if (err < 0) {
	  derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl;
	  assert(0 == "error during write_op_seq");
	}

	char s[NAME_MAX];
	snprintf(s, sizeof(s), COMMIT_SNAP_ITEM, (long long unsigned)cp);
	uint64_t cid = 0;
	#新建checkpoint来,当数据错误是执行rollback操作
	err = backend->create_checkpoint(s, &cid);
	if (err < 0) {
	    int err = errno;
	    derr << "snap create '" << s << "' got error " << err << dendl;
	    assert(err == 0);
	}

	snaps.push_back(cp);
	#开始以实物的方式提供数据和日志
	apply_manager.commit_started();
	op_tp.unpause();

	if (cid > 0) {
	  dout(20) << " waiting for checkpoint " << cid << " to complete" << dendl;
	  #等待checkpoint的数据写完
	  err = backend->sync_checkpoint(cid);
	  if (err < 0) {
	    derr << "ioctl WAIT_SYNC got " << cpp_strerror(err) << dendl;
	    assert(0 == "wait_sync got error");
	  }
	  dout(20) << " done waiting for checkpoint " << cid << " to complete" << dendl;
	}
      } else {
	  #这个case就是日志和数据要按先后顺序写,针对xfs之类的文件系统,要执行很多日志的sync操作,很明显支持checkpoint的
	  #文件系统不用执行下面一堆的sync操作
	apply_manager.commit_started();
	op_tp.unpause();

	int err = object_map->sync();
	if (err < 0) {
	  derr << "object_map sync got " << cpp_strerror(err) << dendl;
	  assert(0 == "object_map sync returned error");
	}

	err = backend->syncfs();
	if (err < 0) {
	  derr << "syncfs got " << cpp_strerror(err) << dendl;
	  assert(0 == "syncfs returned error");
	}

	err = write_op_seq(op_fd, cp);
	if (err < 0) {
	  derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl;
	  assert(0 == "error during write_op_seq");
	}
	err = ::fsync(op_fd);
	if (err < 0) {
	  derr << "Error during fsync of op_seq: " << cpp_strerror(err) << dendl;
	  assert(0 == "error during fsync of op_seq");
	}
      }

      // remove old snaps?
	  #从这里知道最多支持两个checkpoint,多余的checkpoint 会被删除.
      if (backend->can_checkpoint()) {
	char s[NAME_MAX];
	while (snaps.size() > 2) {
	  snprintf(s, sizeof(s), COMMIT_SNAP_ITEM, (long long unsigned)snaps.front());
	  snaps.pop_front();
	  dout(10) << "removing snap '" << s << "'" << dendl;
	  int r = backend->destroy_checkpoint(s);
	  if (r) {
	    int err = errno;
	    derr << "unable to destroy snap '" << s << "' got " << cpp_strerror(err) << dendl;
	  }
	}
      }

}

阅读更多

没有更多推荐了,返回首页