Ceph Version : Kraken
Component:FileStore
FileStore::mkfs() 该方法是用于初始化OSD的文件系统,只有在安装OSD时由OSD::mkfs(…)发起调用。
mkfs中做了以下工作:
- 创建fsid文件,若未指定fsid则随机生成一个UUID作为OSD的fsid,并保存到fsid文件中。
- 创建store_version文件,用于保存当前FileStore的版本。
- 创建superblock文件,保存FSSuperblock实例的序列化值。 创建current目录。
- 创建current/commit_op_seq文件,保存当前最新的commit op的序号。
- 创建kv数据库omap(生成current/omap目录)
- 创建kv数据库下的osd_uuid文件(current/omap/osd_uuid),并将fsid保存于其中。
- 初始化OSD事务日志,如果是一个裸块设备,则检查其大小是否满足事务日志的最小要求,并初始化日志头;如果是用一个文件作为事务日志,则构建该日志文件,并初始化日志头,其余空间填充0。
- 创建type文件,保存当前存储引擎类型(如:filestore)。
FileStore::mkfs()具体代码如下:
int FileStore::mkfs()
{
int ret = 0;
char fsid_fn[PATH_MAX];
char fsid_str[40];
uuid_d old_fsid;
uuid_d old_omap_fsid;
//1.打开存储空间的根目录basedir(/var/log/ceph/osd/ceph-*)
dout(1) << "mkfs in " << basedir << dendl;
basedir_fd = ::open(basedir.c_str(), O_RDONLY);
if (basedir_fd < 0) {
ret = -errno;
derr << "mkfs failed to open base dir " << basedir << ": " << cpp_strerror(ret) << dendl;
return ret;
}
//2.打开或者创建存储OSD文件系统UUID的文件(${basedir}/fsid 保存了OSD的uuid),生产fsid,并创建fsid文件
// open+lock fsid
snprintf(fsid_fn, sizeof(fsid_fn), "%s/fsid", basedir.c_str());
fsid_fd = ::open(fsid_fn, O_RDWR|O_CREAT, 0644);
if (fsid_fd < 0) {
ret = -errno;
derr << "mkfs: failed to open " << fsid_fn << ": " << cpp_strerror(ret) << dendl;
goto close_basedir_fd;
}
//对fsid文件上锁
if (lock_fsid() < 0) {
ret = -EBUSY;
goto close_fsid_fd;
}
//对其osd uuid 进行设置
if (read_fsid(fsid_fd, &old_fsid) < 0 || old_fsid.is_zero()) {
if (fsid.is_zero()) {
fsid.generate_random();
dout(1) << "mkfs generated fsid " << fsid << dendl;
} else {
dout(1) << "mkfs using provided fsid " << fsid << dendl;
}
fsid.print(fsid_str);
strcat(fsid_str, "\n");
ret = ::ftruncate(fsid_fd, 0);
if (ret < 0) {
ret = -errno;
derr << __FUNC__ << ": failed to truncate fsid: "
<< cpp_strerror(ret) << dendl;
goto close_fsid_fd;
}
ret = safe_write(fsid_fd, fsid_str, strlen(fsid_str));
if (ret < 0) {
derr << __FUNC__ << ": failed to write fsid: "
<< cpp_strerror(ret) << dendl;
goto close_fsid_fd;
}
if (::fsync(fsid_fd) < 0) {
ret = -errno;
derr << __FUNC__ << ": close failed: can't write fsid: "
<< cpp_strerror(ret) << dendl;
goto close_fsid_fd;
}
dout(10) << "mkfs fsid is " << fsid << dendl;
} else {
if (!fsid.is_zero() && fsid != old_fsid) {
derr << __FUNC__ << ": on-disk fsid " << old_fsid << " != provided " << fsid << dendl;
ret = -EINVAL;
goto close_fsid_fd;
}
fsid = old_fsid;
dout(1) << __FUNC__ << ": fsid is already set to " << fsid << dendl;
}
//创建store_version文件,写入FileStore 版本
// version stamp
ret = write_version_stamp();
if (ret < 0) {
derr << __FUNC__ << ": write_version_stamp() failed: "
<< cpp_strerror(ret) << dendl;
goto close_fsid_fd;
}
//创建superblock文件,写入FSSuperblock实例序列化保存到superblock中
// superblock
superblock.omap_backend = cct->_conf->filestore_omap_backend;
ret = write_superblock();
if (ret < 0) {
¦ derr << __FUNC__ << ": write_superblock() failed: "
<< cpp_strerror(ret) << dendl;
¦ goto close_fsid_fd;
}
struct statfs basefs;
ret = ::fstatfs(basedir_fd, &basefs);
if (ret < 0) {
¦ ret = -errno;
¦ derr << __FUNC__ << ": cannot fstatfs basedir "
<< cpp_strerror(ret) << dendl;
¦ goto close_fsid_fd;
}
//创建FileStore的存储后端如:(XFSFileStoreBackend)
create_backend(basefs.f_type);
//创建存储空间下的current目录
ret = backend->create_current();
if (ret < 0) {
¦ derr << __FUNC__ << ": failed to create current/ " << cpp_strerror(ret) << dendl;
¦ goto close_fsid_fd;
}
// write initial op_seq(commit_op_seq)
{
uint64_t initial_seq = 0;
int fd = read_op_seq(&initial_seq);
if (fd < 0) {
ret = fd;
derr << __FUNC__ << ": failed to create " << current_op_seq_fn << ": "
<< cpp_strerror(ret) << dendl;
goto close_fsid_fd;
}
if (initial_seq == 0) {
ret = write_op_seq(fd, 1);
if (ret < 0) {
VOID_TEMP_FAILURE_RETRY(::close(fd));
derr << __FUNC__ << ": failed to write to " << current_op_seq_fn << ": "
<< cpp_strerror(ret) << dendl;
goto close_fsid_fd;
}
if (backend->can_checkpoint()) {
// create snap_1 too
current_fd = ::open(current_fn.c_str(), O_RDONLY);
assert(current_fd >= 0);
char s[NAME_MAX];
snprintf(s, sizeof(s), COMMIT_SNAP_ITEM, 1ull);
ret = backend->create_checkpoint(s, NULL);
VOID_TEMP_FAILURE_RETRY(::close(current_fd));
if (ret < 0 && ret != -EEXIST) {
VOID_TEMP_FAILURE_RETRY(::close(fd));
derr << __FUNC__ << ": failed to create snap_1: " << cpp_strerror(ret) << dendl;
goto close_fsid_fd;
}
}
}
VOID_TEMP_FAILURE_RETRY(::close(fd));
}
//创建leveldb数据库,即创建current/omap文件夹
ret = KeyValueDB::test_init(superblock.omap_backend, omap_dir);
if (ret < 0) {
¦ derr << __FUNC__ << ": failed to create " << cct->_conf->filestore_omap_backend << dendl;
¦ goto close_fsid_fd;
}
//在omap下创建osd_uuid文件并保持fsid。
// create fsid under omap
// open+lock fsid
int omap_fsid_fd;
char omap_fsid_fn[PATH_MAX];
snprintf(omap_fsid_fn, sizeof(omap_fsid_fn), "%s/osd_uuid", omap_dir.c_str());
omap_fsid_fd = ::open(omap_fsid_fn, O_RDWR|O_CREAT, 0644);
if (omap_fsid_fd < 0) {
¦ ret = -errno;
¦ derr << __FUNC__ << ": failed to open " << omap_fsid_fn << ": " << cpp_strerror(ret) << dendl;
¦ goto close_fsid_fd;
}
if (read_fsid(omap_fsid_fd, &old_omap_fsid) < 0 || old_omap_fsid.is_zero()) {
¦ assert(!fsid.is_zero());
¦ fsid.print(fsid_str);
¦ strcat(fsid_str, "\n");
¦ ret = ::ftruncate(omap_fsid_fd, 0);
¦ if (ret < 0) {
¦ ¦ ret = -errno;
¦ ¦ derr << __FUNC__ << ": failed to truncate fsid: "
¦ << cpp_strerror(ret) << dendl;
¦ ¦ goto close_omap_fsid_fd;
¦ }
¦ ret = safe_write(omap_fsid_fd, fsid_str, strlen(fsid_str));
¦ if (ret < 0) {
¦ ¦ derr << __FUNC__ << ": failed to write fsid: "
¦ << cpp_strerror(ret) << dendl;
¦ ¦ goto close_omap_fsid_fd;
¦ }
¦ dout(10) << __FUNC__ << ": write success, fsid:" << fsid_str << ", ret:" << ret << dendl;
¦ if (::fsync(omap_fsid_fd) < 0) {
¦ ¦ ret = -errno;
¦ ¦ derr << __FUNC__ << ": close failed: can't write fsid: "
¦ << cpp_strerror(ret) << dendl;
¦ ¦ goto close_omap_fsid_fd;
¦ }
¦ dout(10) << "mkfs omap fsid is " << fsid << dendl;
} else {
¦ if (fsid != old_omap_fsid) {
¦ ¦ derr << __FUNC__ << ": " << omap_fsid_fn
¦ ¦ ¦ ¦ ¦<< " has existed omap fsid " << old_omap_fsid
¦ ¦ ¦ ¦ ¦<< " != expected osd fsid " << fsid
¦ ¦ ¦ ¦ ¦<< dendl;
¦ ¦ ret = -EINVAL;
¦ ¦ goto close_omap_fsid_fd;
¦ }
¦ dout(1) << __FUNC__ << ": omap fsid is already set to " << fsid << dendl;
}
dout(1) << cct->_conf->filestore_omap_backend << " db exists/created" << dendl;
//初始化journal,如:初始日志块设备,或者构建日志文件,然后写入日志头信息。
// journal?
ret = mkjournal();
if (ret)
¦ goto close_omap_fsid_fd;
//创建type文件,保存当前使用的存储引擎如:filestore
ret = write_meta("type", "filestore");
if (ret)
¦ goto close_omap_fsid_fd;
dout(1) << "mkfs done in " << basedir << dendl;
ret = 0;
close_omap_fsid_fd:
VOID_TEMP_FAILURE_RETRY(::close(omap_fsid_fd));
close_fsid_fd:
VOID_TEMP_FAILURE_RETRY(::close(fsid_fd));
fsid_fd = -1;
close_basedir_fd:
VOID_TEMP_FAILURE_RETRY(::close(basedir_fd));
delete backend;
backend = NULL;
return ret;
}
创建current/commit_op_seq文件用于记录commit op 序号
/**
 * Open the committed-op sequence file (current_op_seq_fn), creating it if
 * it does not exist, and parse the sequence number stored in it.
 *
 * The file content is read as decimal text; an empty file therefore
 * yields 0 (mkfs() treats 0 as "not initialised yet" and seeds it with 1).
 *
 * @param seq  out: parsed sequence number; left untouched on failure.
 * @return the open file descriptor on success -- the caller owns it and
 *         must close it -- or a negative error code on failure.
 */
int FileStore::read_op_seq(uint64_t *seq)
{
  int op_fd = ::open(current_op_seq_fn.c_str(), O_CREAT|O_RDWR, 0644);
  if (op_fd < 0) {
    int r = -errno;
    // Abort on EIO when m_filestore_fail_eio is set.
    assert(!m_filestore_fail_eio || r != -EIO);
    return r;
  }

  // Zero the buffer and read at most sizeof(s) - 1 bytes so that the text
  // is always NUL-terminated for atoll().
  char s[40];
  memset(s, 0, sizeof(s));
  int ret = safe_read(op_fd, s, sizeof(s) - 1);
  if (ret < 0) {
    derr << "error reading " << current_op_seq_fn << ": " << cpp_strerror(ret) << dendl;
    VOID_TEMP_FAILURE_RETRY(::close(op_fd));
    assert(!m_filestore_fail_eio || ret != -EIO);
    return ret;
  }

  *seq = atoll(s);
  return op_fd;
}
创建current目录
int GenericFileStoreBackend::create_current()
{
struct stat st;
int ret = ::stat(get_current_path().c_str(), &st);
if (ret == 0) {
¦ // current/ exists
¦ if (!S_ISDIR(st.st_mode)) {
¦ ¦ dout(0) << "_create_current: current/ exists but is not a directory" << dendl;
¦ ¦ ret = -EINVAL;
¦ }
} else {
¦ ret = ::mkdir(get_current_path().c_str(), 0755);
¦ if (ret < 0) {
¦ ¦ ret = -errno;
¦ ¦ dout(0) << "_create_current: mkdir " << get_current_path() << " failed: "<< cpp_strerror(ret) << dendl;
¦ }
}
return ret;
}
序列化FSSuperblock实例并保存到superblock文件中
/**
 * Encode the in-memory superblock and write it to the "superblock" file
 * under basedir.
 *
 * @return the result of safe_write_file().
 */
int FileStore::write_superblock()
{
  bufferlist bl;
  ::encode(superblock, bl);
  return safe_write_file(basedir.c_str(), "superblock",
			 bl.c_str(), bl.length());
}
保存FileStore的版本到store_version文件中
/**
 * Encode target_version and write it to the "store_version" file under
 * basedir.
 *
 * @return the result of safe_write_file().
 */
int FileStore::write_version_stamp()
{
  bufferlist bl;
  ::encode(target_version, bl);
  return safe_write_file(basedir.c_str(), "store_version",
			 bl.c_str(), bl.length());
}