在 BlueStore 中，元数据保存在 RocksDB 里，而 RocksDB 通过 RocksEnv 获得其运行环境（文件读写等接口）。因此 Ceph 实现了一个简单的文件系统 BlueFS，用来提供 RocksEnv 所需的接口。
/// Abridged view of the BlueFS class: only the device-id constants and the
/// in-memory directory/file indexes are shown here.
class BlueFS {
public:
  CephContext* cct;

  // BlueFS addresses up to MAX_BDEV (three) block devices, identified by the
  // ids below: WAL, DB and SLOW.
  static constexpr unsigned MAX_BDEV = 3;
  static constexpr unsigned BDEV_WAL = 0;
  static constexpr unsigned BDEV_DB = 1;
  static constexpr unsigned BDEV_SLOW = 2;

  // In-memory image of the file system: these two maps hold every directory
  // (keyed by name) and every file (keyed by inode number).
  mempool::bluefs::map<string, DirRef> dir_map; ///< dirname -> Dir
  mempool::bluefs::unordered_map<uint64_t,FileRef> file_map; ///< ino -> File
};
bluefs的初始化流程如下:
// Abridged excerpt of BlueStore::_open_db showing how BlueFS is brought up:
//   1. pick the KV backend (from config when creating, else from metadata),
//   2. construct the BlueFS object,
//   3. register the shared block device and hand BlueFS an extent of it,
//   4. mkfs (only when creating) and mount.
//
// NOTE(review): parts of the original function are elided in this excerpt.
// `bfn`, `initial` and `bluefs_shared_bdev` are used without a visible
// declaration, the `free_bluefs` label targeted by the gotos is not shown,
// and the `}` after bluefs_extents.insert() closes a block whose opening
// line was cut. The code tokens below are left exactly as excerpted.
int BlueStore::_open_db(bool create, bool to_repair_db)
{
  int r;
  assert(!db);
  string fn = path + "/db";
  string options;
  stringstream err;
  ceph::shared_ptr<Int64ArrayMergeOperator> merge_op(new Int64ArrayMergeOperator);
  string kv_backend;
  std::vector<KeyValueDB::ColumnFamily> cfs;

  // Determine the KV backend: take it from the configuration when creating,
  // otherwise read it back from the stored "kv_backend" metadata.
  if (create) {
    kv_backend = cct->_conf->bluestore_kvbackend;
  } else {
    r = read_meta("kv_backend", &kv_backend);
    if (r < 0) {
      derr << __func__ << " unable to read 'kv_backend' meta" << dendl;
      return -EIO;
    }
  }

  // Create the in-memory BlueFS instance.
  bluefs = new BlueFS(cct);

  // shared device
  // Read the block device path from the "path_block" metadata; fall back to
  // "<path>/block" when it cannot be read.
  if (read_meta("path_block", &bfn) < 0) {
    bfn = path + "/block";
  }

  // Register the device with BlueFS.
  r = bluefs->add_block_device(bluefs_shared_bdev, bfn);
  if (r < 0) {
    derr << __func__ << " add block device(" << bfn << ") returned: "
    << cpp_strerror(r) << dendl;
    goto free_bluefs;
  }

    // align to bluefs's alloc_size
    initial = P2ROUNDUP(initial, cct->_conf->bluefs_alloc_size);
    // put bluefs in the middle of the device in case it is an HDD
    uint64_t start = P2ALIGN((bdev->get_size() - initial) / 2,
    cct->_conf->bluefs_alloc_size);
    // Give BlueFS the extent [start, start + initial) on the shared device
    // and record the same extent in bluefs_extents.
    bluefs->add_block_extent(bluefs_shared_bdev, start, initial);
    bluefs_extents.insert(start, initial);
  }

  if (create) {
    // Format the BlueFS file system.
    bluefs->mkfs(fsid);
  }

  // Mount the BlueFS file system.
  r = bluefs->mount();
  if (r < 0) {
    derr << __func__ << " failed bluefs mount: " << cpp_strerror(r) << dendl;
    goto free_bluefs;
  }
}
// Abridged excerpt of BlueFS::mount: open the superblock, set up the
// per-device allocators, replay the log, then remove every extent already
// owned by a file from the allocators' free space.
//
// NOTE(review): the excerpt omits the `out:` label targeted by the gotos
// below as well as the function's return statements.
int BlueFS::mount()
{
  dout(1) << __func__ << dendl;

  int r = _open_super();
  if (r < 0) {
    derr << __func__ << " failed to open super: " << cpp_strerror(r) << dendl;
    goto out;
  }

  block_all.clear();
  block_all.resize(MAX_BDEV);

  // Create the allocators (one per present block device).
  _init_alloc();

  // Replay the log; per the surrounding article this is what rebuilds
  // dir_map and file_map.
  r = _replay(false, false);
  if (r < 0) {
    derr << __func__ << " failed to replay log: " << cpp_strerror(r) << dendl;
    _stop_alloc();
    goto out;
  }

  // Mark the space already used by files as not free: for every extent of
  // every file, remove it from the free space of the device it lives on.
  for (auto& p : file_map) {
    dout(30) << __func__ << " noting alloc for " << p.second->fnode << dendl;
    for (auto& q : p.second->fnode.extents) {
      alloc[q.bdev]->init_rm_free(q.offset, q.length);
    }
  }
}
void BlueFS::_init_alloc()
{
dout(20) << __func__ << dendl;
alloc.resize(MAX_BDEV);
pending_release.resize(MAX_BDEV);
for (unsigned id = 0; id < bdev.size(); ++id) {
#block id为null,则退出
if (!bdev[id]) {
continue;
}
assert(bdev[id]->get_size());
为每个block 设备创建一个Allocator
alloc[id] = Allocator::create(cct, cct->_conf->bluefs_allocator,
bdev[id]->get_size(),
cct->_conf->bluefs_alloc_size);
interval_set<uint64_t>& p = block_all[id];
for (interval_set<uint64_t>::iterator q = p.begin(); q != p.end(); ++q) {
#并添加这个块设备的起始地址和接收地址
alloc[id]->init_add_free(q.get_start(), q.get_len());
}
}
}
可见在 _init_alloc 中会为每个存在的块设备创建一个 Allocator，并把 block_all 中记录的该设备的各个区间（起始偏移和长度）通过 init_add_free 加入到该 Allocator 的空闲空间中。
bluefs
最新推荐文章于 2023-09-04 15:13:03 发布