Cache in BlueStore
代码量不大,所以全部贴出。
CacheShard
CacheShard 是整个BlueStore中实现Cache的基类。
/// A generic Cache Shard
struct CacheShard {
CephContext *cct;
PerfCounters *logger;
// 注意cache的操作一定要线程安全,这里创建一个锁
/// protect lru and other structures
ceph::recursive_mutex lock = {
ceph::make_recursive_mutex("BlueStore::CacheShard::lock")};
// cache 种最大容量和当前使用量
std::atomic <uint64_t> max = {0};
std::atomic <uint64_t> num = {0};
CacheShard(CephContext *cct) : cct(cct), logger(nullptr) {}
virtual ~CacheShard() {}
// 设置容量
void set_max(uint64_t max_) {
max = max_;
}
// 获取当前使用量
uint64_t _get_num() {
return num;
}
// 在 LruOnodeCacheShard、LruBufferCacheShard和TwoQBufferCacheShard种实现
virtual void _trim_to(uint64_t new_size) = 0;
void _trim() {
if (cct->_conf->objectstore_blackhole) {
// do not trim if we are throwing away IOs a layer down
return;
}
_trim_to(max);
}
void trim() {
std::lock_guard l(lock);
_trim();
}
void flush() {
std::lock_guard l(lock);
// we should not be shutting down after the blackhole is enabled
assert(!cct->_conf->objectstore_blackhole);
_trim_to(0);
}
#ifdef DEBUG_CACHE
virtual void _audit(const char *s) = 0;
#else
void _audit(const char *s) { /* no-op */ }
#endif
};
OnodeCacheShard
/// A Generic onode Cache Shard
//
// Abstract base for onode cache shards.  On top of CacheShard it keeps a
// count of pinned onodes (entries that must not be evicted) and declares
// the hooks a concrete eviction policy implements.
struct OnodeCacheShard : public CacheShard {
// number of cached onodes currently pinned (kept out of the eviction queue)
std::atomic <uint64_t> num_pinned = {0};
// fixed-size record of (oid, timestamp) pairs
// NOTE(review): presumably tracks recently dumped/logged onodes — the
// writer of this array is outside this chunk; confirm against BlueStore.
std::array<std::pair<ghobject_t, ceph::mono_clock::time_point>, 64> dumped_onodes;
// mark an onode pinned / unpinned inside the shard's structures
virtual void _pin(Onode *o) = 0;
virtual void _unpin(Onode *o) = 0;
public:
OnodeCacheShard(CephContext *cct) : CacheShard(cct) {}
// factory returning the concrete shard implementation for 'type'
static OnodeCacheShard *create(CephContext *cct, std::string type,
PerfCounters *logger);
// add / remove an onode; 'level' selects hot vs cold insertion
virtual void _add(Onode *o, int level) = 0;
virtual void _rm(Onode *o) = 0;
// remove an onode that is currently pinned
virtual void _unpin_and_rm(Onode *o) = 0;
// transfer a pinned onode's accounting to another shard
virtual void move_pinned(OnodeCacheShard *to, Onode *o) = 0;
// accumulate totals into *onodes / *pinned_onodes
virtual void add_stats(uint64_t *onodes, uint64_t *pinned_onodes) = 0;
// true when the shard holds no onodes at all (num counts pinned + unpinned)
bool empty() {
return _get_num() == 0;
}
};
OnodeCacheShard::create()
// 父类指针指向子类对象:实际返回的是 LruOnodeCacheShard 对象。
// OnodeCacheShard 的工厂函数(type 参数目前未使用)。
// Factory for onode cache shards.  Returns the new shard through a
// base-class pointer; the 'type' argument is ignored for now because
// only one implementation exists.
BlueStore::OnodeCacheShard *BlueStore::OnodeCacheShard::create(
    CephContext *cct,
    string type,
    PerfCounters *logger)
{
  // Currently we only implement an LRU cache for onodes
  auto *shard = new LruOnodeCacheShard(cct);
  shard->logger = logger;
  return shard;
}
LruOnodeCacheShard
// LruOnodeCacheShard: the (currently only) concrete OnodeCacheShard,
// implementing a plain LRU eviction policy.
struct LruOnodeCacheShard : public BlueStore::OnodeCacheShard {
// Intrusive list of BlueStore::Onode linked through Onode::lru_item;
// the front of the list is the most-recently-used end.
typedef boost::intrusive::list<
BlueStore::Onode,
boost::intrusive::member_hook<
BlueStore::Onode,
boost::intrusive::list_member_hook<>,
&BlueStore::Onode::lru_item> > list_t;
list_t lru;
// Only constructed via OnodeCacheShard::create().
explicit LruOnodeCacheShard(CephContext *cct) : BlueStore::OnodeCacheShard(cct) {}
// Add an onode to the shard.
// put_cache() sets Onode::cached and returns !pinned; a pinned onode is
// counted but kept off the LRU list, so it can never be evicted.
void _add(BlueStore::Onode *o, int level) override {
if (o->put_cache()) {
// level > 0 inserts at the hot (front) end, otherwise at the cold end
(level > 0) ? lru.push_front(*o) : lru.push_back(*o);
} else {
++num_pinned;
}
++num; // we count both pinned and unpinned entries
dout(20) << __func__ << " " << this << " " << o->oid << " added, num=" << num << dendl;
}
// Remove an onode from the shard.
// pop_cache() clears Onode::cached and returns !pinned; only unpinned
// onodes are actually on the LRU list.
void _rm(BlueStore::Onode *o) override {
if (o->pop_cache()) {
lru.erase(lru.iterator_to(*o));
} else {
ceph_assert(num_pinned);
--num_pinned;
}
ceph_assert(num);
--num;
dout(20) << __func__ << " " << this << " " << o->oid << " removed, num=" << num << dendl;
}
// Pin an onode: take it off the LRU list so _trim_to() cannot evict it.
// Must follow a matching _add().
void _pin(BlueStore::Onode *o) override {
lru.erase(lru.iterator_to(*o));
++num_pinned;
dout(20) << __func__ << " " << this << " " << o->oid << " pinned" << dendl;
}
// Unpin an onode: put it back at the hot end of the LRU list.
// Must follow a matching _pin().
void _unpin(BlueStore::Onode *o) override {
lru.push_front(*o);
ceph_assert(num_pinned);
--num_pinned;
dout(20) << __func__ << " " << this << " " << o->oid << " unpinned" << dendl;
}
// Drop a pinned onode.  Pinned onodes are not on the LRU list, so it is
// enough to clear the cached flag and fix up both counters.
void _unpin_and_rm(BlueStore::Onode *o) override {
o->pop_cache();
ceph_assert(num_pinned);
--num_pinned;
ceph_assert(num);
--num;
}
// Shrink the LRU list down to new_size entries, evicting from the cold
// (back) end.  This only ever shrinks the cache — a larger new_size is
// a no-op, since eviction cannot create entries.
void _trim_to(uint64_t new_size) override {
if (new_size >= lru.size()) {
return; // don't even try
}
uint64_t n = lru.size() - new_size;
auto p = lru.end();
ceph_assert(p != lru.begin());
--p;
ceph_assert(num >= n);
num -= n;
while (n-- > 0) {
BlueStore::Onode *o = &*p;
dout(20) << __func__ << " rm " << o->oid << " "
<< o->nref << " " << o->cached << " " << o->pinned << dendl;
if (p != lru.begin()) {
lru.erase(p--);
} else {
ceph_assert(n == 0);
lru.erase(p);
}
// entries on the LRU list are never pinned
auto pinned = !o->pop_cache();
ceph_assert(!pinned);
o->c->onode_map._remove(o->oid);
}
}
// Move a pinned onode's accounting to another shard.  Pinned onodes are
// not on the LRU list, so only the counters need to be transferred.
void move_pinned(OnodeCacheShard *to, BlueStore::Onode *o) override {
if (to == this) {
return;
}
ceph_assert(o->cached);
ceph_assert(o->pinned);
ceph_assert(num);
ceph_assert(num_pinned);
--num_pinned;
--num;
++to->num_pinned;
++to->num;
}
// Accumulate the shard's total and pinned onode counts into the out-params.
void add_stats(uint64_t *onodes, uint64_t *pinned_onodes) override {
*onodes += num;
*pinned_onodes += num_pinned;
}
};