levelDB源码分析-Cache（LRUCache、HashTable）

最新推荐文章于 2023-03-22 21:00:56 发布

巴山独钓

最新推荐文章于 2023-03-22 21:00:56 发布

阅读量5.3k

点赞数

分类专栏： levelDB 文章标签： cache list null insert table combinations

本文链接：https://blog.csdn.net/tankles/article/details/7663608

版权

levelDB 专栏收录该内容

11 篇文章 10 订阅

订阅专栏

leveldb内部通过双向链表实现了一个LRUCache。利用双向链表实现LRU淘汰的算法在《操作系统》教材中已有清楚的描述，这里的实现与标准做法一致，不再赘述。
LRUCache内部还实现了一个哈希表（HandleTable），用于快速查找key对应的双向链表节点。

Cache类用纯虚函数定义了缓存的接口；对外实际使用的实现是ShardedLRUCache，它继承自Cache，内部由多个LRUCache分片组成（LRUCache本身并不继承Cache，只是提供了带hash参数的同名操作）。对外暴露的节点类型为Cache::Handle，内部则使用LRUHandle进行操作。

LRUCache是线程安全的（每个LRUCache用一把互斥锁保护自己的状态）。为了让多线程访问尽可能快、减少锁竞争，ShardedLRUCache内部有16个LRUCache分片：查找key时先计算key的32位hash值，取其高4位确定所属分片，然后只在相应的LRUCache中进行查找，这样就大大减少了多线程访问时的锁开销。

LRUCache shard_[kNumShards]

   // Create a new cache with a fixed size capacity.  This implementation
    // of Cache uses a least-recently-used eviction policy.
    extern Cache* NewLRUCache(size_t capacity);  // Factory for an LRU cache with a fixed capacity.

    class Cache {
        public:
        Cache() { }

        // Destroys all existing entries by calling the "deleter"
        // function that was passed to the constructor.
        virtual ~Cache();

        // Opaque handle to an entry stored in the cache.
        struct Handle { };  // Node type exposed to callers; carries no visible fields.

        // Insert a mapping from key->value into the cache and assign it
        // the specified charge against the total cache capacity.
        //
        // Returns a handle that corresponds to the mapping.  The caller
        // must call this->Release(handle) when the returned mapping is no
        // longer needed.
        //
        // When the inserted entry is no longer needed, the key and
        // value will be passed to "deleter".
        virtual Handle* Insert(const Slice& key, void* value, size_t charge, void (*deleter)(const Slice& key, void* value)) = 0;

        // If the cache has no mapping for "key", returns NULL.
        //
        // Else return a handle that corresponds to the mapping.  The caller
        // must call this->Release(handle) when the returned mapping is no
        // longer needed.
        virtual Handle* Lookup(const Slice& key) = 0;  // Look up a specific key.

        // Release a mapping returned by a previous Lookup().
        // REQUIRES: handle must not have been released yet.
        // REQUIRES: handle must have been returned by a method on *this.
        virtual void Release(Handle* handle) = 0;  // Drops one reference held on the handle.

        // Return the value encapsulated in a handle returned by a
        // successful Lookup().
        // REQUIRES: handle must not have been released yet.
        // REQUIRES: handle must have been returned by a method on *this.
        virtual void* Value(Handle* handle) = 0;  // Returns the stored value pointer.

        // If the cache contains entry for key, erase it.  Note that the
        // underlying entry will be kept around until all existing handles
        // to it have been released.
        virtual void Erase(const Slice& key) = 0;  // Remove the entry for key.

        // Return a new numeric id.  May be used by multiple clients who are
        // sharing the same cache to partition the key space.  Typically the
        // client will allocate a new id at startup and prepend the id to
        // its cache keys.
        virtual uint64_t NewId() = 0;

    private:
        // Declarations only: no definitions for these Cache members appear in
        // the visible code (LRUCache defines its own versions taking LRUHandle*).
        void LRU_Remove(Handle* e);  // Remove node e from the LRU list.
        void LRU_Append(Handle* e);  // Insert node e into the LRU list.
        void Unref(Handle* e);       // Decrease the reference count of node e.

        // Opaque implementation type and pointer; not referenced elsewhere in
        // the visible code.
        struct Rep;
        Rep* rep_;

        // No copying allowed
        Cache(const Cache&);  // Copy constructor and assignment are private and left undefined.
        void operator=(const Cache&);
        };

// LRU双向链表节点

        // A cache entry: a variable-length, heap-allocated record.  Entries are
        // linked into a circular doubly linked list ordered by access time (via
        // next/prev) and into one hash-table bucket chain (via next_hash).
        struct LRUHandle {
            void* value;                                  // Cached value; for a temporary lookup handle, a Slice* (see key()).
            void (*deleter)(const Slice&, void* value);   // Cleanup callback, invoked with (key, value).
            LRUHandle* next_hash;                         // Next entry in the same hash bucket (singly linked chain).
            LRUHandle* next;                              // Successor in the circular LRU list.
            LRUHandle* prev;                              // Predecessor in the circular LRU list.
            size_t charge;      // TODO(opt): Only allow uint32_t?
            size_t key_length;  // Number of key bytes stored starting at key_data.
            uint32_t refs;      // Reference count.
            uint32_t hash;      // Hash of key(); used for fast sharding and comparisons
            char key_data[1];   // Beginning of key

            // Returns the key of this entry.
            Slice key() const {
                // For cheaper lookups, a temporary handle (recognizable by
                // next == this) stores a pointer to its key Slice in "value"
                // instead of carrying the key bytes inline.
                const bool key_is_inline = (next != this);
                if (key_is_inline) {
                    return Slice(key_data, key_length);
                }
                return *(reinterpret_cast<Slice*>(value));
            }
        };

哈希表：

        // We provide our own simple hash table since it removes a whole bunch
        // of porting hacks and is also faster than some of the built-in hash
        // table implementations in some of the compiler/runtime combinations
        // we have tested.  E.g., readrandom speeds up by ~5% over the g++
        // 4.4.3's builtin hashtable.
        //
        // The table only links LRUHandle entries together through their
        // next_hash field; its destructor frees the bucket array, not the
        // entries themselves.
        class HandleTable {
           public:
            // Resize() on the empty table performs the initial bucket allocation.
            HandleTable() : length_(0), elems_(0), list_(NULL) { Resize(); }
            ~HandleTable() { delete[] list_; }

            // Returns the entry matching key/hash, or NULL if there is none.
            LRUHandle* Lookup(const Slice& key, uint32_t hash) {
                return *FindPointer(key, hash);
            }

            // Links h into the table.  If an entry with the same key/hash is
            // already present, h takes over its position in the bucket chain
            // and the displaced entry is returned; otherwise h is attached at
            // the end of its bucket chain and NULL is returned.
            LRUHandle* Insert(LRUHandle* h) {
                LRUHandle** ptr = FindPointer(h->key(), h->hash);
                LRUHandle* old = *ptr;  // NULL means no matching entry was found.
                h->next_hash = (old == NULL ? NULL : old->next_hash);
                *ptr = h;
                if (old == NULL) {
                    // A genuinely new entry: the element count grows.
                    ++elems_;
                    if (elems_ > length_) {
                        // Since each cache entry is fairly large, we aim for a small
                        // average linked list length (<= 1).
                        Resize();
                    }
                }
                return old;
            }

            // Unlinks the entry matching key/hash from the table and returns
            // it, or returns NULL if there is no such entry.
            LRUHandle* Remove(const Slice& key, uint32_t hash) {
                LRUHandle** ptr = FindPointer(key, hash);
                LRUHandle* result = *ptr;
                if (result != NULL) {
                    *ptr = result->next_hash;
                    --elems_;
                }
                return result;
            }

           private:
            // No copying allowed: the table owns list_, so a member-wise copy
            // would leave two tables calling delete[] on the same array.
            // Declared but intentionally not defined.
            HandleTable(const HandleTable&);
            void operator=(const HandleTable&);

            // The table consists of an array of buckets where each bucket is
            // a linked list of cache entries that hash into the bucket.
            uint32_t length_;   // Number of buckets; Resize() keeps it a power of two.
            uint32_t elems_;    // Number of entries currently linked into the table.
            LRUHandle** list_;  // Bucket array: one chain head pointer per bucket.

            // Return a pointer to slot that points to a cache entry that
            // matches key/hash.  If there is no such cache entry, return a
            // pointer to the trailing slot in the corresponding linked list
            // (the bucket chain is singly linked through next_hash).
            LRUHandle** FindPointer(const Slice& key, uint32_t hash)
            {
                // length_ is a power of two, so the mask selects the bucket.
                LRUHandle** ptr = &list_[hash & (length_ - 1)];
                // Compare the cheap hash first; keys are compared only when the
                // hashes are equal.
                while (*ptr != NULL && ((*ptr)->hash != hash || key != (*ptr)->key()))
                {
                    ptr = &(*ptr)->next_hash;
                }
                return ptr;
            }

            // Grows the bucket array to the smallest power of two that is at
            // least 4 and not below elems_, then relinks every entry into its
            // new bucket in a single pass.
            void Resize() {
                uint32_t new_length = 4;
                while (new_length < elems_) {
                    new_length *= 2;
                }
                LRUHandle** new_list = new LRUHandle*[new_length];
                memset(new_list, 0, sizeof(new_list[0]) * new_length);
                uint32_t count = 0;
                for (uint32_t i = 0; i < length_; i++) {
                    LRUHandle* h = list_[i];  // Head of one old bucket chain.
                    while (h != NULL)
                    {
                        LRUHandle* next = h->next_hash;
                        // The stored hash is reused, so the key is neither read
                        // nor rehashed here.
                        uint32_t hash = h->hash;
                        LRUHandle** ptr = &new_list[hash & (new_length - 1)];
                        h->next_hash = *ptr;  // Push onto the front of the new bucket.
                        *ptr = h;
                        h = next;
                        count++;
                    }
                }
                assert(elems_ == count);
                delete[] list_;
                list_ = new_list;
                length_ = new_length;
            }
        };

一个LRUCache定义：

        // A single shard of sharded cache.
        class LRUCache {
          public:
            LRUCache();
            ~LRUCache();

            // Separate from constructor so caller can easily make an array of LRUCache
            void SetCapacity(size_t capacity) { capacity_ = capacity; }  // Sets the capacity of this shard.

            // Like Cache methods, but with an extra "hash" parameter.
            Cache::Handle* Insert(const Slice& key, uint32_t hash,  // Inserts an entry.
							void* value, size_t charge,
							void (*deleter)(const Slice& key, void* value));
            
            Cache::Handle* Lookup(const Slice& key, uint32_t hash);  // Looks up an entry.
            void Release(Cache::Handle* handle);                     // Drops one reference on the handle.
            void Erase(const Slice& key, uint32_t hash);             // Removes the entry for key.

          private:
            void LRU_Remove(LRUHandle* e);  // Unlinks e from the LRU list.
            void LRU_Append(LRUHandle* e);  // Links e in as the newest LRU entry.
            void Unref(LRUHandle* e);       // Drops one reference; destroys e when none remain.

            // Initialized before use.
            size_t capacity_;  // Capacity of this shard, set through SetCapacity().

            // mutex_ protects the following state.
            port::Mutex mutex_;  // Taken by Insert(), Lookup(), Erase() and Release().
            size_t usage_;       // Running total of charges: Insert() adds, Unref() subtracts on destruction.
            uint64_t last_id_;   // Set to 0 by the constructor; not otherwise referenced in the visible code.

            // Dummy head of LRU list.
            // lru.prev is newest entry, lru.next is oldest entry.
            LRUHandle lru_;  // Sentinel node of the circular LRU list.

            // Hash table from key to entry, so an entry can be located
            // without walking the LRU list.
            HandleTable table_;
        };

 	   LRUCache::LRUCache()														// 构造函数
        : usage_(0),
        last_id_(0) {
            // Make empty circular linked list
            lru_.next = &lru_;
            lru_.prev = &lru_;
        }

        // Walks the LRU list from oldest to newest and drops the cache's own
        // reference on every entry still linked into it.
        LRUCache::~LRUCache() {
            LRUHandle* cur = lru_.next;
            while (cur != &lru_) {
                // Unref() may free `cur`, so its successor is fetched first.
                LRUHandle* const following = cur->next;
                assert(cur->refs == 1);  // Error if caller has an unreleased handle
                Unref(cur);
                cur = following;
            }
        }

        // Drops one reference on e.  When the count reaches zero the entry's
        // charge is subtracted from usage_, its deleter is called with the key
        // and value, and its memory is released with free().
        void LRUCache::Unref(LRUHandle* e) {
            assert(e->refs > 0);
            e->refs--;
            if (e->refs > 0) {
                return;  // Still referenced elsewhere; nothing to destroy.
            }
            usage_ -= e->charge;
            (*e->deleter)(e->key(), e->value);
            free(e);
        }

        // Unlinks e from the circular LRU list by joining its two neighbours.
        // e's own next/prev pointers are left untouched.
        void LRUCache::LRU_Remove(LRUHandle* e) {
            LRUHandle* const before = e->prev;
            LRUHandle* const after = e->next;
            after->prev = before;
            before->next = after;
        }

        // Links e into the LRU list as the newest entry, i.e. immediately
        // before the sentinel lru_.
        void LRUCache::LRU_Append(LRUHandle* e) {
            LRUHandle* const newest = lru_.prev;  // Current tail (lru_ itself when the list is empty).
            e->prev = newest;
            e->next = &lru_;
            newest->next = e;
            lru_.prev = e;
        }

        // Finds the entry for key/hash while holding mutex_.  On a hit the
        // entry gains one reference (for the returned handle) and is moved to
        // the newest position of the LRU list; on a miss NULL is returned.
        Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) {
            MutexLock guard(&mutex_);
            LRUHandle* const found = table_.Lookup(key, hash);
            if (found == NULL) {
                return NULL;
            }
            ++found->refs;
            // Unlink and re-append so the entry counts as most recently used.
            LRU_Remove(found);
            LRU_Append(found);
            return reinterpret_cast<Cache::Handle*>(found);
        }

        // Adds a key->value entry to this shard while holding mutex_ and
        // returns a handle to it.  An existing entry with the same key is
        // displaced, and the oldest entries are evicted while usage_ exceeds
        // capacity_.
        Cache::Handle* LRUCache::Insert(
        const Slice& key, uint32_t hash, void* value, size_t charge,
        void (*deleter)(const Slice& key, void* value)) {
            MutexLock guard(&mutex_);

            // key_data[1] already provides one byte, hence the "-1".
            const size_t alloc_size = sizeof(LRUHandle)-1 + key.size();
            LRUHandle* const entry = reinterpret_cast<LRUHandle*>(malloc(alloc_size));
            entry->hash = hash;
            entry->key_length = key.size();
            memcpy(entry->key_data, key.data(), key.size());
            entry->value = value;
            entry->charge = charge;
            entry->deleter = deleter;
            entry->refs = 2;  // One from LRUCache, one for the returned handle

            // Link into the LRU list before touching the hash table:
            // table_.Insert() calls entry->key(), which inspects entry->next.
            LRU_Append(entry);
            usage_ += charge;

            // An older entry with the same key is replaced in the table; drop
            // it from the LRU list and release the cache's reference on it.
            LRUHandle* const replaced = table_.Insert(entry);
            if (replaced != NULL) {
                LRU_Remove(replaced);
                Unref(replaced);
            }

            // Evict from the oldest end (lru_.next) until usage fits or the
            // list is empty.
            while (usage_ > capacity_ && lru_.next != &lru_) {
                LRUHandle* const victim = lru_.next;
                LRU_Remove(victim);
                table_.Remove(victim->key(), victim->hash);
                Unref(victim);
            }

            return reinterpret_cast<Cache::Handle*>(entry);
        }

        // 删除节点
        void LRUCache::Erase(const Slice& key, uint32_t hash) {
            MutexLock l(&mutex_);
            LRUHandle* e = table_.Remove(key, hash);
            if (e != NULL) 
            {
                LRU_Remove(e);
                Unref(e);
            }
        }
        // Gives back the reference represented by `handle`, under mutex_.
        void LRUCache::Release(Cache::Handle* handle) {
            MutexLock guard(&mutex_);
            LRUHandle* const entry = reinterpret_cast<LRUHandle*>(handle);
            Unref(entry);  // Decrements the reference count.
        }

levelDB中实际的LRUCache：

        static const int kNumShardBits = 4;
        static const int kNumShards = 1 << kNumShardBits;

        class ShardedLRUCache : public Cache {
          private:
            LRUCache shard_[kNumShards]; 								// 2^4个LRUCache
            port::Mutex id_mutex_;
            uint64_t last_id_;

            // 计算hash值
            static inline uint32_t HashSlice(const Slice& s) {				// 计算hash值
                return Hash(s.data(), s.size(), 0);
            }
            
            static uint32_t Shard(uint32_t hash) {						// 4bits，映射到哪个LRUCache
                return hash >> (32 - kNumShardBits);
            }

          public:
            explicit ShardedLRUCache(size_t capacity)					// 构造函数
            : last_id_(0) {
                const size_t per_shard = (capacity + (kNumShards - 1)) / kNumShards; // 平均每个LRUCache的容量
                for (int s = 0; s < kNumShards; s++) {
                    shard_[s].SetCapacity(per_shard);         				// 超过了容量时，再插入就淘汰数据
                }
            }
            virtual ~ShardedLRUCache() { }
            virtual Handle* Insert(const Slice& key, void* value, size_t charge,
            void (*deleter)(const Slice& key, void* value)) {				// 插入映射到的LRUCache和hashtable中
                const uint32_t hash = HashSlice(key);
                return shard_[Shard(hash)].Insert(key, hash, value, charge, deleter); 
            }
            virtual Handle* Lookup(const Slice& key) {    				// 查找
                const uint32_t hash = HashSlice(key); 					// 计算hash值
                return shard_[Shard(hash)].Lookup(key, hash); 				// 在映射bucket中查找
            }
            virtual void Release(Handle* handle) {        				// 减少引用计数
                LRUHandle* h = reinterpret_cast<LRUHandle*>(handle);
                shard_[Shard(h->hash)].Release(handle);
            }
            virtual void Erase(const Slice& key) {  						// 删除
                const uint32_t hash = HashSlice(key);
                shard_[Shard(hash)].Erase(key, hash);
            }
            virtual void* Value(Handle* handle) {  						// 返回对应的值
                return reinterpret_cast<LRUHandle*>(handle)->value;
            }
            virtual uint64_t NewId() { 								// 新的id
                MutexLock l(&id_mutex_);
                return ++(last_id_);
            }
         };

对外创建LRUCache的接口：

    // Public factory: allocates a ShardedLRUCache with the requested total
    // capacity and returns it through the Cache interface.
    Cache* NewLRUCache(size_t capacity)
    {
        Cache* const cache = new ShardedLRUCache(capacity);
        return cache;
    }

巴山独钓

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
1
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录