LevelDB license:
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
Cache.h
#include <string>
#include "stubs/Common.h"
// A cache entry: a variable-length, heap-allocated struct. Every entry is
// linked into one of LRUCache's circular doubly-linked lists (lru_ or
// in_use_) and into one bucket chain of the HandleTable.
struct LRUHandle {
// Links for the LRUCache doubly-linked lists.
LRUHandle* prev;
LRUHandle* next;
uint32 refs;       // reference count; the cache itself holds one while in_cache
bool in_cache;     // whether the entry is still present in the cache
uint32 charge;     // cost charged against the cache capacity
void *data;        // opaque user value
void (*deleter)(const std::string&, void* value);  // invoked with (key, data) when refs drops to 0
//std::string data;
LRUHandle *next_hash;  // next entry in the same HandleTable bucket chain
uint32 key_length;     // number of key bytes stored starting at key[0]
uint32 hash;           // hash of the key, cached for fast compares and rehashing
// First byte of the key; the remaining bytes are allocated immediately
// past the struct (see the malloc(sizeof(LRUHandle)-1 + key.size()) in
// LRUCache::Insert).
char key[1];
// Reconstruct the key as a std::string from the inline bytes.
std::string GetKey() const {
return std::string(key, key_length);
}
};
// A hash table of LRUHandle* with power-of-two bucket count and chaining
// via LRUHandle::next_hash. It does not own the entries it stores.
class HandleTable {
public:
HandleTable();
~HandleTable();
// Return the entry matching (key, hash), or NULL if absent.
LRUHandle* Lookup(const std::string& key, uint32 hash);
// Insert h; returns the replaced entry with the same key/hash, or NULL.
LRUHandle* Insert(LRUHandle *h);
// Unlink and return the entry for (key, hash), or NULL if absent.
LRUHandle* Remove(const std::string& key, uint32 hash);
uint32 GetLength() const { return length_; }
uint32 GetElemCount() const { return elems_; }
private:
// Locate the slot for (key, hash): the address of the pointer to the
// matching entry, or of the chain-terminating NULL when there is no match.
LRUHandle** FindPoint(const std::string& key, uint32 hash);
// Grow the bucket array and rehash every entry into it.
void Resize();
uint32 length_; // number of buckets (always a power of two)
uint32 elems_;  // number of entries stored in the table
LRUHandle **list_; // bucket array
};
// Abstract cache interface (LevelDB-style). Maps string keys to opaque
// void* values, charging each entry against a fixed total capacity.
// Concrete implementations (see NewLRUCache) evict least-recently-used
// entries once the combined charge exceeds the capacity.
//
// Entries are exposed through opaque Handle*s; an entry is destroyed
// (its deleter invoked) only after it is erased/evicted AND every
// outstanding handle has been released.
class Cache {
public:
    Cache() { }
    // Destroys all existing entries by calling the "deleter"
    // function that was passed to the constructor.
    virtual ~Cache();
    // Opaque handle to an entry stored in the cache.
    struct Handle { };
    // Insert a mapping from key->value into the cache and assign it
    // the specified charge against the total cache capacity.
    //
    // Returns a handle that corresponds to the mapping. The caller
    // must call this->Release(handle) when the returned mapping is no
    // longer needed.
    //
    // When the inserted entry is no longer needed, the key and
    // value will be passed to "deleter".
    virtual Handle* Insert(const std::string& key, void* value, size_t charge,
                           void (*deleter)(const std::string& key, void* value)) = 0;
    // If the cache has no mapping for "key", returns NULL.
    //
    // Else return a handle that corresponds to the mapping. The caller
    // must call this->Release(handle) when the returned mapping is no
    // longer needed.
    virtual Handle* Lookup(const std::string& key) = 0;
    // Release a mapping returned by a previous Lookup().
    // REQUIRES: handle must not have been released yet.
    // REQUIRES: handle must have been returned by a method on *this.
    virtual void Release(Handle* handle) = 0;
    // Return the value encapsulated in a handle returned by a
    // successful Lookup().
    // REQUIRES: handle must not have been released yet.
    // REQUIRES: handle must have been returned by a method on *this.
    virtual void* Value(Handle* handle) = 0;
    // If the cache contains entry for key, erase it. Note that the
    // underlying entry will be kept around until all existing handles
    // to it have been released.
    virtual void Erase(const std::string& key) = 0;
    // Return a new numeric id. May be used by multiple clients who are
    // sharing the same cache to partition the key space. Typically the
    // client will allocate a new id at startup and prepend the id to
    // its cache keys.
    virtual uint64 NewId() = 0;
    // Remove all cache entries that are not actively in use. Memory-constrained
    // applications may wish to call this method to reduce memory usage.
    // Default implementation of Prune() does nothing. Subclasses are strongly
    // encouraged to override the default implementation. A future release of
    // leveldb may change Prune() to a pure abstract method.
    virtual void Prune() {}
    // Return an estimate of the combined charges of all elements stored in the
    // cache.
    virtual size_t TotalCharge() const = 0;
private:
    // BUG FIX: removed dead private declarations (LRU_Remove/LRU_Append/
    // Unref taking Handle*, and "struct Rep; Rep* rep_;") -- none of them
    // were ever defined or used, and rep_ was never initialized. They were
    // leftovers from an older, non-sharded Cache design.
    // No copying allowed
    Cache(const Cache&);
    void operator=(const Cache&);
};
// Create a new cache with a fixed size capacity. This implementation
// of Cache uses a least-recently-used eviction policy.
Cache* NewLRUCache(size_t capacity);
// NOTE(review): this #endif has no matching "#ifndef CACHE_H_ /
// #define CACHE_H_" visible at the top of the header -- add the guard
// (or #pragma once) so the header survives multiple inclusion.
#endif //CACHE_H_
Cache.cpp
#include "Cache.h"
//HandleTable
// Start out empty; the Resize() call allocates the initial bucket array
// (four buckets) so the table is immediately usable.
HandleTable::HandleTable() : length_(0), elems_(0), list_(NULL) {
    Resize();
}
// Frees only the bucket array itself; the entries are owned and freed
// by the LRUCache that inserted them.
HandleTable::~HandleTable() {
    delete[] list_;  // delete[] on NULL is a no-op, so no guard is needed
}
// Return the entry matching (key, hash), or NULL when there is none.
LRUHandle* HandleTable::Lookup(const std::string& key, uint32 hash) {
    LRUHandle** slot = FindPoint(key, hash);
    return *slot;
}
// Insert h, replacing any existing entry with the same key and hash.
// Returns the replaced entry (the caller must dispose of it), or NULL
// when the key was new.
LRUHandle* HandleTable::Insert(LRUHandle* h) {
    LRUHandle** slot = FindPoint(h->GetKey(), h->hash);
    LRUHandle* replaced = *slot;
    // Splice h in where the old entry sat (or at the end of the chain).
    h->next_hash = (replaced == NULL) ? NULL : replaced->next_hash;
    *slot = h;
    if (replaced != NULL) {
        return replaced;  // replacement: element count is unchanged
    }
    // Brand-new key: grow the table once the average chain length
    // would exceed one entry per bucket.
    elems_++;
    if (elems_ > length_) {
        Resize();
    }
    return NULL;
}
// Unlink and return the entry for (key, hash); NULL if not present.
LRUHandle* HandleTable::Remove(const std::string& key, uint32 hash) {
    LRUHandle** slot = FindPoint(key, hash);
    LRUHandle* victim = *slot;
    if (victim == NULL) {
        return NULL;
    }
    *slot = victim->next_hash;  // bypass the victim in its chain
    elems_--;
    return victim;
}
// Return the address of the pointer that refers to the entry matching
// (key, hash). If no entry matches, return the address of the NULL
// terminating that bucket's chain -- i.e. the insertion point.
LRUHandle** HandleTable::FindPoint(const std::string& key, uint32 hash) {
    // length_ is a power of two, so "hash & (length_ - 1)" is hash % length_.
    LRUHandle** slot = &list_[hash & (length_ - 1)];
    for (; *slot != NULL; slot = &(*slot)->next_hash) {
        if ((*slot)->hash == hash && (*slot)->GetKey() == key) {
            break;  // found the matching entry
        }
    }
    return slot;
}
void HandleTable::Resize() {
uint32 new_length = 4;
if (new_length < elems_)
new_length *= 2;
LRUHandle** new_list = new LRUHandle*[new_length];
memset(new_list, 0, sizeof(new_list[0])*new_length);
uint32 count = 0;
for (int i=0; i<length_; i++) {
LRUHandle* h = list_[i];
while (h != NULL) {
//获得桶中下一个LRUHandle*, 保证下一个得到处理
LRUHandle* next = h->next_hash;
//根据新桶的大小进行rehash
uint32 hash = h->hash;
LRUHandle** ptr = &new_list[hash & (new_length-1)];
//调整新桶中的当前节点的下一个节点
h->next_hash = *ptr;
//调整新桶中头节点指向新加入的节点
*ptr = h;
h = next;
count++;
}
}
assert(count == elems_);
delete[] list_;
list_ = new_list;
length_ = new_length;
}
//LRUCache
// A single LRU cache shard. Thread-safe: every public method acquires
// mutex_. Each entry lives on exactly one of two circular lists:
//   lru_    -- in the cache but unreferenced by clients (refs == 1);
//              these are the eviction candidates, oldest at lru_.next.
//   in_use_ -- pinned by at least one client handle (refs >= 2).
class LRUCache {
public:
LRUCache();
~LRUCache();
// Separate from constructor so caller can easily make an array of LRUCache
void SetCapacity(size_t capacity) { capacity_ = capacity; }
// Like Cache methods, but with an extra "hash" parameter.
Cache::Handle* Insert(const std::string& key, uint32 hash,
void* value, size_t charge,
void (*deleter)(const std::string& key, void* value));
Cache::Handle* Lookup(const std::string& key, uint32 hash);
void Release(Cache::Handle* handle);
void Erase(const std::string& key, uint32 hash);
void Prune();
size_t TotalCharge() const {
MutexLock l(&mutex_);
return usage_;
}
private:
// Unlink e from whichever list it is currently on.
void LRU_Remove(LRUHandle* e);
// Insert e as the newest entry of *list (just before the dummy head).
void LRU_Append(LRUHandle*list, LRUHandle* e);
// Add a reference; moves e from lru_ to in_use_ when a client pins it.
void Ref(LRUHandle* e);
// Drop a reference; destroys e at refs==0, moves it to lru_ at refs==1.
void Unref(LRUHandle* e);
// Finish removing e (already out of table_); returns e != NULL.
bool FinishErase(LRUHandle* e);
// Initialized before use.
size_t capacity_;
// mutex_ protects the following state.
mutable Mutex mutex_;
size_t usage_; // sum of the charges of all entries currently in the cache
// Dummy head of LRU list.
// lru.prev is newest entry, lru.next is oldest entry.
// Entries have refs==1 and in_cache==true.
LRUHandle lru_;
// Dummy head of in-use list.
// Entries are in use by clients, and have refs >= 2 and in_cache==true.
LRUHandle in_use_;
HandleTable table_; // hash index over the entries for O(1) lookup
};
// Both dummy list heads start pointing at themselves (empty rings).
// capacity_ is deliberately left unset: SetCapacity() must be called
// before the cache is used.
LRUCache::LRUCache() : usage_(0) {
    in_use_.next = &in_use_;
    in_use_.prev = &in_use_;
    lru_.next = &lru_;
    lru_.prev = &lru_;
}
// Destroy all remaining (unpinned) entries.
LRUCache::~LRUCache() {
    // Error if caller has an unreleased handle: in_use_ must be empty.
    assert(in_use_.next == &in_use_);
    LRUHandle* e = lru_.next;
    while (e != &lru_) {
        LRUHandle* successor = e->next;  // grab before Unref frees e
        assert(e->in_cache);
        e->in_cache = false;
        assert(e->refs == 1);  // only the cache's own reference remains
        Unref(e);
        e = successor;
    }
}
// Acquire a reference on e. The first client reference pins the entry,
// moving it off the eviction (lru_) list and onto in_use_.
void LRUCache::Ref(LRUHandle* e) {
    const bool on_lru_list = (e->refs == 1 && e->in_cache);
    if (on_lru_list) {
        LRU_Remove(e);
        LRU_Append(&in_use_, e);
    }
    e->refs++;
}
// Drop a reference on e. At zero references the entry is destroyed; at
// exactly one (the cache's own, while still in the cache) it becomes an
// eviction candidate again and returns to the lru_ list.
void LRUCache::Unref(LRUHandle* e) {
    assert(e->refs > 0);
    if (--e->refs == 0) {
        // Last reference gone: run the user's deleter, then free the
        // malloc'd handle+key block.
        (*e->deleter)(e->GetKey(), e->data);
        free(e);
    } else if (e->in_cache && e->refs == 1) {
        LRU_Remove(e);
        LRU_Append(&lru_, e);
    }
}
// Unlink e from its circular list; e's own prev/next are left stale.
void LRUCache::LRU_Remove(LRUHandle* e) {
    LRUHandle* before = e->prev;
    LRUHandle* after = e->next;
    before->next = after;
    after->prev = before;
}
// Make "e" the newest entry by splicing it in just before the dummy
// head *list (so list->prev is always the newest element).
void LRUCache::LRU_Append(LRUHandle* list, LRUHandle* e) {
    LRUHandle* old_newest = list->prev;
    e->next = list;
    e->prev = old_newest;
    old_newest->next = e;
    list->prev = e;
}
// Find the entry for (key, hash) and pin it with an extra reference.
// Returns NULL on a miss; otherwise the caller must Release() the handle.
Cache::Handle* LRUCache::Lookup(const std::string& key, uint32 hash) {
    MutexLock guard(&mutex_);
    LRUHandle* found = table_.Lookup(key, hash);
    if (found != NULL) {
        Ref(found);
    }
    return reinterpret_cast<Cache::Handle*>(found);
}
// Drop the client reference represented by "handle".
void LRUCache::Release(Cache::Handle* handle) {
    MutexLock guard(&mutex_);
    Unref(reinterpret_cast<LRUHandle*>(handle));
}
// Insert (key -> value) with the given charge, returning a pinned handle
// the caller must Release(). Replaces any existing entry for the key,
// then evicts cold entries until usage fits the capacity again.
Cache::Handle* LRUCache::Insert(const std::string& key, uint32 hash,
void* value, size_t charge, void (*deleter)(const std::string& key, void* value)) {
MutexLock l(&mutex_);
// One allocation holds the struct plus the key bytes: key[1] supplies the
// first byte and the remaining key.size()-1 bytes sit past the struct.
LRUHandle* e = reinterpret_cast<LRUHandle *>(malloc(sizeof(LRUHandle)-1 + key.size()));
e->data = value;
e->deleter = deleter;
e->hash = hash;
e->key_length = key.size();
e->charge = charge;
e->in_cache = false;
e->refs = 1; // for the returned handle.
memcpy(e->key, key.c_str(), key.size());
if (capacity_ > 0) {
e->refs++; // for the cache's reference.
e->in_cache = true;
LRU_Append(&in_use_, e);
usage_ += charge;
// table_.Insert returns a displaced entry with the same key (or NULL);
// FinishErase unlinks it and drops the cache's reference to it.
FinishErase(table_.Insert(e));
} else {
// capacity_ == 0 disables caching entirely: the entry never joins the
// table or lists and dies when the returned handle is released.
e->next = NULL;
}
// Evict from the cold end (lru_.next is oldest). Entries pinned by
// clients live on in_use_, so they are never considered here.
while (usage_ > capacity_ && lru_.next != &lru_) {
LRUHandle* old = lru_.next;
assert(old->refs == 1);
bool erased = FinishErase(table_.Remove(old->GetKey(), old->hash));
if (!erased) { // to avoid unused variable when compiled NDEBUG
assert(erased);
}
}
return reinterpret_cast<Cache::Handle *>(e);
}
// If e != NULL, finish removing *e from the cache; it has already been
// removed from the hash table. Returns whether e != NULL.
// Requires mutex_ held.
bool LRUCache::FinishErase(LRUHandle* e) {
    if (e == NULL) {
        return false;
    }
    assert(e->in_cache);
    LRU_Remove(e);
    e->in_cache = false;
    usage_ -= e->charge;
    Unref(e);  // drops the cache's own reference
    return true;
}
// Erase the entry for (key, hash), if any. The underlying entry is only
// destroyed once all outstanding handles to it are released.
void LRUCache::Erase(const std::string& key, uint32 hash) {
    MutexLock guard(&mutex_);
    FinishErase(table_.Remove(key, hash));
}
void LRUCache::Prune() {
MutexLock l(&mutex_);
while (lru_.next != &lru_) {
LRUHandle* e = lru_.next;
assert(e->refs == 1);
bool erased = FinishErase(table_.Remove(e->GetKey(), e->hash));
if (!erased) { // to avoid unused variable when compiled NDEBUG
assert(erased);
}
}
}
// Nothing to do here; concrete caches clean up in their own destructors.
Cache::~Cache() {}
// Number of shards the key space is split across (reduces lock contention).
static const int kNumShardBits = 4;
static const int kNumShards = 1 << kNumShardBits;
// Cache implementation that spreads entries over kNumShards LRUCache
// shards, selected by the top bits of the key hash.
class ShardedLRUCache : public Cache {
private:
    LRUCache shard_[kNumShards];
    Mutex id_mutex_;  // protects last_id_
    uint64 last_id_;
    static inline uint32 HashString(const std::string &key) {
        return Hash(key.c_str(), key.size(), 0);
    }
    // The top kNumShardBits bits of the hash pick the shard.
    static uint32 Shard(uint32 hash) {
        return hash >> (32 - kNumShardBits);
    }
public:
    // BUG FIX: member functions defined inside the class body must not be
    // qualified with "ShardedLRUCache::" -- that is ill-formed standard
    // C++ (an MSVC-only extension). Also takes size_t to match
    // NewLRUCache(size_t) instead of silently narrowing to uint32.
    explicit ShardedLRUCache(size_t capacity) : last_id_(0) {
        // Round up so the shards' combined capacity is >= capacity.
        const size_t per_shard = (capacity + (kNumShards - 1)) / kNumShards;
        for (int s = 0; s < kNumShards; s++) {
            shard_[s].SetCapacity(per_shard);
        }
    }
    virtual ~ShardedLRUCache() {}
    virtual Handle* Insert(const std::string& key, void* value, size_t charge,
                           void (*deleter)(const std::string& key, void* value)) {
        const uint32 hash = HashString(key);
        return shard_[Shard(hash)].Insert(key, hash, value, charge, deleter);
    }
    virtual Handle* Lookup(const std::string& key) {
        const uint32 hash = HashString(key);
        return shard_[Shard(hash)].Lookup(key, hash);
    }
    virtual void Release(Handle* handle) {
        // Recover the hash stored in the entry to find its owning shard.
        LRUHandle* h = reinterpret_cast<LRUHandle *>(handle);
        shard_[Shard(h->hash)].Release(handle);
    }
    virtual void Erase(const std::string& key) {
        const uint32 hash = HashString(key);
        return shard_[Shard(hash)].Erase(key, hash);
    }
    virtual void* Value(Handle *handle) {
        return reinterpret_cast<LRUHandle *>(handle)->data;
    }
    virtual uint64 NewId() {
        MutexLock l(&id_mutex_);
        return ++(last_id_);
    }
    virtual void Prune() {
        for (int s = 0; s < kNumShards; s++) {
            shard_[s].Prune();
        }
    }
    virtual size_t TotalCharge() const {
        size_t total = 0;
        for (int s = 0; s < kNumShards; s++) {
            total += shard_[s].TotalCharge();
        }
        return total;
    }
};
// Create a new cache with a fixed size capacity, backed by the sharded
// least-recently-used implementation above.
Cache* NewLRUCache(size_t capacity) {
    return new ShardedLRUCache(capacity);
}
Cache unit tests (written with gtest):
// Verifies that HandleTable starts with 4 buckets and grows to 8 once a
// fifth element is inserted (load factor > 1 triggers Resize()).
TEST(HandleTable, resize) {
    HandleTable htable;
    ASSERT_EQ(htable.GetElemCount(), 0);
    ASSERT_EQ(htable.GetLength(), 4);
    std::string keys[] = {"name", "whoami", "address", "eman", "imaohw", "sserdda", "handletable"};
    const int kNumKeys = 7;
    // FIX: keep the malloc'd handles so they can be freed at the end
    // (the original test leaked all seven allocations).
    LRUHandle* handles[kNumKeys];
    for (int i = 0; i < kNumKeys; i++) {
        LRUHandle* h = reinterpret_cast<LRUHandle *>(malloc(sizeof(LRUHandle)-1 + keys[i].size()));
        h->hash = Hash(keys[i].c_str(), keys[i].size(), 0);
        h->key_length = keys[i].size();
        memcpy(h->key, keys[i].c_str(), keys[i].size());
        // FIX: %u for the unsigned values (was %d), and explicit parens
        // around the bucket-mask computation.
        printf("key: %s, hash:%u, index:%u\n", keys[i].c_str(), h->hash,
               (h->hash & (htable.GetLength() - 1)));
        h->data = NULL;
        htable.Insert(h);
        handles[i] = h;
        if (i == 4) {
            // Resize() must already have run: 5 elements > 4 buckets.
            ASSERT_EQ(htable.GetElemCount(), 5);
            uint32 len = htable.GetLength();
            printf("HandleTable length:%u\n", len);
            ASSERT_EQ(len, 8);
        }
    }
    ASSERT_EQ(htable.GetElemCount(), 7);
    uint32 len = htable.GetLength();
    printf("HandleTable length:%u\n", len);
    ASSERT_EQ(len, 8);
    // HandleTable does not own its entries; release them here.
    for (int i = 0; i < kNumKeys; i++) {
        free(handles[i]);
    }
}
// Encode k as a 4-byte fixed-width string, usable as a cache key.
static std::string EncodeFix32Int(int k) {
    std::string encoded;
    PutFixed32(&encoded, k);
    return encoded;
}
// Decode a key produced by EncodeFix32Int back into the original int.
static int DecodeFix32Int(const std::string& key) {
    assert(key.size() == 4);  // must be exactly one fixed32
    return DecodeFixed32(key.c_str());
}
// The cache stores values as opaque pointers, so the tests smuggle small
// integers through them: pack an int into a void* and unpack it again.
static void* EncodeValue(uintptr_t v) {
    return reinterpret_cast<void *>(v);
}
static int DecodeValue(void* v) {
    return static_cast<int>(reinterpret_cast<uintptr_t>(v));
}
// gtest fixture: each test gets a fresh cache of capacity kCacheSize.
// Keys and values are ints (encoded via EncodeFix32Int / EncodeValue),
// and Deleter records every destroyed entry so tests can assert exactly
// when entries die.
class CacheTest : public testing::Test {
public:
// The fixture instance the static Deleter callback forwards to.
static CacheTest* current_;
static void Deleter(const std::string& key, void* v) {
current_->deleted_keys.push_back(DecodeFix32Int(key));
current_->deleted_values.push_back(DecodeValue(v));
}
static const int kCacheSize = 1000;
// Log of keys/values whose deleter has run, in deletion order.
std::vector<int> deleted_keys;
std::vector<int> deleted_values;
Cache* cache_;
CacheTest() : cache_(NewLRUCache(kCacheSize)) {
current_ = this;
}
~CacheTest() {
delete cache_;
}
// Returns the cached value for key, or -1 on a miss; releases the handle.
int Lookup(int key) {
Cache::Handle* handle = cache_->Lookup(EncodeFix32Int(key));
const int r = (handle == NULL) ? -1 : DecodeValue(cache_->Value(handle));
if (handle != NULL) {
cache_->Release(handle);
}
return r;
}
// Insert and immediately release the returned handle (entry unpinned).
void Insert(int key, int value, int charge = 1) {
cache_->Release(cache_->Insert(EncodeFix32Int(key), EncodeValue(value), charge, &CacheTest::Deleter));
}
// Insert and keep the handle, pinning the entry until Release().
Cache::Handle* InsertAndReturnHandle(int key, int value, int charge = 1) {
return cache_->Insert(EncodeFix32Int(key), EncodeValue(value), charge, &CacheTest::Deleter);
}
void Erase(int key) {
cache_->Erase(EncodeFix32Int(key));
}
};
CacheTest* CacheTest::current_;
// Basic hit/miss behavior, plus: re-inserting an existing key replaces
// the old entry and runs its deleter once it is unreferenced.
TEST_F(CacheTest, HitAndMiss) {
ASSERT_EQ(-1, Lookup(100));
Insert(100, 101);
ASSERT_EQ(101, Lookup(100));
ASSERT_EQ(-1, Lookup(200));
ASSERT_EQ(-1, Lookup(300));
Insert(200, 201);
ASSERT_EQ(101, Lookup(100));
ASSERT_EQ(201, Lookup(200));
ASSERT_EQ(-1, Lookup(300));
// Overwrite key 100: the old (100, 101) entry must be deleted.
Insert(100, 102);
ASSERT_EQ(102, Lookup(100));
ASSERT_EQ(201, Lookup(200));
ASSERT_EQ(-1, Lookup(300));
ASSERT_EQ(1, deleted_keys.size());
ASSERT_EQ(100, deleted_keys[0]);
ASSERT_EQ(101, deleted_values[0]);
}
// Erase removes an entry and triggers its deleter; erasing a key that is
// absent (or already erased) is a harmless no-op.
TEST_F(CacheTest, Erase) {
Erase(200);
ASSERT_EQ(0, deleted_keys.size());
Insert(100, 101);
Insert(200, 201);
Erase(100);
ASSERT_EQ(-1, Lookup(100));
ASSERT_EQ(201, Lookup(200));
ASSERT_EQ(1, deleted_keys.size());
ASSERT_EQ(100, deleted_keys[0]);
ASSERT_EQ(101, deleted_values[0]);
// Second erase of the same key: nothing further is deleted.
Erase(100);
ASSERT_EQ(-1, Lookup(100));
ASSERT_EQ(201, Lookup(200));
ASSERT_EQ(1, deleted_keys.size());
}
// Outstanding handles pin entries: a replaced or erased entry stays
// alive (its deleter does not run) until its last handle is released.
TEST_F(CacheTest, EntriesArePinned) {
Insert(100, 101);
Cache::Handle* h1 = cache_->Lookup(EncodeFix32Int(100));
ASSERT_EQ(101, DecodeValue(cache_->Value(h1)));
// Replace key 100 while h1 still pins the old entry.
Insert(100, 102);
Cache::Handle* h2 = cache_->Lookup(EncodeFix32Int(100));
ASSERT_EQ(102, DecodeValue(cache_->Value(h2)));
ASSERT_EQ(0, deleted_keys.size());
// Releasing h1 lets the displaced (100, 101) entry die.
cache_->Release(h1);
ASSERT_EQ(1, deleted_keys.size());
ASSERT_EQ(100, deleted_keys[0]);
ASSERT_EQ(101, deleted_values[0]);
// Erase removes the entry from the cache, but h2 still pins it.
Erase(100);
ASSERT_EQ(-1, Lookup(100));
ASSERT_EQ(1, deleted_keys.size());
cache_->Release(h2);
ASSERT_EQ(2, deleted_keys.size());
ASSERT_EQ(100, deleted_keys[1]);
ASSERT_EQ(102, deleted_values[1]);
}
// Overfilling the cache evicts cold entries (200) but keeps both the
// frequently-looked-up entry (100) and the handle-pinned entry (300).
TEST_F(CacheTest, EvictionPolicy) {
Insert(100, 101);
Insert(200, 201);
Insert(300, 301);
Cache::Handle* h = cache_->Lookup(EncodeFix32Int(300));
// Frequently used entry must be kept around,
// as must things that are still in use.
for (int i = 0; i < kCacheSize + 100; i++) {
Insert(1000+i, 2000+i);
ASSERT_EQ(2000+i, Lookup(1000+i));
ASSERT_EQ(101, Lookup(100));
}
ASSERT_EQ(101, Lookup(100));
ASSERT_EQ(-1, Lookup(200));
ASSERT_EQ(301, Lookup(300));
cache_->Release(h);
}
// Pinned entries are never evicted, so usage may exceed capacity while
// handles are held; every inserted entry must remain findable.
TEST_F(CacheTest, UseExceedsCacheSize) {
    // Overfill the cache, keeping handles on all inserted entries.
    std::vector<Cache::Handle*> h;
    for (int i = 0; i < kCacheSize + 100; i++) {
        h.push_back(InsertAndReturnHandle(1000+i, 2000+i));
    }
    // Check that all the entries can be found in the cache.
    // FIX: use size_t for the index to avoid signed/unsigned comparison
    // with h.size().
    for (size_t i = 0; i < h.size(); i++) {
        const int k = static_cast<int>(i);
        ASSERT_EQ(2000+k, Lookup(1000+k));
    }
    for (size_t i = 0; i < h.size(); i++) {
        cache_->Release(h[i]);
    }
}
// Mixed-weight entries: after overfilling with alternating light/heavy
// charges, the total retained charge stays near the capacity.
TEST_F(CacheTest, HeavyEntries) {
// Add a bunch of light and heavy entries and then count the combined
// size of items still in the cache, which must be approximately the
// same as the total capacity.
const int kLight = 1;
const int kHeavy = 10;
int added = 0;
int index = 0;
while (added < 2*kCacheSize) {
const int weight = (index & 1) ? kLight : kHeavy;
Insert(index, 1000+index, weight);
added += weight;
index++;
}
int cached_weight = 0;
for (int i = 0; i < index; i++) {
const int weight = (i & 1 ? kLight : kHeavy);
int r = Lookup(i);
if (r >= 0) {
cached_weight += weight;
ASSERT_EQ(1000+i, r);
}
}
// Allow 10% slack over the capacity.
ASSERT_LE(cached_weight, kCacheSize + kCacheSize/10);
}
// NewId must hand out distinct ids on successive calls.
TEST_F(CacheTest, NewId) {
uint64_t a = cache_->NewId();
uint64_t b = cache_->NewId();
ASSERT_NE(a, b);
}
// Prune drops only unpinned entries: the handle on key 1 keeps it alive
// while the unreferenced key 2 is removed.
TEST_F(CacheTest, Prune) {
Insert(1, 100);
Insert(2, 200);
Cache::Handle* handle = cache_->Lookup(EncodeFix32Int(1));
ASSERT_TRUE(handle);
cache_->Prune();
cache_->Release(handle);
ASSERT_EQ(100, Lookup(1));
ASSERT_EQ(-1, Lookup(2));
}
// A zero-capacity cache caches nothing: inserts succeed but entries are
// never retained.
TEST_F(CacheTest, ZeroSizeCache) {
delete cache_;
cache_ = NewLRUCache(0);
Insert(1, 100);
ASSERT_EQ(-1, Lookup(1));
}
Other supporting pieces — the hash function, the mutex/lock wrappers, and so on — may be implemented however you like:
// Write "v" into dst[0..3] using the host's native byte order
// (little-endian on the platforms this code targets).
void EncodeFixed32(char *dst, uint32_t v) {
    memcpy(dst, &v, sizeof(v));
}
// Append the 4-byte fixed-width encoding of "value" to *dst.
void PutFixed32(std::string *dst, uint32_t value) {
    char scratch[sizeof(uint32_t)];
    EncodeFixed32(scratch, value);
    dst->append(scratch, sizeof(scratch));
}
// Read a native-byte-order (little-endian here) uint32 from ptr[0..3].
uint32_t DecodeFixed32(const char* ptr) {
    uint32_t result;
    memcpy(&result, ptr, sizeof(result));
    return result;
}
: