一:应用背景
分布式系统中的计算节点的输入参数是大量重复的文件,这些文件通常在10M-300M之间,如果每次从磁盘读取将会在读取数据上耗费大量的时间。解决这个问题的一个方法是设计一个缓存机制:在内存中保存最近使用的文件,下次计算任务到来时先检查缓存中是否有此文件,有则直接读取缓存,而没必要读取磁盘文件。
二:LRU最近最少使用缓存算法
该算法的实现主要使用了双向链表和一个map容器:双向链表存储缓存信息节点,map用于快速查找一个信息节点(这里也可以采用查找更高效的hash_map,考虑系统一致性这里没有采用hash_map)。
三:LRU缓存源码
3.1 cacheLRU.h
/*!
* \file cacheLRU.h
* \brief 计算节点数据缓存
*
* \author ****
* \version 1.0
* \date 2016.7.9
*/
#ifndef __LRUCACHE_H__
#define __LRUCACHE_H__
#include <string.h>
#include <pthread.h>
#include <map>
#include <iostream>
#define CHARSET_NAME_LEN 128
using std::map;
using std::string;
/* Cache entry: one file payload plus intrusive doubly-linked-list links. */
typedef struct Node{
char key[CHARSET_NAME_LEN];//file name used as the lookup key (NUL-terminated C string)
char *data;//payload buffer; heap-allocated with new[] by the cache (owned by the cache)
long bufferSpace;//size of the data buffer in bytes
long high;//total number of records in the data -- TODO confirm exact semantics with callers
long width;//length of each pass in the data -- NOTE(review): never assigned by the visible code
Node *prev, *next;//list links; MRU sits right after head, LRU right before tail
}Node;
/*!
 * \brief LRU cache of file buffers keyed by file name.
 *
 * Entries live in a doubly linked list with sentinel nodes: the most
 * recently used entry sits right after `head`, the least recently used
 * right before `tail`.  `cached_map` provides key -> node lookup.
 * Locking with a pthread mutex is optional (pthread_safe).
 */
class LRUCache{
public:
    /*!
     * \brief Build a ready-to-use cache.
     * \param total_pace      payload capacity in bytes
     * \param is_pthread_safe serialize public operations with a mutex
     */
    LRUCache(long total_pace, bool is_pthread_safe = false){
        totalSpace = availableSpace = total_pace;
        nodeNums = 0;
        pthread_safe = is_pthread_safe;
        if(pthread_safe)
            pthread_mutex_init(&cached_mutex, NULL);
        head = new Node;
        tail = new Node;
        head->prev = NULL;
        head->next = tail;
        tail->prev = head;
        tail->next = NULL;
    }
    /*!
     * \brief Default-construct an empty cache; call init() before use.
     *
     * BUG FIX: the original body was empty, leaving every member
     * (including head/tail/pthread_safe) indeterminate, so destroying a
     * default-constructed cache was undefined behavior.
     */
    LRUCache(){
        totalSpace = availableSpace = 0;
        nodeNums = 0;
        pthread_safe = false;
        head = NULL;
        tail = NULL;
    }
    /*! \brief Release every cached payload and the sentinel nodes. */
    ~LRUCache(){
        if(pthread_safe)
            pthread_mutex_destroy(&cached_mutex);
        if(head != NULL){ // default-constructed and never init()ed: nothing to walk
            Node *delNode = head->next;
            while(delNode != tail){
                Node *temNode = delNode->next;
                delete[] delNode->data; // BUG FIX: data is allocated with new[], so delete[]
                delete delNode;
                delNode = temNode;
            }
        }
        delete head; // delete of NULL is a no-op, safe for the default-constructed case
        delete tail;
        availableSpace = 0;
    }
    /*!
     * \brief (Re)initialize capacity and locking; mirrors the
     *        parameterized constructor for two-phase construction.
     * \return true on success
     */
    bool init(long total_pace, bool is_pthread_safe = false)
    {
        totalSpace = availableSpace = total_pace;
        nodeNums = 0;
        pthread_safe = is_pthread_safe;
        if(pthread_safe)
            pthread_mutex_init(&cached_mutex, NULL);
        head = new Node;
        tail = new Node;
        head->prev = NULL;
        head->next = tail;
        tail->prev = head;
        tail->next = NULL;
        return true; // BUG FIX: declared bool but the original had no return statement (UB)
    }
    /*! \brief Toggle mutex protection; call before any concurrent use. */
    void setPthread_safe(bool flag)
    {
        pthread_safe = flag;
    }
    bool Put(const char *key, char *data, long size, long dictHigh);
    Node * Get(const char *key);
    void viewCacheList();
private:
    /*! \brief Acquire the cache mutex when thread safety is enabled. */
    void cached_lock(void){
        if(pthread_safe)
            pthread_mutex_lock(&cached_mutex);
    }
    /*! \brief Release the cache mutex when thread safety is enabled. */
    void cached_unlock(void){
        if(pthread_safe)
            pthread_mutex_unlock(&cached_mutex);
    }
    /*! \brief Unlink a node from the list; node must currently be linked. */
    void detach(Node* node){
        node->prev->next = node->next;
        node->next->prev = node->prev;
    }
    /*! \brief Insert a node right after head (mark it most recently used). */
    void attach(Node* node){
        node->prev = head;
        node->next = head->next;
        head->next = node;
        node->next->prev = node;
    }
private:
    map<string, Node* > cached_map;  // key -> node, for fast lookup
    Node * head, *tail;              // list sentinels; MRU after head, LRU before tail
    long totalSpace;                 // total payload capacity in bytes
    long availableSpace;             // bytes still free for new payloads
    int nodeNums;                    // number of cached entries
    bool pthread_safe;               // whether cached_mutex guards operations
    pthread_mutex_t cached_mutex;
};
#endif
3.2 cacheLRU.cpp
#include <iostream>
#include "cacheLRU.h"
using namespace std;
void LRUCache::viewCacheList()
{
Node *delNode=head->next;
std::cout<<"availableSpace::"<<availableSpace<<" totalSpace::"<<totalSpace<<"nodeNums::"<<nodeNums<<endl;
while(delNode!=tail){
std::cout<<"cacheName::"<<string(delNode->key)<<" size::"<<delNode->bufferSpace<<std::endl;
delNode=delNode->next;
}
}
bool LRUCache::Put(const char *key , char *data ,long data_size,long dictHigh){
cached_lock();
Node *node = cached_map[string(key)];
if(node){
detach(node);
node->data = data;
attach(node);
return true;
}
else{
/*新插入缓存必须检查缓冲区大小,如果可用容量不足,则删除最近最少使用的缓存,直至有充足的缓存空间*/
while(availableSpace<data_size)
{
Node *temNode=tail->prev;
if(temNode!=head)
{
cout<<"delete cache::"<<string(temNode->key)<<" size::"<<temNode->bufferSpace<<endl;
temNode->prev->next=tail;
tail->prev=temNode->prev;
availableSpace+=temNode->bufferSpace;
--nodeNums;
delete temNode->data;//回收内存
delete temNode;
temNode=NULL;
}else
return false;
}
node=new Node;
memcpy(node->key, key, CHARSET_NAME_LEN);
node->data=new char[data_size];
memcpy(node->data, data, data_size);
node->bufferSpace=data_size;
node->high=dictHigh;
cached_map[string(key)] = node;
attach(node);
availableSpace-=data_size;
++nodeNums;
return true;
}
cached_unlock();
}
/*!
 * \brief Look up a cached entry and mark it most recently used.
 * \param key file name to look up
 * \return the cached node (owned by the cache) or NULL on a miss
 */
Node * LRUCache::Get(const char *key){
    cached_lock();
    // BUG FIX: the original used cached_map[key], which default-inserts
    // a NULL entry on every miss, polluting the map and growing it
    // without bound; find() performs a pure lookup.
    map<string, Node*>::iterator it = cached_map.find(string(key));
    if(it != cached_map.end()){
        Node *node = it->second;
        detach(node);
        attach(node); // move to the front: it is now the most recently used
        cached_unlock();
        return node;
    }
    cached_unlock();
    return NULL;
}