介绍
缓存技术用来解决高并发、大数据场景下热点访问的性能问题。常见的做法是直接使用Hash缓存,但哈希冲突率与缓存表所占内存大小成负相关:内存越小,冲突越多。因此,设计一个内存占用少而缓存利用率较高的缓存成为一个迫切需要解决的问题。布谷鸟缓存模仿布谷鸟的习性而设计,其查找流程最多只需经过两次哈希查找,而其插入流程较慢,需要在多个“桶”中寻找空间。
设计
- 由缓存行数和列数生成缓存表CuckooBuffer
- 收到关键字key,先查找缓存: 查找缓存步骤: 步骤1、index=Hash1(key),sig=Hash2(key) 步骤2、遍历index对应的桶,桶中若第i个槽有CuckooBuffer[index][i].sig == sig则返回缓存项中的对象指针 步骤3、CuckooBuffer[index][i].sig != sig,则index^=sig%row,重复步骤2
- 插入缓存步骤: 步骤1:index=Hash1(key),sig=Hash2(key) 步骤2:遍历index对应的桶,如果有空槽则插入到该位置,否则进行步骤3 步骤3:index^=sig%row; 遍历index对应的桶,如果有空槽则插入到该位置,否则进行步骤4 步骤4:在步骤1和步骤3中的两个index对应的桶里随机选中一个桶,再随机选中桶中的一个槽位,将其中的缓存项踢出,得到被踢出项的tindex和signature,并将待插入的信息写入该槽位,之后进行步骤5 步骤5:tindex^=signature%row,index=tindex,sig=signature,踢出次数+1,之后回到步骤3为被踢出的缓存项寻找空槽;如果踢出次数达到100次仍未找到空槽,则不再踢出,本次插入返回失败。
- 扫描Cuckoo缓存中不活跃的缓存项,当满足缓存访问时间间隔>缓存超时阈值,则清空该缓存项,程序中直接用32个bit记录时间,单位为毫秒。
以上主要介绍了布谷鸟过滤器的操作流程,如果对其原理还不清楚的同学,请参考布谷鸟过滤器原论文或他人的博客,本文主要讲解操作流程和代码演示。
代码实现(Java)
package org.fanlu.sdn.openflow.util;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.fanlu.sdn.openflow.hashFunctions.HashFunctions;
/**
 * Fixed-size cuckoo cache.
 *
 * <p>Each element is reduced to a bucket index and a signature, both computed from its
 * {@code toString()} value. An element is stored in one of two buckets (its primary bucket or
 * the alternate bucket derived from the primary index and the signature), and every bucket has
 * {@code col} slots. A lookup therefore probes at most two buckets, while an insertion may have
 * to relocate ("kick") existing entries to make room.
 *
 * <p>Entries are matched by signature only, so two different elements that map to the same
 * bucket and signature cannot be told apart.
 *
 * <p>All table accesses are guarded by a read-write lock. A daemon thread drains the queue fed
 * by {@link #add(Object)}, and a daemon timer periodically evicts entries that have not been
 * hit for longer than the configured time limit.
 *
 * @param <E> type of the cached object; it must override {@code toString()} so that the result
 *            contains the object's distinguishing features
 * @author 路遥
 */
public class CuckooHashing<E> {
    /** Maximum number of evictions a single insertion may perform before giving up. */
    private final static int KICKNUM = 100;

    private final CuckooBuffer<E>[][] cuckooBuffer;
    private final int tLimit;
    private final int row;
    private final int col;
    private final int scanInterval;
    private final ReadWriteLock lock = new ReentrantReadWriteLock();
    private final InsertCuckooRunnable runnable;
    /** Number of occupied slots; only modified while the write lock is held. */
    private int currentBufferNum;

    /**
     * Creates the cache table, starts the asynchronous insert thread and schedules the
     * periodic expiry scan.
     *
     * @param row    number of buckets (rows) in the table; must be positive
     * @param col    number of slots per bucket (columns); must be positive
     * @param tLimit idle time in milliseconds after which an entry is evicted; also used as
     *               the scan interval; must be positive
     * @throws IllegalArgumentException if any argument is not positive
     */
    public CuckooHashing(int row, int col, int tLimit) {
        if (row <= 0 || col <= 0) {
            throw new IllegalArgumentException(
                    "row and col must be positive: row=" + row + ", col=" + col);
        }
        if (tLimit <= 0) {
            throw new IllegalArgumentException("tLimit must be positive: " + tLimit);
        }
        this.tLimit = tLimit;
        this.row = row;
        this.col = col;
        this.scanInterval = tLimit;
        this.currentBufferNum = 0;

        // Generic array creation: only CuckooBuffer<E> instances are ever stored in the table.
        @SuppressWarnings("unchecked")
        CuckooBuffer<E>[][] table = new CuckooBuffer[row][col];
        for (int i = 0; i < table.length; i++) {
            for (int j = 0; j < table[i].length; j++) {
                table[i][j] = new CuckooBuffer<E>();
            }
        }
        this.cuckooBuffer = table;

        // Background workers are started last, after every field has been assigned.
        this.runnable = new InsertCuckooRunnable();
        Thread t = new Thread(runnable);
        t.setDaemon(true);
        t.start();
        this.cuckooScan();
    }

    /**
     * Looks up a cached entry and refreshes its last-hit time.
     *
     * @param e element whose {@code toString()} value identifies the entry
     * @return the stored object whose signature matches in the primary or alternate bucket,
     *         or {@code null} if neither bucket holds a matching entry
     */
    public E query(E e) {
        // Hashing touches no shared state, so it is done before taking the lock.
        String key = e.toString();
        int sig = HashFunctions.rotatingHash(key);
        int index = primaryIndex(key);

        lock.readLock().lock();
        try {
            CuckooBuffer<E> hit = findSlot(index, sig);
            if (hit == null) {
                hit = findSlot(alternateIndex(index, sig), sig);
            }
            if (hit == null) {
                return null;
            }
            // Only the timestamp is written under the read lock; the slot's identity
            // (sig, e) is changed exclusively under the write lock.
            hit.time = getCurrentTime();
            return hit.e;
        } finally {
            lock.readLock().unlock();
        }
    }

    /**
     * Worker that performs queued insertions so that callers of {@link #add(Object)} do not
     * wait for the comparatively slow insert path.
     */
    private class InsertCuckooRunnable implements Runnable {
        private final LinkedBlockingQueue<E> que = new LinkedBlockingQueue<E>();

        /**
         * Queues an element for insertion.
         *
         * @param e element to insert later on the worker thread
         */
        public void addElement(E e) {
            // The queue is created with the default (Integer.MAX_VALUE) capacity, so offer
            // returns immediately; its boolean result is intentionally not propagated.
            que.offer(e);
        }

        @Override
        public void run() {
            while (!Thread.currentThread().isInterrupted()) {
                try {
                    insert(que.take());
                } catch (InterruptedException interrupted) {
                    // take() clears the interrupt flag; restore it so the loop terminates.
                    Thread.currentThread().interrupt();
                }
            }
        }
    }

    /**
     * Queues an element for asynchronous insertion by the background worker.
     *
     * @param e reference to the object to cache
     */
    public void add(E e) {
        runnable.addElement(e);
    }

    /**
     * Inserts an element synchronously on the calling thread.
     *
     * <p>The element is stored in a free slot of its primary bucket, otherwise of its
     * alternate bucket. If both are full, a random entry of one of the two buckets is replaced
     * by the new element and the displaced entry is moved to its own alternate bucket; this is
     * repeated up to {@value #KICKNUM} times.
     *
     * @param e reference to the object to cache
     * @return {@code true} if every entry involved found a slot; {@code false} if the kick
     *         limit was reached, in which case {@code e} itself has been stored but the entry
     *         displaced last has been dropped from the cache
     */
    public boolean insert(E e) {
        String key = e.toString();
        int sig = HashFunctions.rotatingHash(key);
        int index = primaryIndex(key);
        int currentTime = getCurrentTime();

        lock.writeLock().lock();
        try {
            if (storeInFreeSlot(index, sig, currentTime, e)) {
                currentBufferNum++;
                return true;
            }
            int alternate = alternateIndex(index, sig);
            if (storeInFreeSlot(alternate, sig, currentTime, e)) {
                currentBufferNum++;
                return true;
            }

            // Both candidate buckets are full: pick one of them at random and start kicking.
            int bucket = Math.random() < 0.5 ? index : alternate;
            int pendingSig = sig;
            int pendingTime = currentTime;
            E pending = e;
            for (int kick = 0; kick < KICKNUM; kick++) {
                // Scale before truncating; casting Math.random() itself always yields 0.
                CuckooBuffer<E> victim = cuckooBuffer[bucket][(int) (Math.random() * this.col)];
                int victimSig = victim.sig;
                int victimTime = victim.time;
                E victimElement = victim.e;

                victim.init(pendingSig, pendingTime, pending);
                pendingSig = victimSig;
                pendingTime = victimTime;
                pending = victimElement;

                // The displaced entry may only live in the other one of its two buckets.
                bucket = alternateIndex(bucket, pendingSig);
                if (storeInFreeSlot(bucket, pendingSig, pendingTime, pending)) {
                    currentBufferNum++;
                    return true;
                }
            }
            // Kick limit reached: the last displaced entry is dropped. One entry was added and
            // one was lost, so the occupancy count is unchanged.
            return false;
        } finally {
            lock.writeLock().unlock();
        }
    }

    /**
     * Removes the entry that matches the given element.
     *
     * @param e element whose {@code toString()} value identifies the entry
     * @return {@code true} if a matching entry was found and removed
     */
    public boolean delete(E e) {
        String key = e.toString();
        return delete(primaryIndex(key), HashFunctions.rotatingHash(key));
    }

    /**
     * Removes the entry with the given signature from the given bucket or from that bucket's
     * alternate.
     *
     * @param index primary bucket (row) index; must lie in {@code [0, row)}
     * @param sig   signature of the entry
     * @return {@code true} if a matching entry was found and removed
     */
    public boolean delete(int index, int sig) {
        lock.writeLock().lock();
        try {
            CuckooBuffer<E> slot = findSlot(index, sig);
            if (slot == null) {
                slot = findSlot(alternateIndex(index, sig), sig);
            }
            if (slot == null) {
                return false;
            }
            slot.clean();
            currentBufferNum--;
            return true;
        } finally {
            lock.writeLock().unlock();
        }
    }

    /**
     * Schedules the periodic scan that evicts entries whose last hit is older than
     * {@code tLimit} milliseconds. The timer runs on a daemon thread.
     */
    private void cuckooScan() {
        Timer timer = new Timer(true);
        TimerTask task = new TimerTask() {
            @Override
            public void run() {
                lock.writeLock().lock();
                try {
                    int now = getCurrentTime();
                    for (int i = 0; i < cuckooBuffer.length; i++) {
                        for (int j = 0; j < cuckooBuffer[i].length; j++) {
                            CuckooBuffer<E> slot = cuckooBuffer[i][j];
                            // Skip empty slots so the occupancy count stays accurate.
                            if (slot.isExist() && now - slot.time > tLimit) {
                                slot.clean();
                                currentBufferNum--;
                            }
                        }
                    }
                } finally {
                    lock.writeLock().unlock();
                }
            }
        };
        timer.schedule(task, scanInterval, scanInterval);
    }

    /**
     * Returns the low 32 bits of the current system time in milliseconds.
     *
     * <p>The value wraps around; callers compare timestamps by subtraction, which stays
     * correct as long as the real interval fits in a signed 32-bit number of milliseconds.
     *
     * @return truncated {@code System.currentTimeMillis()}
     */
    private int getCurrentTime() {
        return (int) System.currentTimeMillis();
    }

    /** Maps a key to its primary bucket index in {@code [0, row)}. */
    private int primaryIndex(String key) {
        return nonNegativeMod(HashFunctions.additiveHash(key));
    }

    /**
     * Returns the partner bucket of {@code index} for an entry with signature {@code sig}.
     *
     * <p>The mapping {@code (sig mod row) - index (mod row)} is its own inverse, so applying it
     * to the alternate bucket yields the primary bucket again, and the result always lies in
     * {@code [0, row)} for any positive {@code row} and any {@code sig}. A plain XOR with
     * {@code sig % row} can leave that range when {@code row} is not a power of two or
     * {@code sig} is negative.
     */
    private int alternateIndex(int index, int sig) {
        return nonNegativeMod(nonNegativeMod(sig) - index);
    }

    /** Reduces {@code value} modulo {@code row} to a result in {@code [0, row)}. */
    private int nonNegativeMod(int value) {
        int remainder = value % this.row;
        return remainder < 0 ? remainder + this.row : remainder;
    }

    /**
     * Finds the occupied slot of a bucket whose signature equals {@code sig}.
     *
     * @return the matching slot, or {@code null} if the bucket holds none
     */
    private CuckooBuffer<E> findSlot(int bucket, int sig) {
        for (int i = 0; i < this.col; i++) {
            CuckooBuffer<E> slot = cuckooBuffer[bucket][i];
            // Empty slots carry sig 0 and must not match an element whose signature is 0.
            if (slot.isExist() && slot.sig == sig) {
                return slot;
            }
        }
        return null;
    }

    /**
     * Writes an entry into the first free slot of a bucket.
     *
     * @return {@code true} if a free slot was found and filled
     */
    private boolean storeInFreeSlot(int bucket, int sig, int time, E e) {
        for (int i = 0; i < this.col; i++) {
            CuckooBuffer<E> slot = cuckooBuffer[bucket][i];
            if (!slot.isExist()) {
                slot.init(sig, time, e);
                return true;
            }
        }
        return false;
    }

    /**
     * One slot of the cache table.
     *
     * @param <T> type of the cached reference
     */
    private static final class CuckooBuffer<T> {
        private int sig;   // signature of the stored element
        private int time;  // time of the last hit (truncated milliseconds)
        private T e;       // cached reference; null marks an empty slot

        /** Returns whether this slot currently holds an element. */
        boolean isExist() {
            return e != null;
        }

        /** Overwrites this slot with the given entry. */
        void init(int sig, int time, T e) {
            this.sig = sig;
            this.time = time;
            this.e = e;
        }

        /** Resets this slot to the empty state. */
        void clean() {
            sig = 0;
            time = 0;
            e = null;
        }
    }
}