学习笔记:三数组实现的HashMap

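网上的一个HashMap代码,用三个数组(键数组、值数组、状态数组)实现,不同于jdk中的链表/红黑树实现方式。处理哈希冲突采用的是开放定址法中的双重散列(double hashing)策略:由键的哈希值再算出一个探测步长,冲突时按该步长向前探测。注意代码中的rehash(再哈希)指的是另一件事,即按新容量重建整张表。学习了一把,个别地方可能没有理解到位。写了一些注释,如果有错误,敬请指出。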


public final class LongHashMap {


protected long table[];//存放键,类型为long,应该是用于特殊场所
protected Object values[];//存放值
protected byte state[];//state[i]=0,1,2表示table[i]与values[i]没有使用,已经使用,已删除
protected int freeEntries;//空闲的空间数

protected int distinct;//当前存了多少对键值

protected int lowWaterMark;//当LongHashMap存放的键值对少于此数时,将重新调整(再哈希)
protected int highWaterMark;//当LongHashMap存放的键值对大于此数时,将重新调整,由容量和装载因子决定


protected double minLoadFactor;//最小装载因子
protected double maxLoadFactor;//最大装载因子

// 如果元素放得太满,就必须进行rehash(再哈希)。再哈希使空间增大,并将原有的对象重新导入新的LongHashMap中,
//而原始的LongHashMap被删除。loadfactor(装载因子)决定何时要对LongHashMap进行再哈希。
protected static final int DEFAULT_CAPACITY = 277;//缺省的容量,一个素数
protected static final double DEFAULT_MIN_LOAD_FACTOR = 0.2;//缺省的最小的装载因子
protected static final double DEFAULT_MAX_LOAD_FACTOR = 0.6;//缺省的最大的装载因子

protected static final byte FREE = 0;
protected static final byte FULL = 1;
protected static final byte REMOVED = 2;

//用缺省的容量构建HashMap
public LongHashMap() {
this(DEFAULT_CAPACITY);
}

//构造函数
public LongHashMap(int initialCapacity) {
this(initialCapacity, DEFAULT_MIN_LOAD_FACTOR, DEFAULT_MAX_LOAD_FACTOR);
}

//构造函数
public LongHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
setUp(initialCapacity,minLoadFactor,maxLoadFactor);
}

//使用指定的初始化容量,最小装载因子,最大装载因子构建LongHashMap
protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
if (initialCapacity < 0) {//参数检查
throw new IllegalArgumentException(
"Initial Capacity must not be less than zero: "+ initialCapacity
);
}
if (minLoadFactor < 0.0 || minLoadFactor >= 1.0) {
throw new IllegalArgumentException(
"Illegal minLoadFactor: "+ minLoadFactor
);
}
if (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) {
throw new IllegalArgumentException(
"Illegal maxLoadFactor: "+ maxLoadFactor
);
}
if (minLoadFactor >= maxLoadFactor) {
throw new IllegalArgumentException(
"Illegal minLoadFactor: " + minLoadFactor +
" and maxLoadFactor: " + maxLoadFactor
);
}

int capacity = initialCapacity;
capacity = nextPrime(capacity);//程序将调整初始化容量,使之为素数

if (capacity==0) {
capacity=1;
}

this.table = new long[capacity];//关键字数组
this.values = new Object[capacity];//值数组
this.state = new byte[capacity];//状态数组


this.minLoadFactor = minLoadFactor;

if (capacity == LARGEST_PRIME) this.maxLoadFactor = 1.0;
else this.maxLoadFactor = maxLoadFactor;

this.distinct = 0;//开始时,LongHashMap中没有存入键值对
this.freeEntries = capacity; // 开始时空闲的空间数=容量

this.lowWaterMark = 0;

// Math.min(capacity-2, (int) (capacity * maxLoadFactor));
this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
}


//扩容量,一个素数
private int chooseGrowCapacity(int size, double minLoad, double maxLoad) {
return nextPrime(Math.max(size+1, (int) ((4*size / (3*minLoad+maxLoad)))));
}

public boolean put(long key, Object value) {//在LongHashMap中存放键值对
int i = indexOfInsertion(key);
if (i<0) {
i = -i -1;
this.values[i]=value;
return false;
}
if (this.distinct > this.highWaterMark) {//当存的健值对超过highWaterMark时,将重新构建LongHashMap
int newCapacity = chooseGrowCapacity(
this.distinct+1,
this.minLoadFactor,
this.maxLoadFactor
);
rehash(newCapacity);//用新容量重新构造
return put(key, value);
}

this.table[i]=key;
this.values[i]=value;
if (this.state[i]==FREE) this.freeEntries--;//剩余空间少了一个
this.state[i]=FULL;
this.distinct++;//当前存放的键值对数目加1

if (this.freeEntries < 1) {
int newCapacity = chooseGrowCapacity(
this.distinct+1,
this.minLoadFactor,
this.maxLoadFactor
);
rehash(newCapacity);//用新容量重新构造
}

return true;
}


//求关键字key的索引值,用于插入(添加)
private final int indexOfInsertion(long key) {
final long tab[] = table;
final byte stat[] = state;
final int length = tab.length;
//这就是哈希函数了
final int hash = ((int)(key ^ (key >> 32))) & 0x7FFFFFFF;
int i = hash % length;

//发生哈希冲突时,用于再哈希探测的步长
int decrement = (hash) % (length-2);

if (decrement == 0) decrement = 1;
//stat[i]有三种情况
while (stat[i] == FULL && tab[i] != key) {//第一种,发生哈希冲突,往前探测
i -= decrement;
if (i<0) i+=length;
}

if (stat[i] == REMOVED) {//第二种,此位置原来的键值对已删除

int j = i;
//有意思,已删除的位置并不用来存新的
while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
i -= decrement;
if (i<0) i+=length;
}
if (stat[i] == FREE) i = j;
}

if (stat[i] == FULL) {//第三种,这种情况会出现吗?

return -i-1;
}
//第三种,stat[i]=FREE

return i;
}

//删除key的value
public boolean removeKey(long key) {
int i = indexOfKey(key);//获取关键字的索引
if (i<0) return false;

this.state[i]=REMOVED;//作删除标记
this.values[i]=null; //注意:table[i]并没有置为null
this.distinct--;

if (this.distinct < this.lowWaterMark) {//存放的键值对少于lowWaterMark,重新调整
int newCapacity = chooseShrinkCapacity(
this.distinct,
this.minLoadFactor,
this.maxLoadFactor
);
rehash(newCapacity);//用新容量重新构造
}

return true;
}


public final Object get(long key) {//获取关键字对应的值
int i = indexOfKey(key);
if (i<0) {
return null;
}
else {
return values[i];
}
}

private final int indexOfKey(long key) {//求关键字之索引值,用于查找
final long tab[] = table;
final byte stat[] = state;
final int length = tab.length;
//这个是哈希函数
final int hash = ((int)(key ^ (key >> 32))) & 0x7FFFFFFF;
int i = hash % length;//得到了关键字的索引值

//用于再哈希探测的步长
int decrement = (hash) % (length-2);//减量

if (decrement == 0) decrement = 1;

while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
i -= decrement;//往前找
if (i<0) i+=length;
}

if (stat[i] == FREE) return -1; // 没有找到
return i; //找到了,返回索引值
}


public void clear() {//清空
for (int i=0; i<state.length; i++) {
state[i] = FREE;
}
for (int i=0; i<values.length-1; i++) {
values[i] = null;
}

this.distinct = 0;
this.freeEntries = table.length;
trimToSize();//清空以后,容量不能太大,这里重新调整,以节约空间
}

public void trimToSize() {
int newCapacity = nextPrime((int)(1 + 1.2*size()));
if (table.length > newCapacity) {
rehash(newCapacity);
}
}


//是否包含key
public boolean containsKey(long key) {
return indexOfKey(key) >= 0;
}

//是否包含value
public boolean containsValue(Object value) {
return indexOfValue(value) >= 0;
}


public void ensureCapacity(int minCapacity) {//确保容量不小于minCapacity
if (table.length < minCapacity) {
int newCapacity = nextPrime(minCapacity);
rehash(newCapacity);//再哈希
}
}


protected int indexOfValue(Object value) {//获取值的索引
final Object val[] = values;
final byte stat[] = state;

for (int i=stat.length; --i >= 0;) {
if (stat[i]==FULL && val[i]==value) return i;
}

return -1; // not found
}

//获取value的第一个键值,可能的多个
public long keyOf(Object value) {
int i = indexOfValue(value);
if (i<0) return Long.MIN_VALUE;
return table[i];
}


public long[] keys() {//所有的键
long[] elements = new long[distinct];
long[] tab = table;
byte[] stat = state;
int j=0;
for (int i = tab.length ; i-- > 0 ;) {
if (stat[i]==FULL) {
elements[j++]=tab[i];
}
}
return elements;
}


public int size() {//当前存了多少对键值
return distinct;
}


public boolean isEmpty() {
return distinct == 0;
}

// 如果元素放得太满,就必须进行rehash(再哈希)。再哈希使空间增大,并将原有的对象重新导入新的LongHashMap中,
//而原始的LongHashMap被删除。loadfactor(装载因子)决定何时要对LongHashMap进行再哈希。

protected void rehash(int newCapacity) {//用新的容量重新构建LongHashMap
int oldCapacity = table.length;//原来的容量
long oldTable[] = table;//原来的键
Object oldValues[] = values;//原来的值
byte oldState[] = state;

long newTable[] = new long[newCapacity];
Object newValues[] = new Object[newCapacity];
byte newState[] = new byte[newCapacity];

//(int) (newCapacity * minLoadFactor);
this.lowWaterMark = chooseLowWaterMark(newCapacity,this.minLoadFactor);
this.highWaterMark = chooseHighWaterMark(newCapacity,this.maxLoadFactor);

this.table = newTable;
this.values = newValues;
this.state = newState;
this.freeEntries = newCapacity-this.distinct; // 当前的剩余空间

for (int i = oldCapacity ; i-- > 0 ;) {
if (oldState[i]==FULL) {
long element = oldTable[i];
int index = indexOfInsertion(element);
newTable[index]=element;
newValues[index]=oldValues[i];
newState[index]=FULL;
}
}
}




public Object[] values() {
Object[] elements = new Object[distinct];

Object[] val = values;
byte[] stat = state;

int j=0;
for (int i = stat.length ; i-- > 0 ;) {
if (stat[i]==FULL) {
elements[j++]=val[i];
}
}
return elements;
}




private int chooseHighWaterMark(int capacity, double maxLoad) {
return Math.min(capacity-2, (int) (capacity * maxLoad));
}


protected int chooseLowWaterMark(int capacity, double minLoad) {
return (int) (capacity * minLoad);
}


protected int chooseMeanCapacity(int size, double minLoad, double maxLoad) {
return nextPrime(Math.max(size+1, (int) ((2*size / (minLoad+maxLoad)))));
}


protected int chooseShrinkCapacity(int size, double minLoad, double maxLoad) {
return nextPrime(Math.max(size+1, (int) ((4*size / (minLoad+3*maxLoad)))));
}


protected int nextPrime(int desiredCapacity) {//对指定的容量,在素数表中进行对半查找,返回一个素数容量
int i = java.util.Arrays.binarySearch(primeCapacities, desiredCapacity);
if(desiredCapacity==100) System.out.println("i="+i);
if (i<0) {
i = -i -1;
}
return primeCapacities[i];
}

private void printState(){
for(int i=0;i<state.length;i++)
System.out.print(state[i]+" ");
System.out.println();
}

public static final int LARGEST_PRIME = Integer.MAX_VALUE; //最大的素数.


private static final int[] primeCapacities = {//容量素数表
LARGEST_PRIME,

//chunk #1
5,11,23,47,97,197,397,797,1597,3203,6421,12853,25717,51437,102877,205759,
411527,823117,1646237,3292489,6584983,13169977,26339969,52679969,105359939,
210719881,421439783,842879579,1685759167,

//chunk #2
433,877,1759,3527,7057,14143,28289,56591,113189,226379,452759,905551,1811107,
3622219,7244441,14488931,28977863,57955739,115911563,231823147,463646329,927292699,
1854585413,

//chunk #3
953,1907,3821,7643,15287,30577,61169,122347,244703,489407,978821,1957651,3915341,
7830701,15661423,31322867,62645741,125291483,250582987,501165979,1002331963,
2004663929,

//chunk #4
1039,2081,4177,8363,16729,33461,66923,133853,267713,535481,1070981,2141977,4283963,
8567929,17135863,34271747,68543509,137087021,274174111,548348231,1096696463,

//chunk #5
31,67,137,277,557,1117,2237,4481,8963,17929,35863,71741,143483,286973,573953,
1147921,2295859,4591721,9183457,18366923,36733847,73467739,146935499,293871013,
587742049,1175484103,

//chunk #6
599,1201,2411,4831,9677,19373,38747,77509,155027,310081,620171,1240361,2480729,
4961459,9922933,19845871,39691759,79383533,158767069,317534141,635068283,1270136683,

//chunk #7
311,631,1277,2557,5119,10243,20507,41017,82037,164089,328213,656429,1312867,
2625761,5251529,10503061,21006137,42012281,84024581,168049163,336098327,672196673,
1344393353,

//chunk #8
3,7,17,37,79,163,331,673,1361,2729,5471,10949,21911,43853,87719,175447,350899,
701819,1403641,2807303,5614657,11229331,22458671,44917381,89834777,179669557,
359339171,718678369,1437356741,

//chunk #9
43,89,179,359,719,1439,2879,5779,11579,23159,46327,92657,185323,370661,741337,
1482707,2965421,5930887,11861791,23723597,47447201,94894427,189788857,379577741,
759155483,1518310967,

//chunk #10
379,761,1523,3049,6101,12203,24407,48817,97649,195311,390647,781301,1562611,
3125257,6250537,12501169,25002389,50004791,100009607,200019221,400038451,800076929,
1600153859
};

static {
java.util.Arrays.sort(primeCapacities);
}

//测试一下
public static void main(String args[]){
LongHashMap lh=new LongHashMap(5);//初始容量为5
System.out.println("size="+lh.size());
for(int i=0;i<3;i++)//先放三个
lh.put(i, Integer.valueOf(i));
System.out.println("size="+lh.size());
lh.removeKey(1);//删除二个
lh.removeKey(2);
lh.put(123,"ok");//添加一个

//看看状态
lh.printState();
lh.put(1234,"oo");//再放一个
//看看状态
lh.printState();

//取出来
System.out.println(lh.get(0));
System.out.println(lh.get(123));
System.out.println(lh.get(1234));

}
}


运行:

C:\ex>java LongHashMap
size=0
size=3
1 2 2 1 0
1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0
0
ok
oo

源码:
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值