构造函数
/**
 * Creates an empty {@code HashMap} that uses the default initial capacity
 * (16) and the default load factor (0.75).
 */
public HashMap() {
    // Only the load factor is assigned here; every other field keeps its default value.
    loadFactor = DEFAULT_LOAD_FACTOR;
}
哈希方法
/**
 * Spreads the higher bits of the key's hash code into the lower 16 bits.
 *
 * @param key the key to hash; may be null
 * @return 0 for a null key, otherwise {@code hashCode() ^ (hashCode() >>> 16)}
 */
static final int hash(Object key) {
    if (key == null) {
        return 0;
    }
    final int code = key.hashCode();
    // Fold the upper half into the lower half so small tables still see the high bits.
    return code ^ (code >>> 16);
}
为什么要把高16位和低16位进行异或运算呢?
先来看是如何做hash计算的
n为HashMap当前table数组的长度(容量)
(n - 1) & hash
在n为2的幂的情况下,(n - 1) & hash 等价于 hash % n(hash为非负数时),也因此容量capacity必须为2的幂
此时n-1得到的二进制低位刚好为全1(例如 n=16 时,n-1 = 1111)。
那么问题来了,hashMap开始是从16开始的,只占了4位二进制,也就是说在32位二进制中,其高位均为0,那么其与hash进行与运算,hash值的高位完全浪费了,因此高16与低16位进行异或运算可以更好的散列hash值。
putVal
/**
 * Inserts a mapping for {@code key}, or updates the existing one, in the bin
 * selected by {@code (n - 1) & hash}.
 *
 * @param hash hash used to select the bin and to pre-filter key comparisons
 * @param key the key
 * @param value the value to store
 * @param onlyIfAbsent if true, an existing non-null value is left unchanged
 * @param evict passed through unchanged to afterNodeInsertion
 * @return the previous value of the matching node (possibly null), or null
 *         when a new node was inserted
 */
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
boolean evict) {
Node<K,V>[] tab; Node<K,V> p; int n, i;
// Table not allocated yet (or empty): let resize() create it.
if ((tab = table) == null || (n = tab.length) == 0)
n = (tab = resize()).length;
// Empty bin: the new node becomes the bin head.
if ((p = tab[i = (n - 1) & hash]) == null)
tab[i] = newNode(hash, key, value, null);
else {
Node<K,V> e; K k;
// Case 1: the bin head already holds this key.
if (p.hash == hash &&
((k = p.key) == key || (key != null && key.equals(k))))
e = p;
// Case 2: the bin head is a TreeNode, so delegate to putTreeVal.
else if (p instanceof TreeNode)
e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
else {
// Case 3: walk the list; binCount counts the nodes visited after the head.
for (int binCount = 0; ; ++binCount) {
if ((e = p.next) == null) {
// Reached the tail without a match: append the new node (e stays null).
p.next = newNode(hash, key, value, null);
if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
treeifyBin(tab, hash);
break;
}
// Found a node with an equal key: stop with e pointing at it.
if (e.hash == hash &&
((k = e.key) == key || (key != null && key.equals(k))))
break;
p = e;
}
}
if (e != null) { // existing mapping for key
V oldValue = e.value;
// Overwrite unless the caller asked to keep an existing non-null value.
if (!onlyIfAbsent || oldValue == null)
e.value = value;
afterNodeAccess(e);
// Updating an existing node does not touch modCount or size.
return oldValue;
}
}
// A new node was added: record the structural change and grow if needed.
++modCount;
if (++size > threshold)
resize();
afterNodeInsertion(evict);
return null;
}
创建HashMap后插入第一个元素时,table数组为空,进入resize;数组的元素类型为Node
static class Node<K,V> implements Map.Entry<K,V> {
final int hash;
final K key;
V value;
Node<K,V> next;
第一次resize过程:
newCap = DEFAULT_INITIAL_CAPACITY;
newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
设置默认cap为16;newThr为触发扩容的元素个数阈值,值为负载因子*容量(默认 0.75 * 16 = 12)
Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
- 情况1 如果之前不存在元素,直接放在数组上。
if ((p = tab[i = (n - 1) & hash]) == null)
tab[i] = newNode(hash, key, value, null);
}
++modCount;
if (++size > threshold)
resize();
modCount供迭代器使用,用于检测遍历过程中集合是否被结构性修改(增删元素);如果size > threshold,则扩容。
- 情况2 table相应位置上不为空
2.1 新插入的值等于table[i]相应位置的值
if (p.hash == hash &&
((k = p.key) == key || (key != null && key.equals(k))))
e = p;
直接将其替换
2.2 table相应位置的节点已经变为树节点,即转换成了红黑树
else if (p instanceof TreeNode)
e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
插入到红黑树中
2.3 直接插入到后继节点,此时还未转换为红黑树
for (int binCount = 0; ; ++binCount) {
if ((e = p.next) == null) {
p.next = newNode(hash, key, value, null);
if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
treeifyBin(tab, hash);
break;
}
if (e.hash == hash &&
((k = e.key) == key || (key != null && key.equals(k))))
break;
p = e;
}
遍历链表,如果有相等的元素则直接替换其值;否则采用尾插法插入到链表末尾。插入后如果链表长度大于TREEIFY_THRESHOLD(默认8),则调用treeifyBin尝试转为红黑树。
demo
// Nine Integer keys that differ only above the low four bits, so with the
// default 16-slot table they all land in the same bin.
// The trailing number on each line is (key - 1).
HashMap map = new HashMap();
map.put(1,1);// 0
map.put(17,1);// 16
map.put(33,1);// 32
map.put(49,1);// 48
map.put(65,1);// 64
map.put(81,1);// 80
map.put(97,1);// 96
map.put(113,1);// 112
map.put(129,1);// 128; ninth node in the bin, so putVal calls treeifyBin
/**
 * Converts the linked nodes of the bin selected by {@code hash} into tree
 * nodes, unless the table is null or shorter than MIN_TREEIFY_CAPACITY, in
 * which case the table is resized instead.
 *
 * @param tab the table holding the bin
 * @param hash hash whose bin should be converted
 */
final void treeifyBin(Node<K,V>[] tab, int hash) {
    if (tab == null || tab.length < MIN_TREEIFY_CAPACITY) {
        resize();
        return;
    }
    final int slot = (tab.length - 1) & hash;
    Node<K,V> cursor = tab[slot];
    if (cursor == null) {
        return;
    }
    // Build a doubly linked chain of TreeNodes in the original list order.
    TreeNode<K,V> head = null;
    TreeNode<K,V> tail = null;
    for (; cursor != null; cursor = cursor.next) {
        TreeNode<K,V> converted = replacementTreeNode(cursor, null);
        if (tail == null) {
            head = converted;
        } else {
            converted.prev = tail;
            tail.next = converted;
        }
        tail = converted;
    }
    tab[slot] = head;
    if (head != null) {
        head.treeify(tab);
    }
}
如果tab数组的长度小于MIN_TREEIFY_CAPACITY(默认64),
那么,采用扩容的方式,而不是转化成红黑树
看下此时的扩容resize
if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
oldCap >= DEFAULT_INITIAL_CAPACITY)
newThr = oldThr << 1; // double threshold
cap和threshold均扩大一倍
for (int j = 0; j < oldCap; ++j) {
Node<K,V> e;
if ((e = oldTab[j]) != null) {
oldTab[j] = null;
if (e.next == null)
newTab[e.hash & (newCap - 1)] = e;
else if (e instanceof TreeNode)
((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
else { // preserve order
Node<K,V> loHead = null, loTail = null;
Node<K,V> hiHead = null, hiTail = null;
Node<K,V> next;
do {
next = e.next;
if ((e.hash & oldCap) == 0) {
if (loTail == null)
loHead = e;
else
loTail.next = e;
loTail = e;
}
else {
if (hiTail == null)
hiHead = e;
else
hiTail.next = e;
hiTail = e;
}
} while ((e = next) != null);
if (loTail != null) {
loTail.next = null;
newTab[j] = loHead;
}
if (hiTail != null) {
hiTail.next = null;
newTab[j + oldCap] = hiHead;
}
}
}
}
遍历老的tab数组
- 如果 table[i]位置上只有一个元素,直接用重新hash找到新的元素位置,e.hash & (newCap - 1)
- 树型节点,后面再看
- 遍历table[i]上的链表,如果(e.hash & oldCap) == 0,放入由loHead和loTail维护的链表,否则放入由hiHead和hiTail维护的链表。lo链表位置不变,放入新数组的槽j;hi链表放入新数组的槽j+oldCap。旧数组中不同槽的节点一定不会被重新分到同一个新槽中,不会冲突,因此可以直接赋值
newTab[j] = loHead;
newTab[j + oldCap] = hiHead;
e.hash & oldCap如果为0, => e.hash & (oldCap-1) = e.hash & (newCap-1)
因为newCap-1 二进制比oldCap-1多了高位1,而e.hash那个位置刚好为0
e.hash & oldCap如果不为0, => (e.hash & (oldCap-1)) + oldCap = e.hash & (newCap-1)
因为newCap-1 二进制比oldCap-1多了高位1,而e.hash那个位置刚好为1,那这位1代表的值就是oldCap
链表转红黑树
HashMap map = new HashMap();
for (int i = 1; i <= 32; i++) {// grows the table to 64 slots
map.put(i, i);
}
// Add keys that all map to slot 1
map.put(65, 65);
map.put(129, 129); // 128
map.put(257, 257); // 256
map.put(513, 513); // 512
map.put(1025, 1025); // 1024
map.put(2049, 2049); // 2048
map.put(4097, 4097); // 4096
map.put(8193, 8193); // 8192; the bin is converted to a red-black tree
转化
/**
 * Converts the linked nodes of the bin selected by {@code hash} into tree
 * nodes, unless the table is null or shorter than MIN_TREEIFY_CAPACITY, in
 * which case the table is resized instead.
 *
 * @param tab the table holding the bin
 * @param hash hash whose bin should be converted
 */
final void treeifyBin(Node<K,V>[] tab, int hash) {
    if (tab == null || tab.length < MIN_TREEIFY_CAPACITY) {
        resize();
        return;
    }
    final int slot = (tab.length - 1) & hash;
    Node<K,V> cursor = tab[slot];
    if (cursor == null) {
        return;
    }
    // Build a doubly linked chain of TreeNodes in the original list order.
    TreeNode<K,V> head = null;
    TreeNode<K,V> tail = null;
    for (; cursor != null; cursor = cursor.next) {
        TreeNode<K,V> converted = replacementTreeNode(cursor, null);
        if (tail == null) {
            head = converted;
        } else {
            converted.prev = tail;
            tail.next = converted;
        }
        tail = converted;
    }
    tab[slot] = head;
    if (head != null) {
        head.treeify(tab);
    }
}
遍历链表上所有元素,尾插法到hd链表,进入treeify
/**
 * Forms tree of the nodes linked from this node.
 *
 * Walks the {@code next} chain starting at this node, inserts each node into
 * a binary search tree ordered by hash, rebalances after every insertion,
 * and finally hands the root to moveRootToFront.
 *
 * @param tab the table passed on to moveRootToFront
 */
final void treeify(Node<K,V>[] tab) {
TreeNode<K,V> root = null;
for (TreeNode<K,V> x = this, next; x != null; x = next) {
// Remember the successor first; x's child links are reset below.
next = (TreeNode<K,V>)x.next;
x.left = x.right = null;
if (root == null) {
// First node becomes a black root.
x.parent = null;
x.red = false;
root = x;
}
else {
K k = x.key;
int h = x.hash;
Class<?> kc = null;
// Descend from the root until an empty child slot is found.
for (TreeNode<K,V> p = root;;) {
int dir, ph;
K pk = p.key;
// Order by hash first: smaller hash goes left (dir = -1), larger goes right.
if ((ph = p.hash) > h)
dir = -1;
else if (ph < h)
dir = 1;
// Equal hashes: try compareComparables; if that is unavailable or
// reports 0, fall back to tieBreakOrder.
else if ((kc == null &&
(kc = comparableClassFor(k)) == null) ||
(dir = compareComparables(kc, k, pk)) == 0)
dir = tieBreakOrder(k, pk);
TreeNode<K,V> xp = p;
if ((p = (dir <= 0) ? p.left : p.right) == null) {
// Link x under xp on the chosen side, then rebalance.
x.parent = xp;
if (dir <= 0)
xp.left = x;
else
xp.right = x;
root = balanceInsertion(root, x);
break;
}
}
}
}
moveRootToFront(tab, root);
}
构建红黑树
这个方法实际上就是二叉查找树的插入方式,大于当前节点向右否则向左,最终一定插到叶子节点
static <K,V> TreeNode<K,V> balanceInsertion(TreeNode<K,V> root,
TreeNode<K,V> x) {
x.red = true;//插入节点设置成红节点
for (TreeNode<K,V> xp, xpp, xppl, xppr;;) {
if ((xp = x.parent) == null) {//第一个节点就是根节点,直接设置黑色
x.red = false;
return x;
}
else if (!xp.red || (xpp = xp.parent) == null)//如果父节点不是红色或者根节点,直接返回,不影响红黑树条件
return root;
if (xp == (xppl = xpp.left)) {//父节点在左节点上
if ((xppr = xpp.right) != null && xppr.red) {
//父红,叔叔红情况,父红-叔红时,将父/叔节(P/U)点涂黑,祖父节点(GP)涂红;而后以祖父节点(GP)作为新的平衡节点N,递归执行平衡操作
xppr.red = false;
xp.red = false;
xpp.red = true;
x = xpp;
}
else {//父红,叔黑
if (x == xp.right) { //父节点在左子树,自身也右节点,父节点和N不在同一边先左旋,旋到同一边
root = rotateLeft(root, x = xp);
xpp = (xp = x.parent) == null ? null : xp.parent;
}
//经过旋转,满足了,父N同左的情况,P涂黑,GP涂红,以GP为轴右旋,
if (xp != null) {
xp.red = false;
if (xpp != null) {
xpp.red = true;
root = rotateRight(root, xpp);
}
}
}
}
else {//父节点在右节点上
if (xppl != null && xppl.red) {//父红叔红情况,和上面的处理方式相同
xppl.red = false;
xp.red = false;
xpp.red = true;
x = xpp;
}
else {
if (x == xp.left) {//父右n左,先旋转到同一边
root = rotateRight(root, x = xp);
xpp = (xp = x.parent) == null ? null : xp.parent;
}
//父右N右,父N同右”指的是:父节点是祖父节点的右子,N为父节点的右子。
此时以祖父节点(GP)为支点进行左旋;将P涂黑,将GP涂红。
if (xp != null) {
xp.red = false;
if (xpp != null) {
xpp.red = true;
root = rotateLeft(root, xpp);
}
}
}
}
}
}
上面大致介绍了红黑树的插入平衡过程,想更清晰地了解,请看之前的红黑树文章。
remove
map的remove方法
/**
 * Implements Map.remove and related methods.
 *
 * Locates the node for {@code key} in its bin, optionally checks its value,
 * unlinks it, and updates modCount and size.
 *
 * @param hash hash for key
 * @param key the key
 * @param value the value to match if matchValue, else ignored
 * @param matchValue if true only remove if value is equal
 * @param movable if false do not move other nodes while removing
 * @return the node, or null if none
 */
final Node<K,V> removeNode(int hash, Object key, Object value,
boolean matchValue, boolean movable) {
Node<K,V>[] tab; Node<K,V> p; int n, index;
// Proceed only if the table exists and the target bin is non-empty.
if ((tab = table) != null && (n = tab.length) > 0 &&
(p = tab[index = (n - 1) & hash]) != null) {
Node<K,V> node = null, e; K k; V v;
// Step 1: find the node. First check the bin head.
if (p.hash == hash &&
((k = p.key) == key || (key != null && key.equals(k))))
node = p;
else if ((e = p.next) != null) {
if (p instanceof TreeNode)
node = ((TreeNode<K,V>)p).getTreeNode(hash, key);
else {
// Linear scan; on a match p is left pointing at the predecessor of node.
do {
if (e.hash == hash &&
((k = e.key) == key ||
(key != null && key.equals(k)))) {
node = e;
break;
}
p = e;
} while ((e = e.next) != null);
}
}
// Step 2: unlink it, provided the value matches when matchValue is set.
if (node != null && (!matchValue || (v = node.value) == value ||
(value != null && value.equals(v)))) {
if (node instanceof TreeNode)
((TreeNode<K,V>)node).removeTreeNode(this, tab, movable);
// node == p only when the bin head itself matched.
else if (node == p)
tab[index] = node.next;
else
p.next = node.next;
++modCount;
--size;
afterNodeRemoval(node);
return node;
}
}
return null;
}
如果删除元素就在头节点
tab[index] = node.next;
如果在链表上
p.next = node.next;
如果在红黑树上
((TreeNode<K,V>)node).removeTreeNode(this, tab, movable);
大致流程为先按照二叉查找树的流程删节点,然后再平衡,保持红黑树,删除的步骤十分复杂,就不再逐行代码解析
扩容
初始化size
/**
 * Returns a power of two size for the given target capacity: 1 when the
 * computed mask is negative, MAXIMUM_CAPACITY when it reaches that bound,
 * otherwise the mask plus one.
 *
 * @param cap the requested capacity
 * @return the table size to use for {@code cap}
 */
static final int tableSizeFor(int cap) {
    // Sets every bit at or below the highest set bit of (cap - 1).
    // When cap - 1 is 0 the shift distance 32 wraps to 0, leaving -1.
    final int mask = -1 >>> Integer.numberOfLeadingZeros(cap - 1);
    if (mask < 0) {
        return 1;
    }
    if (mask >= MAXIMUM_CAPACITY) {
        return MAXIMUM_CAPACITY;
    }
    return mask + 1;
}
这个方法的功能为:把 cap-1 的最高位1及其右边(低位)的所有位都设置成1。右移共 1+2+4+8+16 = 31 位,加上最高位自身,最多可以覆盖32位。如果结果大于等于MAXIMUM_CAPACITY(2的30次方),则返回MAXIMUM_CAPACITY;否则低位全1的n再+1,得出的值刚好是不小于cap的最小的2的幂。先减1是为了让cap本身已经是2的幂时返回cap自身。
resize
/**
 * Initializes or doubles table size. If null, allocates in
 * accord with initial capacity target held in field threshold.
 * Otherwise, because we are using power-of-two expansion, the
 * elements from each bin must either stay at same index, or move
 * with a power of two offset in the new table.
 *
 * @return the table
 */
final Node<K,V>[] resize() {
Node<K,V>[] oldTab = table;
int oldCap = (oldTab == null) ? 0 : oldTab.length;
int oldThr = threshold;
int newCap, newThr = 0;
// Step 1: work out the new capacity and threshold.
if (oldCap > 0) {
if (oldCap >= MAXIMUM_CAPACITY) {
// Cannot grow further: raise the threshold to the maximum and keep the table.
threshold = Integer.MAX_VALUE;
return oldTab;
}
else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
oldCap >= DEFAULT_INITIAL_CAPACITY)
newThr = oldThr << 1; // double threshold
}
else if (oldThr > 0) // initial capacity was placed in threshold
newCap = oldThr;
else { // zero initial threshold signifies using defaults
newCap = DEFAULT_INITIAL_CAPACITY;
newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
}
// newThr is still 0 when no branch above set it: derive it from newCap * loadFactor.
if (newThr == 0) {
float ft = (float)newCap * loadFactor;
newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
(int)ft : Integer.MAX_VALUE);
}
threshold = newThr;
// Step 2: allocate the new table and publish it.
@SuppressWarnings({"rawtypes","unchecked"})
Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
table = newTab;
// Step 3: move every bin of the old table into the new one.
if (oldTab != null) {
for (int j = 0; j < oldCap; ++j) {
Node<K,V> e;
if ((e = oldTab[j]) != null) {
// Clear the old slot once its head has been taken.
oldTab[j] = null;
// Single node: recompute its index directly.
if (e.next == null)
newTab[e.hash & (newCap - 1)] = e;
// Tree bin: let the tree split itself.
else if (e instanceof TreeNode)
((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
else { // preserve order
// Partition the list by the oldCap bit: "lo" stays at j, "hi" moves to j + oldCap.
Node<K,V> loHead = null, loTail = null;
Node<K,V> hiHead = null, hiTail = null;
Node<K,V> next;
do {
next = e.next;
if ((e.hash & oldCap) == 0) {
if (loTail == null)
loHead = e;
else
loTail.next = e;
loTail = e;
}
else {
if (hiTail == null)
hiHead = e;
else
hiTail.next = e;
hiTail = e;
}
} while ((e = next) != null);
// Terminate each sub-list before storing it; the tail may still point into the other list.
if (loTail != null) {
loTail.next = null;
newTab[j] = loHead;
}
if (hiTail != null) {
hiTail.next = null;
newTab[j + oldCap] = hiHead;
}
}
}
}
}
return newTab;
}
扩容链表情况前面已经说过,这里看红黑树槽的扩容
进入split
/**
 * Splits nodes in a tree bin into lower and upper tree bins,
 * or untreeifies if now too small. Called only from resize;
 * see above discussion about split bits and indices.
 *
 * @param map the map
 * @param tab the table for recording bin heads
 * @param index the index of the table being split
 * @param bit the bit of hash to split on
 */
final void split(HashMap<K,V> map, Node<K,V>[] tab, int index, int bit) {
TreeNode<K,V> b = this;
// Relink into lo and hi lists, preserving order
TreeNode<K,V> loHead = null, loTail = null;
TreeNode<K,V> hiHead = null, hiTail = null;
// lc / hc count the nodes placed on the lo / hi list.
int lc = 0, hc = 0;
// Traverse via the next links and rebuild prev/next for each list.
for (TreeNode<K,V> e = b, next; e != null; e = next) {
next = (TreeNode<K,V>)e.next;
e.next = null;
if ((e.hash & bit) == 0) {
if ((e.prev = loTail) == null)
loHead = e;
else
loTail.next = e;
loTail = e;
++lc;
}
else {
if ((e.prev = hiTail) == null)
hiHead = e;
else
hiTail.next = e;
hiTail = e;
++hc;
}
}
// The lo list stays at index: untreeify it when short, otherwise keep it as a tree.
if (loHead != null) {
if (lc <= UNTREEIFY_THRESHOLD)
tab[index] = loHead.untreeify(map);
else {
tab[index] = loHead;
// Rebuild the tree only if some nodes actually left for the hi list.
if (hiHead != null) // (else is already treeified)
loHead.treeify(tab);
}
}
// The hi list goes to index + bit, with the same short/long handling.
if (hiHead != null) {
if (hc <= UNTREEIFY_THRESHOLD)
tab[index + bit] = hiHead.untreeify(map);
else {
tab[index + bit] = hiHead;
if (loHead != null)
hiHead.treeify(tab);
}
}
}
注意:虽然链表转为了红黑树,但是树节点除了left和right的关系,还保持了链表的next(以及prev),因此按照链表也能遍历所有节点
同样分为两个链表,lo和hi链表:lo的槽位置不变,hi的槽位置变为index + oldCap(即 oldCap + (hash & (oldCap-1)))。如果某条链表的节点数不超过UNTREEIFY_THRESHOLD(6),则重新退化为普通链表;否则保持为树。此时如果另一条链表不为空,说明原来的树被拆分了,需要根据链表重新构建红黑树;如果另一条链表为空,说明所有节点都留在了同一条链表上,原本就已经是红黑树,无需额外处理。