HashSet源码分析
一、准备工作
package com.liu.collection;
import java.util.HashSet;
import java.util.Set;
public class Set_ {
public static void main(String[] args) {
Set<Object> set = new HashSet<>();
for(int i=0;i<10;i++){
set.add(i);
}
//已经添加的元素无法再添加
System.out.println(set.add(1));
//两个新建的对象
System.out.println(set.add(new Dog("xioabai", 3))); true
System.out.println(set.add(new Dog("xioabai", 3))); true
//相同的字符串对象无法加入 因为String重写了equals方法
System.out.println(set.add(new String("11"))); true
System.out.println(set.add(new String("11"))); false
System.out.println("set=>"+set);
}
}
二、源码分析
第一次add
进入构造函数(HashSet的底层是HashMap)
/**
* Constructs a new, empty set; the backing <tt>HashMap</tt> instance has
* default initial capacity (16) and load factor (0.75).
*/
public HashSet() {
map = new HashMap<>();// the set's only storage is this HashMap, i.e. HashSet is backed by HashMap
}
跳出HashSet构造函数
Integer装箱
// Boxes an int into an Integer (this is the autoboxing step of set.add(i) in the walkthrough).
// Values inside [IntegerCache.low, IntegerCache.high] are served from the cache array;
// any other value gets a freshly allocated Integer.
public static Integer valueOf(int i) {
if (i >= IntegerCache.low && i <= IntegerCache.high)
return IntegerCache.cache[i + (-IntegerCache.low)];
return new Integer(i);
}
跳出
进入add方法
// Shared dummy object stored as the map value for every element of the set.
private static final Object PRESENT = new Object();
// Adds e to the set. Returns true only when map.put returned null.
public boolean add(E e) {
// Delegates to the backing map's put: a set holds no duplicates, so the element is stored
// as a map key, which has the same no-duplicates property.
// PRESENT is only a placeholder value.
return map.put(e, PRESENT)==null;
}
进入put方法
// Public entry point for inserting a mapping; returns whatever putVal returns.
public V put(K key, V value) {
// put simply delegates to putVal, passing the spread hash of the key
// together with onlyIfAbsent = false and evict = true
return putVal(hash(key), key, value, false, true);
}
进入hash(key)方法:计算hash值
^(异或运算符)
运算规则是:两个数转为二进制,然后从高位开始比较,如果相同则为0,不相同则为1。
比如:8^11.
8转为二进制是1000,11转为二进制是1011.从高位开始比较得到的是:0011.然后二进制转为十进制,就是Integer.parseInt(“0011”,2)=3;
// Computes the hash passed to putVal: 0 for a null key, otherwise the key's hashCode
// XORed with itself shifted right by 16 bits.
static final int hash(Object key) {
int h;
// (h = key.hashCode()) ^ (h >>> 16): XOR the hash code with its own unsigned right shift by 16,
// so the upper 16 bits are mixed into the lower 16 bits of the resulting hash
return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
}
// Integer's hashCode, which hash(key) calls when the key is a boxed Integer;
// it delegates to the static Integer.hashCode on the wrapped int value.
public int hashCode() {
return Integer.hashCode(value);
}
跳出hash()
回到put方法
// Back in put: with the hash computed, control passes to putVal
// (onlyIfAbsent = false, evict = true).
public V put(K key, V value) {
return putVal(hash(key), key, value, false, true);
}
进入putVal方法(重点/难点)
/**
* The table, initialized on first use, and resized as
* necessary. When allocated, length is always a power of two.
* (We also tolerate length zero in some operations to allow
* bootstrapping mechanics that are currently not needed.)
*这个表,第一次使用时初始化,并根据需要调整大小。分配时,长度始终为2的幂。
*(我们还在某些操作中允许长度为零,以允许当前不需要的引导机制。
*/
transient Node<K,V>[] table;
// Core insertion routine behind put(). Returns the previous value when the key was already
// present, or null when a new node was inserted.
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
boolean evict) {
// Locals: tab = reference to the table, p = first node of the target bucket,
// n = table length, i = bucket index
Node<K,V>[] tab; Node<K,V> p; int n, i;
// If table is null or its length is 0, allocate it through resize()
if ((tab = table) == null || (n = tab.length) == 0)
n = (tab = resize()).length;
// Bucket index is (n - 1) & hash; an empty bucket simply receives a new node
if ((p = tab[i = (n - 1) & hash]) == null)
tab[i] = newNode(hash, key, value, null);
// Otherwise the bucket is occupied and has to be searched for an equal key
else {
Node<K,V> e; K k;
// First node matches: same hash, and key is the same reference or equals() it
if (p.hash == hash &&
((k = p.key) == key || (key != null && key.equals(k))))
e = p;
// Bucket holds TreeNodes: delegate the insert/lookup to putTreeVal
else if (p instanceof TreeNode)
e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
// Bucket is a linked list: walk it
else {
// No loop condition; the loop ends only through one of the two breaks
for (int binCount = 0; ; ++binCount) {
// Reached the tail without a match: append a new node
if ((e = p.next) == null) {
p.next = newNode(hash, key, value, null);
// Enough nodes were walked: hand the bucket to treeifyBin
if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
treeifyBin(tab, hash);
break;
}
// Found a node holding an equal key: stop with e pointing at it
if (e.hash == hash &&
((k = e.key) == key || (key != null && key.equals(k))))
break;
p = e;
}
}
// e != null means the key already exists in the map
if (e != null) { // existing mapping for key
V oldValue = e.value;
// Overwrite the value unless onlyIfAbsent is set and a non-null value is stored
if (!onlyIfAbsent || oldValue == null)
e.value = value;
afterNodeAccess(e);
return oldValue;
}
}
// Reached only when a new node was inserted
++modCount;
// Grow the table once the element count exceeds threshold
if (++size > threshold)
resize();
afterNodeInsertion(evict);
return null;
}
resize方法(扩容机制)重点
/**
* The next size value at which to resize (capacity * load factor).
*要调整大小的下一个大小值(容量*负载系数)
* @serial
*/
// (The javadoc description is true upon serialization.
// Additionally, if the table array has not been allocated, this
// field holds the initial array capacity, or zero signifying
// DEFAULT_INITIAL_CAPACITY.)
int threshold;
/**
* The maximum capacity, used if a higher value is implicitly specified
* by either of the constructors with arguments.
* MUST be a power of two <= 1<<30.
*最大容量,在两个带参数的构造函数中的任何一个隐式指定了更高的值时使用。
*必须是2的幂<=1<<30
*/
static final int MAXIMUM_CAPACITY = 1 << 30;
// Allocates the table on first use or doubles its capacity, recomputes threshold,
// and moves every existing node into the new array. Returns the new array
// (or the old one when the capacity can no longer grow).
final Node<K,V>[] resize() {
Node<K,V>[] oldTab = table;
// Length of the current table; 0 when it has not been allocated yet
int oldCap = (oldTab == null) ? 0 : oldTab.length;
// threshold: the size at which the next resize happens (capacity * load factor).
// Per the field comment above, before allocation it holds the initial capacity or 0.
int oldThr = threshold;
// New capacity and new threshold
int newCap, newThr = 0;
// The table is already allocated
if (oldCap > 0) {
// Old capacity is already >= MAXIMUM_CAPACITY (1 << 30): no further growth
if (oldCap >= MAXIMUM_CAPACITY) {
// Set threshold to Integer.MAX_VALUE
threshold = Integer.MAX_VALUE;
// Return the old table unchanged
return oldTab;
}
// newCap is set to oldCap * 2 in the condition itself. The threshold is doubled as well
// only if the doubled capacity is below MAXIMUM_CAPACITY and oldCap >= DEFAULT_INITIAL_CAPACITY;
// otherwise newThr stays 0 and is computed further down.
else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
oldCap >= DEFAULT_INITIAL_CAPACITY)
// newThr = oldThr * 2
newThr = oldThr << 1; // double threshold
}
// Table not allocated yet, but threshold already carries an initial capacity
else if (oldThr > 0) // initial capacity was placed in threshold
newCap = oldThr;
// Table not allocated and threshold is 0: fall back to the defaults
else { // zero initial threshold signifies using defaults
// Capacity starts at DEFAULT_INITIAL_CAPACITY
newCap = DEFAULT_INITIAL_CAPACITY;
// Threshold starts at DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY
newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
}
// newThr was not set by the branches above: derive it from the new capacity
if (newThr == 0) {
// capacity * load factor
float ft = (float)newCap * loadFactor;
newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
(int)ft : Integer.MAX_VALUE);
}
// Store the new threshold
threshold = newThr;
@SuppressWarnings({"rawtypes","unchecked"})
// Actually allocate the Node array with the new capacity
Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
// Publish it as the table
table = newTab;
// Move the nodes of the old table, bucket by bucket
if (oldTab != null) {
for (int j = 0; j < oldCap; ++j) {
Node<K,V> e;
if ((e = oldTab[j]) != null) {
// Clear the old slot
oldTab[j] = null;
// Single node: place it directly at its index in the new table
if (e.next == null)
newTab[e.hash & (newCap - 1)] = e;
// Tree bucket: let TreeNode.split distribute it
else if (e instanceof TreeNode)
((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
// Linked list with several nodes: split into two lists, keeping relative order
else { // preserve order
// lo list: nodes that stay at index j; hi list: nodes that move to j + oldCap
Node<K,V> loHead = null, loTail = null;
Node<K,V> hiHead = null, hiTail = null;
Node<K,V> next;
do {
next = e.next;
// The bit selected by oldCap is 0: append to the lo list
if ((e.hash & oldCap) == 0) {
if (loTail == null)
loHead = e;
else
loTail.next = e;
loTail = e;
}
// The bit is 1: append to the hi list
else {
if (hiTail == null)
hiHead = e;
else
hiTail.next = e;
hiTail = e;
}
} while ((e = next) != null);
// Terminate each non-empty list and store its head in the new table
if (loTail != null) {
loTail.next = null;
newTab[j] = loHead;
}
if (hiTail != null) {
hiTail.next = null;
newTab[j + oldCap] = hiHead;
}
}
}
}
}
// Return the new array
return newTab;
}
第一次扩容示意图
跳回putVal方法
位与运算符(&)
//位与运算符(&)
//运算规则:两个数都转为二进制,然后从高位开始比较,如果两个数都为1则为1,否则为0。
//比如:129&128.
//129转换成二进制就是10000001,128转换成二进制就是10000000。从高位开始比较得到,得到10000000,
//即128.
/**
* The table, initialized on first use, and resized as
* necessary. When allocated, length is always a power of two.
* (We also tolerate length zero in some operations to allow
* bootstrapping mechanics that are currently not needed.)
*这个表,第一次使用时初始化,并根据需要调整大小。分配时,长度始终为2的幂。
*(我们还在某些操作中允许长度为零,以允许当前不需要的引导机制。
*/
transient Node<K,V>[] table;
// Walkthrough of the first add: the table does not exist yet, so it is created by resize()
// and the element lands in an empty bucket. Returns null when a new node was inserted,
// otherwise the previous value.
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
boolean evict) {
// Locals: tab = reference to the table, p = first node of the target bucket,
// n = table length, i = bucket index
Node<K,V>[] tab; Node<K,V> p; int n, i;
// If table is null or its length is 0
if ((tab = table) == null || (n = tab.length) == 0)
// resize() returns the newly allocated array; its length is stored in n
n = (tab = resize()).length;
// Bitwise AND (&):
// both operands are taken in binary and compared bit by bit; a result bit is 1 only when
// both input bits are 1, otherwise 0.
// Example: 129 & 128.
// 129 is 10000001 and 128 is 10000000 in binary, so the result is 10000000,
// i.e. 128.
// If tab[i] is null, where i = (n - 1) & hash
if ((p = tab[i = (n - 1) & hash]) == null)
// store a new node there: tab[i] = newNode(hash, key, value, null)
tab[i] = newNode(hash, key, value, null);
// Otherwise the bucket already holds a node
else {
Node<K,V> e; K k;
// First node matches: same hash, and key is the same reference or equals() it
if (p.hash == hash &&
((k = p.key) == key || (key != null && key.equals(k))))
e = p;
else if (p instanceof TreeNode)
e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
else {
for (int binCount = 0; ; ++binCount) {
if ((e = p.next) == null) {
p.next = newNode(hash, key, value, null);
if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
treeifyBin(tab, hash);
break;
}
if (e.hash == hash &&
((k = e.key) == key || (key != null && key.equals(k))))
break;
p = e;
}
}
if (e != null) { // existing mapping for key
V oldValue = e.value;
if (!onlyIfAbsent || oldValue == null)
e.value = value;
afterNodeAccess(e);
return oldValue;
}
}
++modCount;
// Check whether the table must grow: the element count now exceeds threshold
if (++size > threshold)
resize();
afterNodeInsertion(evict);
return null;
}
返回put方法
返回add方法
第一个元素添加成功示意图
第二个元素add
进入putVal方法
// Walkthrough of the second add: the table already exists, so the resize() branch is skipped
// and the element goes straight to its bucket.
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
boolean evict) {
Node<K,V>[] tab; Node<K,V> p; int n, i;
// Only taken while the table is still null or empty
if ((tab = table) == null || (n = tab.length) == 0)
n = (tab = resize()).length;
// If p = tab[i] is null, the bucket at index i holds no element
if ((p = tab[i = (n - 1) & hash]) == null)
// so create a new node and store it at that index
tab[i] = newNode(hash, key, value, null);
else {
Node<K,V> e; K k;
if (p.hash == hash &&
((k = p.key) == key || (key != null && key.equals(k))))
e = p;
else if (p instanceof TreeNode)
e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
else {
for (int binCount = 0; ; ++binCount) {
if ((e = p.next) == null) {
p.next = newNode(hash, key, value, null);
if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
treeifyBin(tab, hash);
break;
}
if (e.hash == hash &&
((k = e.key) == key || (key != null && key.equals(k))))
break;
p = e;
}
}
if (e != null) { // existing mapping for key
V oldValue = e.value;
if (!onlyIfAbsent || oldValue == null)
e.value = value;
afterNodeAccess(e);
return oldValue;
}
}
// A new node was inserted: bump modCount and size, growing the table if size exceeds threshold
++modCount;
if (++size > threshold)
resize();
afterNodeInsertion(evict);
return null;
}
跳出putVal方法
跳出add方法
添加相同元素
instanceof
instanceof 严格来说是Java中的一个双目运算符,用来测试一个对象是否为一个类的实例
size:每加入一个元素size就加1;并不是某个table[i]上的链表达到12个结点才扩容,而是整个map的元素个数超过threshold时才扩容,即加入12个元素后,再加入第13个元素时进行扩容操作。(以第一次扩容为例)
进入putVal方法
// Walkthrough of adding an element whose bucket is already occupied (including adding a
// duplicate). Returns the previous value when the key already exists, otherwise null.
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
boolean evict) {
Node<K,V>[] tab; Node<K,V> p; int n, i;
if ((tab = table) == null || (n = tab.length) == 0)
n = (tab = resize()).length;
if ((p = tab[i = (n - 1) & hash]) == null)
tab[i] = newNode(hash, key, value, null);
// The bucket computed from the hash is already occupied, so further checks are needed
else {
Node<K,V> e; K k;
// The first node has the same hash, and its key is either the same reference as the
// new key or (for a non-null new key) equal to it according to equals()
if (p.hash == hash &&
((k = p.key) == key || (key != null && key.equals(k))))
// remember the matching node in e
e = p;
// p is a TreeNode: the bucket is a tree
else if (p instanceof TreeNode)
e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
// Otherwise the bucket is a linked list
else {
// Loop without a condition; left only through break.
// binCount is incremented once per iteration, i.e. per list node stepped past
for (int binCount = 0; ; ++binCount) {
// Reached the end of the list: p has no successor
if ((e = p.next) == null) {
// append a new node
p.next = newNode(hash, key, value, null);
// If binCount >= TREEIFY_THRESHOLD - 1
if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
// enter the treeify routine
treeifyBin(tab, hash);
break;
}
// A node with an equal key exists in the list: stop, e points at it
if (e.hash == hash &&
((k = e.key) == key || (key != null && key.equals(k))))
break;
p = e;
}
}
// e is non-null: the key was already present
if (e != null) { // existing mapping for key
V oldValue = e.value;
if (!onlyIfAbsent || oldValue == null)
e.value = value;
afterNodeAccess(e);
// Return the previous value
return oldValue;
}
}
++modCount;
// ==================================== the table is grown here ====================================
// If the element count is now greater than threshold
if (++size > threshold)
// grow the table
resize();
afterNodeInsertion(evict);
return null;
}
扩容操作
元素个数大于threshold(容量*扩容因子)
此时进行扩容,而不是达到最大容量才扩容
size:每加入一个元素size就加1;并不是某个table[i]上的链表达到12个结点才扩容,而是整个map的元素个数超过threshold时才扩容,即加入12个元素后,再加入第13个元素时进行扩容操作。(以第一次扩容为例)
// Walkthrough of growing an already populated table: capacity and threshold are doubled,
// a new array is allocated, and all nodes are moved over. Returns the new array.
final Node<K,V>[] resize() {
// Keep a reference to the current table as oldTab
Node<K,V>[] oldTab = table;
int oldCap = (oldTab == null) ? 0 : oldTab.length;
int oldThr = threshold;
int newCap, newThr = 0;
// The old table is allocated and non-empty
if (oldCap > 0) {
// Table length >= MAXIMUM_CAPACITY: cannot grow, return the old table
if (oldCap >= MAXIMUM_CAPACITY) {
threshold = Integer.MAX_VALUE;
return oldTab;
}
// newCap becomes twice the old capacity; if that is below MAXIMUM_CAPACITY and
// oldCap >= DEFAULT_INITIAL_CAPACITY the threshold can be doubled directly
else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
oldCap >= DEFAULT_INITIAL_CAPACITY)
// Doubling the old threshold replaces recomputing capacity * load factor for the doubled capacity
newThr = oldThr << 1; // double threshold
}
else if (oldThr > 0) // initial capacity was placed in threshold
newCap = oldThr;
else { // zero initial threshold signifies using defaults
newCap = DEFAULT_INITIAL_CAPACITY;
newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
}
if (newThr == 0) {
float ft = (float)newCap * loadFactor;
newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
(int)ft : Integer.MAX_VALUE);
}
// Store the new threshold
threshold = newThr;
@SuppressWarnings({"rawtypes","unchecked"})
// The actual growth: allocate the larger array
Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
// Publish it as the table
table = newTab;
// If there was an old table
// Key step: ======================== after growing, move the old table's nodes into the new table =======================
if (oldTab != null) {
// Iterate over the old table
for (int j = 0; j < oldCap; ++j) {
Node<K,V> e;
// The bucket is not empty
if ((e = oldTab[j]) != null) {
// Clear the slot oldTab[j]
oldTab[j] = null;
// The bucket holds a single node (no successor)
if (e.next == null)
// place that node directly into the new table
newTab[e.hash & (newCap - 1)] = e;
// Tree bucket: use the tree's own split method
else if (e instanceof TreeNode)
((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
// The bucket is a list with two or more nodes: redistribute the list
else { // preserve order
// Head and tail of the lo list (stays at index j) and of the hi list (moves to j + oldCap)
Node<K,V> loHead = null, loTail = null;
Node<K,V> hiHead = null, hiTail = null;
Node<K,V> next;
do {
// Remember the successor before relinking
next = e.next;
// The hash bit selected by oldCap is 0: the node belongs to the lo list
if ((e.hash & oldCap) == 0) {
// No tail yet: the lo list is empty
if (loTail == null)
// the current node becomes its head
loHead = e;
// The lo list already has nodes
else
// link the current node after the tail
loTail.next = e;
// The current node is the new tail
loTail = e;
}
// The bit is 1: the node belongs to the hi list
else {
// No tail yet: the hi list is empty
if (hiTail == null)
// hiHead points at e
hiHead = e;
else
// link the current node after the tail
hiTail.next = e;
// The current node is the new tail
hiTail = e;
}
} while ((e = next) != null);// until every node of this bucket's list has been visited
if (loTail != null) {
loTail.next = null;
newTab[j] = loHead;// the lo list is stored at the same index j
}
if (hiTail != null) {
hiTail.next = null;
newTab[j + oldCap] = hiHead;
}
}
}
}
}
return newTab;
}
跳出当前方法
回到add
添加成功
扩容示意图
树化流程
准备工作
package com.liu.collection;
import java.util.Objects;
/**
 * Simple name/age bean used by the treeify demo.
 *
 * <p>{@link #hashCode()} is deliberately a constant and {@code equals} is not
 * overridden: every instance hashes to the same value while distinct instances
 * remain unequal.
 */
public class Dog {
    String name;
    int age;

    /** Creates a Dog whose fields keep their default values. */
    public Dog() {
    }

    /**
     * Creates a Dog with the given name and age.
     *
     * @param name the dog's name
     * @param age  the dog's age
     */
    public Dog(String name, int age) {
        this.age = age;
        this.name = name;
    }

    /** @return the dog's name */
    public String getName() {
        return this.name;
    }

    /** @param name the new name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the dog's age */
    public int getAge() {
        return this.age;
    }

    /** @param age the new age */
    public void setAge(int age) {
        this.age = age;
    }

    /**
     * Returns the same hash code for every Dog, so that all instances fall
     * into the same bucket and build up a linked list there.
     */
    @Override
    public int hashCode() {
        return 100;
    }

    /** @return a text form such as {@code Dog{name='x', age=1}} */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Dog{");
        text.append("name='").append(name).append('\'');
        text.append(", age=").append(age);
        text.append('}');
        return text.toString();
    }
}
package com.liu.collection;
import java.util.HashSet;
import java.util.Set;
/**
 * Treeify demo: adds 20 Dog objects to one HashSet, printing the result of
 * every {@code add}, and finally prints the whole set.
 */
public class Set_ {
    public static void main(String[] args) {
        Set<Object> dogs = new HashSet<>();
        int age = 0;
        while (age < 20) {
            // Each Dog is a separate instance; only the age differs.
            boolean added = dogs.add(new Dog("xioabai", age));
            System.out.println(added);
            age++;
        }
        System.out.println("set=>" + dogs);
    }
}
图解
链表元素个数达到7时,不会进行任何操作(既不扩容,也不树化)
链表元素未超过8个,数组容量未超过64个,所以不扩容
链表元素超过8个但数组容量未达到64个,所以先扩容一次
链表元素超过8个但数组容量未达到64个,所以再扩容一次
链表元素超过8个且数组容量已达到64个,则将链表转化为红黑树
三、总结
1、HashSet的底层是HashMap
2、第一次扩容大小为16
3、HashMap的底层:数组+链表+红黑树
4、对key的hash值进行了重新计算,而不是简单的直接取hashCode()
5、size:记录map中已添加的元素个数,当size>threshold(容量*扩容因子)时,进行扩容操作
注意:这里的size指的是map中添加的元素的个数,而不是数组的容量,即每添加一个元素size就加1
6、当数组长度达到64,且向已有8个结点的链表继续添加元素(链表长度超过8)时,将链表进行树化;若此时数组长度不足64,则先进行扩容而不树化
7、扩容阈值threshold=容量*扩容因子:数组容量为16时,threshold=16*0.75=12,元素个数超过12(即添加第13个元素)时,数组扩容为原来的2倍