JDK1.7下HashMap源码分析
首先,要清楚HashMap的存储结构,HashMap由数组+链表构成,链表是为了解决hash冲突,具体结构如图:
接着,我们来看一下new HashMap()做了什么。
/**
 * No-argument constructor: delegates to {@code HashMap(int, float)} with the
 * default initial capacity and the default load factor.
 */
public HashMap() {
// Defaults: capacity = 16, loadFactor = 0.75
this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR);
}
/**
 * Validates the arguments, records the load factor and parks the requested
 * capacity in {@code threshold}. No bucket array is allocated here.
 *
 * @param initialCapacity requested capacity; values above MAXIMUM_CAPACITY
 *                        are clamped to MAXIMUM_CAPACITY
 * @param loadFactor      load factor; must be a positive, non-NaN number
 * @throws IllegalArgumentException if initialCapacity is negative, or if
 *                                  loadFactor is non-positive or NaN
 */
public HashMap(int initialCapacity, float loadFactor) {
    if (initialCapacity < 0) {
        throw new IllegalArgumentException("Illegal initial capacity: " + initialCapacity);
    }
    // "!(x > 0)" is true for zero, negatives and NaN alike.
    if (!(loadFactor > 0)) {
        throw new IllegalArgumentException("Illegal load factor: " + loadFactor);
    }

    this.loadFactor = loadFactor;
    // Until the table is inflated, threshold holds the (clamped) initial capacity.
    threshold = Math.min(initialCapacity, MAXIMUM_CAPACITY);
    init();
}
// Hook called as the last step of the HashMap(int, float) constructor; the body is empty here.
void init() {}
HashMap构造函数只做了两件事:1.设置加载因子loadFactor;2.设置阈值threshold。注意此时threshold暂存的是初始容量initialCapacity,table数组还没有分配,真正的初始化推迟到第一次put时由inflateTable完成。
下面,我们再分析下put(K,V)做了什么。
/**
 * Stores {@code value} under {@code key}.
 *
 * @param key   the key; {@code null} is handled by putForNullKey
 * @param value the value to store
 * @return the value previously stored under the key, or {@code null} when a
 *         new entry had to be added
 */
public V put(K key, V value) {
    // The table is still the shared empty placeholder: allocate it now,
    // using the capacity that the constructor parked in threshold.
    if (table == EMPTY_TABLE) {
        inflateTable(threshold);
    }
    if (key == null) {
        return putForNullKey(value);
    }

    final int keyHash = hash(key);
    final int bucket = indexFor(keyHash, table.length);

    // Walk the chain of the target bucket looking for an entry with the same key.
    Entry<K,V> node = table[bucket];
    while (node != null) {
        if (node.hash == keyHash) {
            Object existing = node.key;
            if (existing == key || key.equals(existing)) {
                // Key already present: overwrite and hand back the old value.
                V previous = node.value;
                node.value = value;
                node.recordAccess(this);
                return previous;
            }
        }
        node = node.next;
    }

    // Key not present: this is a structural modification.
    modCount++;
    addEntry(keyHash, key, value, bucket);
    return null;
}
/**
 * Allocates the bucket array and derives the resize threshold from it.
 *
 * @param toSize requested size; the actual capacity is the result of
 *               roundUpToPowerOf2(toSize)
 */
private void inflateTable(int toSize) {
    // Smallest power of two that is greater than or equal to toSize.
    final int capacity = roundUpToPowerOf2(toSize);
    final int thresholdCeiling = MAXIMUM_CAPACITY + 1;

    // threshold = capacity * loadFactor, capped at MAXIMUM_CAPACITY + 1.
    threshold = (int) Math.min(capacity * loadFactor, thresholdCeiling);
    table = new Entry[capacity];
    initHashSeedAsNeeded(capacity);
}
/**
 * Rounds {@code number} up to a power of two, capped at MAXIMUM_CAPACITY.
 * A number that is already a power of two is returned unchanged, and 0
 * yields 1. The original carries a disabled assertion that number is
 * non-negative, so callers are expected to respect that.
 *
 * @param number the value to round up
 * @return the smallest power of two that is >= number, or MAXIMUM_CAPACITY
 *         when number is at least that large
 */
private static int roundUpToPowerOf2(int number) {
    if (number >= MAXIMUM_CAPACITY) {
        return MAXIMUM_CAPACITY;
    }

    final int topBit = Integer.highestOneBit(number);
    if (topBit == 0) {
        // No bit set at all, i.e. number == 0.
        return 1;
    }

    // More than one bit set means number lies strictly between two powers
    // of two, so step up to the next one; otherwise it already is one.
    return Integer.bitCount(number) > 1 ? topBit << 1 : topBit;
}
/**
 * Decides whether the hash seed has to be switched on or off for the given
 * capacity and updates {@code hashSeed} accordingly.
 *
 * @param capacity the table capacity that is about to be used
 * @return {@code true} if hashSeed was changed by this call, {@code false}
 *         if it was left as it is
 */
final boolean initHashSeedAsNeeded(int capacity) {
    final boolean seedActive = hashSeed != 0;
    final boolean seedWanted = sun.misc.VM.isBooted()
            && (capacity >= Holder.ALTERNATIVE_HASHING_THRESHOLD);

    if (seedActive == seedWanted) {
        // Current state already matches what is wanted.
        return false;
    }

    hashSeed = seedWanted ? sun.misc.Hashing.randomHashSeed(this) : 0;
    return true;
}
/**
 * Stores {@code value} under the {@code null} key, which always lives in
 * bucket 0 with hash 0.
 *
 * @param value the value to store
 * @return the value previously stored under the null key, or {@code null}
 *         when a new entry had to be added
 */
private V putForNullKey(V value) {
    // Scan the chain in table[0] for an existing null-key entry.
    Entry<K,V> node = table[0];
    while (node != null) {
        if (node.key == null) {
            V previous = node.value;
            node.value = value;
            node.recordAccess(this);
            return previous;
        }
        node = node.next;
    }

    // No null-key entry yet: count the structural change and insert one
    // into table[0] with key = null and hash = 0.
    modCount++;
    addEntry(0, null, value, 0);
    return null;
}
/**
 * Adds a new entry to the bucket at {@code bucketIndex}, doubling the table
 * first when needed. The insertion itself is delegated to createEntry.
 *
 * @param hash        hash of the key (0 for the null key)
 * @param key         key of the new entry, may be {@code null}
 * @param value       value of the new entry
 * @param bucketIndex index of the target bucket in the current table
 */
void addEntry(int hash, K key, V value, int bucketIndex) {
    // Resize only when BOTH hold: size has reached the threshold (>=)
    // and the target bucket is already occupied.
    if (size >= threshold) {
        if (table[bucketIndex] != null) {
            resize(2 * table.length);
            // Recompute the hash, then the bucket position within the
            // enlarged table.
            hash = (key == null) ? 0 : hash(key);
            bucketIndex = indexFor(hash, table.length);
        }
    }
    createEntry(hash, key, value, bucketIndex);
}
/**
 * Replaces the bucket array with one of {@code newCapacity} slots and moves
 * every entry over. If the current table already has MAXIMUM_CAPACITY slots,
 * nothing is moved and threshold is set to Integer.MAX_VALUE instead.
 *
 * @param newCapacity length of the new bucket array (addEntry passes twice
 *                    the current table length)
 */
void resize(int newCapacity) {
    final Entry[] current = table;
    if (current.length == MAXIMUM_CAPACITY) {
        threshold = Integer.MAX_VALUE;
        return;
    }

    final Entry[] grown = new Entry[newCapacity];
    // true when the hash seed changed, in which case stored hashes must be recomputed.
    final boolean rehash = initHashSeedAsNeeded(newCapacity);
    transfer(grown, rehash);

    table = grown;
    threshold = (int) Math.min(newCapacity * loadFactor, MAXIMUM_CAPACITY + 1);
}
/**
 * Moves every entry of the current table into {@code newTable}.
 *
 * @param newTable destination bucket array
 * @param rehash   when {@code true}, each entry's stored hash is recomputed
 *                 before its new bucket is chosen; otherwise the stored hash
 *                 is reused
 */
void transfer(Entry[] newTable, boolean rehash) {
    final int newCapacity = newTable.length;
    for (Entry<K,V> head : table) {
        // Save the successor first, because node.next is overwritten below.
        for (Entry<K,V> node = head, following; node != null; node = following) {
            following = node.next;
            if (rehash) {
                node.hash = (node.key == null) ? 0 : hash(node.key);
            }
            // Position of this entry in the new array.
            final int slot = indexFor(node.hash, newCapacity);
            // Insert at the head of newTable[slot].
            node.next = newTable[slot];
            newTable[slot] = node;
        }
    }
}
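从put(K,V)源码分析,大致流程为:如果table为空({}),先初始化table;key==null时,在table[0]链表中找key=null的节点,找到就更新value并返回旧值,没找到就在table[0]链表上新增节点;key!=null时,根据hash找到桶的位置i,遍历table[i]链表,找到key相同的节点就更新value并返回旧值,没找到就在table[i]新增节点。要注意的是,新增节点的时候,要检查当前实际元素个数size是否大于等于阈值threshold,并且目标桶table[bucketIndex]不为空,两个条件同时满足才会扩容成原来的2倍。扩容过程中,遍历数组中所有链表节点,按新容量重新计算每个节点的桶位置,并用头插法放入新数组;只有当initHashSeedAsNeeded返回true(hashSeed发生变化)时,才会重新计算节点的hash值。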
下面,我们来分析一下,为什么计算桶的位置用hash&(capacity-1),为什么capacity长度是2的幂。
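当capacity是2的幂时,hash&(capacity-1)的结果与hash对capacity取模一致(hash为负数时,得到的仍是落在[0, capacity-1]内的非负下标),而位运算比取模运算要快。capacity为2的幂,转换为二进制就是1000…0,capacity-1就是0111…1,低位全部为1;按位与时,hash的这些低位都会参与运算,下标可以取到0到capacity-1之间的每一个值,这有利于减少hash冲突,使节点更均匀地分散到各个桶中。反之,如果capacity不是2的幂,capacity-1的二进制中会出现0,对应位上的信息被丢弃,部分桶永远不会被用到。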