HashMap源码分析

最新推荐文章于 2023-03-27 17:08:56 发布

pzp_7676

最新推荐文章于 2023-03-27 17:08:56 发布

阅读量506

点赞数

文章标签： hashmap object null iterator table equals

本文链接：https://blog.csdn.net/pzp_7676/article/details/1869846

版权

关键词 ： HashMap源码分析

HashMap 是 Java 新 Collection Framework 中用来代替 HashTable 的一个实现， HashMap 和 HashTable 的区别是： HashMap 是未经同步的，而且允许 null 值。 HashTable 继承 Dictionary ，而且使用了 Enumeration ，所以被建议不要使用。
HashMap 的声明如下：
public class HashMap extends AbstractMap implements Map, Cloneable,Serializable
有关 AbstractMap ： http://blog.csdn.net/treeroot/archive/2004/09/20/110343.aspx
有关 Map ： http://blog.csdn.net/treeroot/archive/2004/09/20/110331.aspx
有关 Cloneable ： http://blog.csdn.net/treeroot/archive/2004/09/07/96936.aspx
这个类比较复杂，这里只是重点分析了几个方法，特别是后面涉及到很多内部类都没有解释
不过都比较简单。

static final int DEFAULT_INITIAL_CAPACITY = 16; 默认初始化大小

static final int MAXIMUM_CAPACITY = 1 << 30; 最大初始化大小

static final float DEFAULT_LOAD_FACTOR = 0.75f; 默认加载因子

transient Entry[] table; 一个 Entry 类型的数组，数组的长度为 2 的指数。

transient int size; 映射的个数

int threshold; 下一次扩容时的值

final float loadFactor; 加载因子

transient volatile int modCount; 修改次数

/**
 * Creates an empty map. The bucket array is sized to the smallest power of
 * two that is not below {@code initialCapacity} (after capping the request at
 * {@code MAXIMUM_CAPACITY}).
 *
 * @param initialCapacity requested capacity; must not be negative
 * @param loadFactor fill ratio used to derive the resize threshold; must be
 *        positive and not NaN
 * @throws IllegalArgumentException if either argument is out of range
 */
public HashMap(int initialCapacity, float loadFactor) {
    if (initialCapacity < 0) {
        throw new IllegalArgumentException("Illegal initial capacity: " + initialCapacity);
    }
    if (initialCapacity > MAXIMUM_CAPACITY) {
        initialCapacity = MAXIMUM_CAPACITY;
    }
    if (loadFactor <= 0 || Float.isNaN(loadFactor)) {
        throw new IllegalArgumentException("Illegal load factor: " + loadFactor);
    }

    // Round the request up to a power of two so indexFor() can use a bit mask.
    int buckets = 1;
    while (buckets < initialCapacity) {
        buckets <<= 1;
    }

    this.loadFactor = loadFactor;
    threshold = (int) (buckets * loadFactor);
    table = new Entry[buckets];
    init();
}

/**
 * Creates an empty map with the given initial capacity and the default load
 * factor.
 *
 * @param initialCapacity requested capacity; must not be negative
 * @throws IllegalArgumentException if {@code initialCapacity} is negative
 */
public HashMap(int initialCapacity) {
    this(initialCapacity, DEFAULT_LOAD_FACTOR);
}

/**
 * Creates an empty map with the default initial capacity and the default
 * load factor.
 */
public HashMap() {
    this.loadFactor = DEFAULT_LOAD_FACTOR;
    // Fix (flagged by the article as an oversight): the threshold used to be set
    // to the bare capacity, ignoring the load factor. The two-argument constructor
    // and resize() both use capacity * loadFactor, so do the same here.
    threshold = (int) (DEFAULT_INITIAL_CAPACITY * DEFAULT_LOAD_FACTOR);
    table = new Entry[DEFAULT_INITIAL_CAPACITY];
    init();
}

/**
 * Creates a map holding the same mappings as {@code m}. The capacity is
 * chosen so that {@code m.size()} mappings fit under the default load factor,
 * but is never below the default initial capacity.
 *
 * @param m the map whose mappings are copied
 * @throws NullPointerException if {@code m} is null
 */
public HashMap(Map m) {
    this(Math.max((int) (m.size() / DEFAULT_LOAD_FACTOR) + 1, DEFAULT_INITIAL_CAPACITY),
         DEFAULT_LOAD_FACTOR);
    putAllForCreate(m);
}

/**
 * Hook invoked once the table has been set up by the constructors, clone()
 * and readObject(). Does nothing in this class; presumably intended for
 * subclasses to override.
 */
void init() {
}

// Sentinel stored in the table in place of a null key; see maskNull() and unmaskNull().
static final Object NULL_KEY = new Object();

static Object maskNull(Object key){
　　 return (key == null ? NULL_KEY : key);
}

static Object unmaskNull(Object key) {
　　 return (key == NULL_KEY ? null : key);
}

static int hash(Object x) {
　　 int h = x.hashCode();
　　 h += ~(h << 9);
　　 h ^= (h >>> 14);
　　 h += (h << 4);
　　 h ^= (h >>> 10);
return h;
}
在 HashTable 中没有这个方法，也就是说 HashTable 中是直接用对象的 hashCode 值，但是 HashMap 做了改进用这个算法来获得哈希值。

static boolean eq(Object x, Object y) {
　　 return x == y || x.equals(y);
}

static int indexFor(int h, int length) {
　　 return h & (length-1);
}
根据哈希值和数组的长度来返回该 hash 值在数组中的位置，只是简单的与关系。

public int size() {
　　 return size;
}

public boolean isEmpty() {
　　 return size == 0;
}

public Object get(Object key) {
　　 Object k = maskNull(key);
　　 int hash = hash(k);
　　 int i = indexFor(hash, table.length);
　　 Entry e = table[i];
　　 while (true) {
　　　　 if (e == null) return e;
　　　　 if (e.hash == hash && eq(k, e.key)) return e.value;
　　　　 e = e.next;
　　 }
}
这个方法是获取数据的方法，首先获得哈希值，这里把 null 值掩饰了，并且 hash 值经过函数 hash() 修正。然后计算该哈希值在数组中的索引值。如果该索引处的引用为 null ，表示 HashMap 中不存在这个映射。否则的话遍历整个链表，这里找到了就返回 , 如果没有找到就遍历到链表末尾，返回 null 。这里的比较是这样的： e.hash==hash && eq(k,e.key) 也就是说如果 hash 不同就肯定认为不相等， eq 就被短路了，只有在 hash 相同的情况下才调用 equals 方法。现在我们该明白 Object 中说的如果两个对象 equals 返回 true ，他们的 hashCode 应该相同的道理了吧。假如两个对象调用 equals 返回 true ，但是 hashCode 不一样，那么在 HashMap 里就认为他们不相等。

public boolean containsKey(Object key) {
　　 Object k = maskNull(key);
　　 int hash = hash(k);
　　 int i = indexFor(hash, table.length);
　　 Entry e = table[i];
　　 while (e != null) {
　　　　 if (e.hash == hash && eq(k, e.key)) return true;
　　　　 e = e.next;
　　 }
　　 return false;
}
这个方法比上面的简单，先找到哈希位置，再遍历整个链表，如果找到就返回 true 。
Entry getEntry(Object key) {
　　 Object k = maskNull(key);
　　 int hash = hash(k);
　　 int i = indexFor(hash, table.length);
　　 Entry e = table[i];
　　 while (e != null && !(e.hash == hash && eq(k, e.key)))
　　　　 e = e.next;
　　 return e;
}
这个方法根据 key 值返回 Entry 节点，也是先获得索引位置，再遍历链表，如果没有找到返回的是 null 。

public Object put(Object key, Object value) {
　　 Object k = maskNull(key);
　　 int hash = hash(k);
　　 int i = indexFor(hash, table.length);
　　 for (Entry e = table[i]; e != null; e = e.next) {
　　　　 if (e.hash == hash && eq(k, e.key)) {
　　　　　　 Object oldValue = e.value;
　　　　　　 e.value = value;
　　　　　　 e.recordAccess(this);
　　　　　　 return oldValue;
　　　　 }
　　 }
　　 modCount++;
　　 addEntry(hash, k, value, i);
　　 return null;
}
首先获得 hash 索引位置，如果该位置的引用为 null ，那么直接插入一个映射，返回 null 。如果此处的引用不是 null ，必须遍历链表，如果找到一个相同的 key ，那么就更新该 value ，同时返回原来的 value 值。如果遍历完了没有找到，说明该 key 值不存在，还是插入一个映射。如果 hash 值足够离散的话，也就是说该索引没有被使用的话，那么就不用遍历链表了。相反，如果 hash 值不离散，极端地说如果是常数的话，所有的映射都会在这一个链表上，效率会极其低下。这里举一个最简单的例子，写两个不同的类作为 key 插入到 HashMap 中，效率会远远不同。
/**
 * Example key type with a well-distributed hash: equal instances wrap the
 * same int, and hashCode() returns that int.
 */
class Good {
    int i;

    public Good(int i) {
        this.i = i;
    }

    /** Two Good instances are equal when they wrap the same int. */
    public boolean equals(Object o) {
        return (o instanceof Good) && (this.i == ((Good) o).i);
    }

    /** Uses the wrapped int as the hash, so distinct values give distinct hashes. */
    public int hashCode() {
        return i;
    }
}
/**
 * Example key type with a degenerate hash: equality is by wrapped int, but
 * hashCode() is the constant 0 for every instance.
 */
class Bad {
    int i;

    public Bad(int i) {
        this.i = i;
    }

    /** Two Bad instances are equal when they wrap the same int. */
    public boolean equals(Object o) {
        return (o instanceof Bad) && (this.i == ((Bad) o).i);
    }

    /** Always 0: consistent with equals(), but every instance collides. */
    public int hashCode() {
        return 0;
    }
}
执行代码：
// Fills one map with Good keys and another with Bad keys to compare insertion cost.
Map m1=new HashMap();
Map m2=new HashMap();
for(int i=0;i<100;i++){
　　 m1.put(new Good(i),new Integer(i)); // efficient: each Good key reports a distinct hashCode
}
for(int i=0;i<100;i++){
　　 m2.put(new Bad(i),new Integer(i)); // slow: every Bad key reports hashCode 0, so put() scans one ever-growing chain
}
上面的是两个非常极端的例子，执行一下就知道差别有多大。

private void putForCreate(Object key, Object value) {
　　 Object k = maskNull(key);
　　 int hash = hash(k);
　　 int i = indexFor(hash, table.length);
　　 for (Entry e = table[i]; e != null; e = e.next) {
　　　　 if (e.hash == hash && eq(k, e.key)) {
　　　　　　　 e.value = value;
　　　　　　　 return;
　　　　 }
　　 }
　　 createEntry(hash, k, value, i);
}

void putAllForCreate(Map m) {
　　 for (Iterator i = m.entrySet().iterator(); i.hasNext(); ) {
　　　　 Map.Entry e = (Map.Entry) i.next();
　　　　 putForCreate(e.getKey(), e.getValue());
　　 }
}
上面的两个方法是被构造函数和 clone 方法调用的。

void resize(int newCapacity) {
　　 Entry[] oldTable = table;
　　 int oldCapacity = oldTable.length;
　　 if (size < threshold || oldCapacity > newCapacity)
　　　　 return;
　　 Entry[] newTable = new Entry[newCapacity];
　　 transfer(newTable);
　　 table = newTable;
　　 threshold = (int)(newCapacity * loadFactor);
}
这个方法在需要的时候重新分配空间，相当于 ArrayList 的 ensureCapacity 方法，不过这个更加复杂。

void transfer(Entry[] newTable) {
　　 Entry[] src = table;
　　 int newCapacity = newTable.length;
　　 for (int j = 0; j < src.length; j++) {
　　　　 Entry e = src[j];
　　　　 if (e != null) {
　　　　　　 src[j] = null;
　　　　　　 do {
　　　　　　　　　 Entry next = e.next;
　　　　　　　　　 int i = indexFor(e.hash, newCapacity);
　　　　　　　　　 e.next = newTable[i];
　　　　　　　　　 newTable[i] = e;
　　　　　　　　　 e = next;
　　　　　　 } while (e != null);
　　　　 }
　　 }
}
遍历原来的数组，如果该 Entry 不是 null 的话，说明有映射，然后遍历这个链表，把所有的映射插入到新的数组中，注意这里要重新计算索引位置。

public void putAll(Map t) {
　　 int n = t.size();
　　 if (n == 0)
　　　　 return;
　　 if (n >= threshold) {
　　　　 n = (int)(n / loadFactor + 1);
　　　　 if (n > MAXIMUM_CAPACITY)
　　　　　　 n = MAXIMUM_CAPACITY;
　　　　 int capacity = table.length;
　　　　 while (capacity < n) capacity <<= 1;
　　　　　　 resize(capacity);
　　 }
　　 for (Iterator i = t.entrySet().iterator(); i.hasNext(); ) {
　　　　 Map.Entry e = (Map.Entry) i.next();
　　　　 put(e.getKey(), e.getValue());
　　 }
}
这个方法先确定是否需要扩大空间，然后循环调用 put 方法。

public Object remove(Object key) {
　　 Entry e = removeEntryForKey(key);
　　 return (e == null ? e : e.value);
}

Entry removeEntryForKey(Object key) {
　　 Object k = maskNull(key);
　　 int hash = hash(k);
　　 int i = indexFor(hash, table.length);
　　 Entry prev = table[i];
　　 Entry e = prev;
　　 while (e != null) { 　如果 e==null 表示不存在
　　　　 Entry next = e.next;
　　　　 if (e.hash == hash && eq(k, e.key)) {
　　　　　　 modCount++;
　　　　　　 size--;
　　　　　　 if (prev == e)
　　　　　　　　 table[i] = next; 链表的第一个元素就是要删除的，这里最好加一句 e.next=null.
　　　　　　 else
　　　　　　　　 prev.next = next; 存在但不是链表的第一个元素，这里最好加一句 e.next=null.
　　　　　　 e.recordRemoval(this);
　　　　　　 return e;
　　　　 }
　　　　 prev = e;
　　　　 e = next;
　　 }
　　 return e; 　　　这里其实就是 return null;
}
这个方法其实也不复杂，也是遍历链表，这里建议加一句 e.next=null, 可以改为
if(prev==e)
　　 table[i]=next;
else
　　 prev.next=next;
e.next=null; 这一句是多加的，可以提高效率。
这里简单说明我的看法：
因为 e 是被删除的节点，删除它其实就是使指向它的指针指向它的后面一个节点。所以 e 可以作为 GC 回收的对象。可是 e 还有一个 next 指针指向我们的数据。如果 e 没有被回收，而且此时 e.next 指向的节点也变为没用的了，但是却有一个它的引用 (e.next)，所以虽然 e 的下一个节点没用了，但是却不能作为 GC 回收的对象，除非 e 先被回收。虽然不一定会引起很大的问题，但是至少会影响 GC 的回收效率。就像数据库中的外键引用一样，删除起来很麻烦呀。

Entry removeMapping(Object o) {
　　 if (!(o instanceof Map.Entry))
　　　　 return null;
　　 Map.Entry entry = (Map.Entry)o;
　　 Object k = maskNull(entry.getKey());
　　 int hash = hash(k);
　　 int i = indexFor(hash, table.length);
　　 Entry prev = table[i];
　　 Entry e = prev;
　　 while (e != null) {
　　　　 Entry next = e.next;
　　　　 if (e.hash == hash && e.equals(entry)) {
　　　　　　 modCount++;
　　　　　　 size--;
　　　　　　 if (prev == e)
　　　　　　　　 table[i] = next;
　　　　　　 else
　　　　　　　　 prev.next = next;
　　　　　　 e.recordRemoval(this);
　　　　　　 return e;
　　　　　 }
　　　　　 prev = e;
　　　　　 e = next;
　　 }
　　 return e;
}
这个方法和上面的一样。

public void clear() {
　　 modCount++;
　　 Entry tab[] = table;
　　 for (int i = 0; i < tab.length; i++)
　　　　 tab[i] = null;
　　 size = 0;
}
同样可以改进

public boolean containsValue(Object value) {
　　 if (value == null)
　　　　 return containsNullValue();
　　 Entry tab[] = table;
　　 for (int i = 0; i < tab.length ; i++)
　　　　 for (Entry e = tab[i] ; e != null ; e = e.next)
　　　　　　 if (value.equals(e.value)) return true;
　　 return false;
}

private boolean containsNullValue() {
　　 Entry tab[] = table;
　　 for (int i = 0; i < tab.length ; i++)
　　　　 for (Entry e = tab[i] ; e != null ; e = e.next)
　　　　　　 if (e.value == null) return true;
　　 return false;
}

public Object clone() {
　　 HashMap result = null;
　　 try {
　　　　 result = (HashMap)super.clone();
　　 }
　　 catch (CloneNotSupportedException e) { // assert false; }
　　 result.table = new Entry[table.length];
　　 result.entrySet = null;
　　 result.modCount = 0;
　　 result.size = 0;
　　 result.init();
　　 result.putAllForCreate(this);
　　 return result;
}

static class Entry implements Map.Entry {
　　 final Object key;
　　 Object value;
　　 final int hash;
　　 Entry next;
　　 Entry(int h, Object k, Object v, Entry n) {
　　　　 value = v;
　　　　 next = n;
　　　　 key = k;
　　　　 hash = h;
　　 }
　　 public Object getKey() {
　　　　 return unmaskNull(key);
　　 }
　　　 public Object getValue() {
　　　　 return value;
　　 }
　　　 public Object setValue(Object newValue) {
　　　　　 Object oldValue = value;
　　　　　 value = newValue;
　　　　　 return oldValue;
　　 }
　　　 public boolean equals(Object o) {
　　　　 if (!(o instanceof Map.Entry)) return false;
　　　　 Map.Entry e = (Map.Entry)o;
　　　　 Object k1 = getKey();
　　　　 Object k2 = e.getKey();
　　　　 if (k1 == k2 || (k1 != null && k1.equals(k2))) {
　　　　　　 Object v1 = getValue();
　　　　　　 Object v2 = e.getValue();
　　　　　　 if (v1 == v2 || (v1 != null && v1.equals(v2))) return true;
　　　　 }
　　　　 return false;
　　　 }
　　　 public int hashCode() {
　　　　　 return (key==NULL_KEY ? 0 : key.hashCode()) ^ (value==null ? 0 : value.hashCode());
　　　 }
　　　 public String toString() {
　　　　　 return getKey() + "=" + getValue();
　　　 }
　　　 void recordAccess(HashMap m) { }
　　　 void recordRemoval(HashMap m) { }
}
一个静态内部类

void addEntry(int hash, Object key, Object value, int bucketIndex) {
　　　 table[bucketIndex] = new Entry(hash, key, value, table[bucketIndex]);
　　　 if (size++ >= threshold)
　　　　　 resize(2 * table.length);
}
注意这个方法，插入链表的头。
可以写成这样更好理解：
Entry oldHead=table[bucketIndex];
Entry newHead = new Entry(hash,key,value,oldHead);
table[bucketIndex]=newHead;

void createEntry(int hash, Object key, Object value, int bucketIndex) {
　　 table[bucketIndex] = new Entry(hash, key, value, table[bucketIndex]);
　　 size++;
}

private abstract class HashIterator implements Iterator {
　　 Entry next;
　　 int expectedModCount;
　　 int index;
　　 Entry current;
　　 HashIterator() {
　　　　 expectedModCount = modCount;
　　　　 Entry[] t = table;
　　　　 int i = t.length;
　　　　 Entry n = null;
　　　　 if (size != 0) {
　　　　　　 while (i > 0 && (n = t[--i]) == null) ;
　　　　 }
　　　　 next = n;
　　　　 index = i;
　　 }
　　 public boolean hasNext() {
　　　　 return next != null;
　　 }
　　 Entry nextEntry() {
　　　　 if (modCount != expectedModCount)
　　　　　　 throw new ConcurrentModificationException();
　　　　 Entry e = next;
　　　　 if (e == null)
　　　　　　 throw new NoSuchElementException();
　　　　 Entry n = e.next;
　　　　 Entry[] t = table;
　　　　 int i = index;
　　　　 while (n == null && i > 0)
　　　　　　　 n = t[--i]; index = i;
　　　　 next = n;
　　　　 return current = e;
　　 }
　　 public void remove() {
　　　　 if (current == null)
　　　　　　 throw new IllegalStateException();
　　　　 if (modCount != expectedModCount)
　　　　　　 throw new ConcurrentModificationException();
　　　　 Object k = current.key;
　　　　 current = null;
　　　　 HashMap.this.removeEntryForKey(k);
　　　　 expectedModCount = modCount;
　　 }
}

private class ValueIterator extends HashIterator {
　　 public Object next() {
　　　　 return nextEntry().value;
　　 }
}

private class KeyIterator extends HashIterator {
　　 public Object next() {
　　　　 return nextEntry().getKey();
　　 }
}

private class EntryIterator extends HashIterator {
　　 public Object next() {
　　　　 return nextEntry();
　　 }
}

Iterator newKeyIterator() {
　　 return new KeyIterator();
}

Iterator newValueIterator() {
　　 return new ValueIterator();
}

Iterator newEntryIterator() { 　
　　 return new EntryIterator();
}

// Cached entry-set view: created by entrySet() on first use and reset to null by clone().
private transient Set entrySet = null;

public Set keySet() {
　　 Set ks = keySet;
　　 return (ks != null ? ks : (keySet = new KeySet()));
}

private class KeySet extends AbstractSet {
　　 public Iterator iterator() {
　　　　 return newKeyIterator();
　　 }
　　 public int size() {
　　　　 return size;
　　 }
　　 public boolean contains(Object o) {
　　　　 return containsKey(o);
　　 }
　　 public boolean remove(Object o) {
　　　　 return HashMap.this.removeEntryForKey(o) != null;
　　 }
　　 public void clear() {
　　　　 HashMap.this.clear();
　　 }
}

public Collection values() {
　　 Collection vs = values; return (vs != null ? vs : (values = new Values()));
}

private class Values extends AbstractCollection {
　　 public Iterator iterator() {
　　　　 return newValueIterator();
　　 }
　　 public int size() {
　　　　 return size;
　　 }
　　 public boolean contains(Object o) {
　　　　 return containsValue(o);
　　 }
　　 public void clear() {
　　　　 HashMap.this.clear();
　　 }
}

public Set entrySet() {
　　 Set es = entrySet;
　　 return (es != null ? es : (entrySet = new EntrySet()));
}

private class EntrySet extends AbstractSet {
　　 public Iterator iterator() {
　　　　 return newEntryIterator();
　　 }
　　 public boolean contains(Object o) {
　　　　 if (!(o instanceof Map.Entry))
　　　　　　 return false;
　　　　 Map.Entry e = (Map.Entry)o;
　　　　 Entry candidate = getEntry(e.getKey());
　　　　 return candidate != null && candidate.equals(e);
　　 }
　　 public boolean remove(Object o) {
　　　　 return removeMapping(o) != null;
　　 }
　　 public int size() {
　　　　 return size;
　　 }
　　 public void clear() {
　　　　 HashMap.this.clear();
　　 }
}

private void writeObject(java.io.ObjectOutputStream s) throws IOException {
　　 s.defaultWriteObject();
　　 s.writeInt(table.length);
　　 s.writeInt(size);
　　 for (Iterator i = entrySet().iterator(); i.hasNext(); ) {
　　　　 Map.Entry e = (Map.Entry) i.next();
　　　　 s.writeObject(e.getKey());
　　　　 s.writeObject(e.getValue());
　　 }
}

// Version identifier for the serialized form of this class.
private static final long serialVersionUID = 362498820763181265L;

private void readObject(java.io.ObjectInputStream s) throws IOException, ClassNotFoundException {
　　 s.defaultReadObject();
　　 int numBuckets = s.readInt();
　　 table = new Entry[numBuckets];
　　 init();
　　 size = s.readInt(); for (int i=0;
　　 for (int i=0; i
　　　　　 Object key = s.readObject();
　　　　　 Object value = s.readObject(); 　
　　　　　 putForCreate(key, value);
　　　 }
}

int capacity() {
　 return table.length;
}
float loadFactor() {
　　 return loadFactor;