JDK7-hashmap源码

最新推荐文章于 2022-07-15 23:34:55 发布

简珍

最新推荐文章于 2022-07-15 23:34:55 发布

阅读量456

点赞数 1

本文链接：https://blog.csdn.net/JaneZ1998/article/details/108941510

版权

JDK7-HashMap

1、简介

	public static void main(String[] args) {
        // JDK 7 HashMap layout: a bucket array whose slots hold singly linked lists.
        HashMap<String, String> demo = new HashMap<>();
        // First insert: the pair (k1, v1) is stored as an Entry.
        demo.put("k1", "v1");
        // Putting the same key again overwrites the value and returns the old one.
        String previous = demo.put("k1", "v2");
        System.out.println(previous);  // prints v1
    }

为什么 HashMap 的 put 操作不像下面 ArrayList 的 add 操作那样，直接把元素依次存入数组？=> 因为那样 get(key) 时只能逐个遍历所有元素来比较 key，效率低；HashMap 先根据 key 的 hash 计算出数组下标，get 时只需遍历该下标上的链表。

    // ArrayList中的add操作
	public boolean add(E e) {
        // Request room for one more element (helper defined elsewhere in ArrayList).
        ensureCapacityInternal(size + 1);
        // Claim the current end slot, advancing the element count, then store into it.
        final int slot = size++;
        elementData[slot] = e;
        return true;
    }
    public void add(int index, E element) {
        rangeCheckForAdd(index);
        ensureCapacityInternal(size + 1);
        // Shift the tail [index, size) one slot to the right to open a gap at index.
        final int tailLength = size - index;
        System.arraycopy(elementData, index, elementData, index + 1, tailLength);
        // Drop the new element into the gap and account for it.
        elementData[index] = element;
        size += 1;
    }

在这里插入图片描述

2、put/get源码分析

	// Resize threshold. Before the table is allocated it temporarily holds the
	// requested initial capacity (assigned in the constructor, consumed by inflateTable).
	int threshold;
	// Default initial capacity: 16.
    static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; 
    // Upper bound on the table length: 2^30.
    static final int MAXIMUM_CAPACITY = 1 << 30;
	// Default load factor.
    static final float DEFAULT_LOAD_FACTOR = 0.75f;
	// Shared empty array used as the "table not allocated yet" marker (put compares against it).
	static final Entry<?,?>[] EMPTY_TABLE = {};
    // The bucket array; each slot is the head of a singly linked chain of entries.
    transient Entry<K,V>[] table = (Entry<K,V>[]) EMPTY_TABLE;
    // Number of key-value mappings stored.
    transient int size;
	// Load factor of this map.
	final float loadFactor;
    // Modification counter: incremented when an entry is added or removed, and compared
    // by iterators against their expectedModCount to fail fast.
    transient int modCount;
	// Hash seed; 0 means alternative hashing is not in use.
	transient int hashSeed = 0;
	// Default for the alternative-hashing capacity threshold.
	static final int ALTERNATIVE_HASHING_THRESHOLD_DEFAULT = Integer.MAX_VALUE;


	// 构造器
	// No-arg constructor: delegates with the default capacity (1 << 4) and load factor (0.75f).
	public HashMap() {
        this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR);
    }
	public HashMap(int initialCapacity, float loadFactor) {
        // Reject a negative capacity outright.
        if (initialCapacity < 0) {
            throw new IllegalArgumentException("Illegal initial capacity: " +
                                               initialCapacity);
        }
        // The load factor must be a positive number (NaN is rejected too).
        if (Float.isNaN(loadFactor) || loadFactor <= 0) {
            throw new IllegalArgumentException("Illegal load factor: " +
                                               loadFactor);
        }

        this.loadFactor = loadFactor;
        // No table is allocated here. The (clamped) requested capacity is parked in
        // threshold, and put() later passes it to inflateTable().
        threshold = Math.min(initialCapacity, MAXIMUM_CAPACITY);
        init();  // hook method; per the original note, HashMap's own implementation is empty
    }

	// put方法
	public V put(K key, V value) {
        // Lazy allocation: the table is still the shared EMPTY_TABLE marker until the first put.
        if (table == EMPTY_TABLE) {
            inflateTable(threshold);// at this point threshold still holds the initial capacity assigned in the constructor
        }
        // A null key is handled separately; its entry is kept in the table[0] chain.
        if (key == null)
            return putForNullKey(value);
        // Compute the hash of the key.
        int hash = hash(key);  
        // Compute the bucket index.
        /*
            Requirements for the computed index:
                1) it must fall within [0, length-1]
                2) every value in 0..length-1 should be about equally likely
                   (otherwise some chains become too long)
        */
        int i = indexFor(hash, table.length);
        // Walk the chain at table[i]. If the key is already present, overwrite the
        // value and return the old one.
        for (Entry<K,V> e = table[i]; e != null; e = e.next) {
            Object k;
            if (e.hash == hash && ((k = e.key) == key || key.equals(k))) {
                V oldValue = e.value;
                e.value = value;
                e.recordAccess(this); // per the original note, a no-op in HashMap
                // Return the previous value.
                return oldValue;
            }
        }
        // Key not found: adding an entry counts as a modification for fail-fast iterators.
        modCount++;
        // Head insertion: the new entry is linked in front of the chain and becomes table[i].
        addEntry(hash, key, value, i);
        return null;
    }
	
	private void inflateTable(int toSize) {
        // Smallest power of two >= toSize (e.g. 10 -> 16). A power-of-two length lets
        // indexFor reduce a hash to [0, length-1] with a single bit mask.
        final int tableLength = roundUpToPowerOf2(toSize);
        // From here on threshold is the resize trigger: capacity * loadFactor, capped.
        final float resizeTrigger = Math.min(tableLength * loadFactor, MAXIMUM_CAPACITY + 1);
        threshold = (int) resizeTrigger;
        table = new Entry[tableLength];
        initHashSeedAsNeeded(tableLength);
    }

	private static int roundUpToPowerOf2(int number) {
        // At or beyond the cap: clamp to the maximum table length.
        if (number >= MAXIMUM_CAPACITY) {
            return MAXIMUM_CAPACITY;
        }
        // Anything not greater than 1 rounds to the smallest table length, 1.
        if (number <= 1) {
            return 1;
        }
        /*
            Integer.highestOneBit(x) yields the largest power of two <= x.
            Doubling (number - 1) instead of number keeps exact powers of two unchanged:
              number = 16 -> (16 - 1) * 2 = 30 -> highestOneBit = 16
            Without the "- 1":
              number = 16 -> 16 * 2 = 32       -> highestOneBit = 32 (one step too large)
        */
        return Integer.highestOneBit((number - 1) << 1);
    }
	 // 将key为null的entry存在table[0]的链表上
     private V putForNullKey(V value) {
        // The null key always lives in bucket 0; look for an existing null-key entry there.
        Entry<K,V> node = table[0];
        while (node != null) {
            if (node.key == null) {
                final V previous = node.value;
                node.value = value;
                node.recordAccess(this);
                return previous;
            }
            node = node.next;
        }
        // Not present yet: count the modification and insert with hash 0 into bucket 0.
        modCount++;
        addEntry(0, null, value, 0);
        return null;
    }

	void addEntry(int hash, K key, V value, int bucketIndex) {
        // Grow only when the element count has reached the threshold
        // (table length * load factor) AND the target bucket is already occupied.
        final boolean mustGrow = size >= threshold && table[bucketIndex] != null;
        if (mustGrow) {
            resize(table.length * 2);  // double the table
            // The table changed, so recompute the hash and bucket index.
            if (key == null) {
                hash = 0;
            } else {
                hash = hash(key);
            }
            bucketIndex = indexFor(hash, table.length);
        }
        createEntry(hash, key, value, bucketIndex);
    }
    // 扩容
	void resize(int newCapacity) {
        Entry[] oldTable = table;
        int oldCapacity = oldTable.length;
        // Already at the maximum length: stop resizing for good by making the
        // threshold unreachable.
        if (oldCapacity == MAXIMUM_CAPACITY) {
            threshold = Integer.MAX_VALUE;
            return;
        }
        Entry[] newTable = new Entry[newCapacity];
     /*
        initHashSeedAsNeeded compares the capacity with Holder.ALTERNATIVE_HASHING_THRESHOLD.
        That threshold is the value of the jdk.map.althashing.threshold system property
        (set via VM options) when present, otherwise Integer.MAX_VALUE.

        A resize changes the capacity, so the comparison has to be made again.
        If the outcome switches the hash seed, every stored hash is stale and must be
        recomputed; in that case the second argument to transfer is true.
    */
        transfer(newTable, initHashSeedAsNeeded(newCapacity));
        // Publish the new table on the map.
        table = newTable;
        // Recompute the resize threshold for the new capacity.
        threshold = (int)Math.min(newCapacity * loadFactor, MAXIMUM_CAPACITY + 1);
    }

	/*
		newTable：容量翻倍的新的空数组。
		双重循环遍历数组+链表
	*/
    // Moves every entry of the current table into newTable.
    // Outer loop: buckets of the old table; inner loop: the chain in each bucket.
    // rehash = true means the stored hashes must be recomputed before re-indexing.
    void transfer(Entry[] newTable, boolean rehash) {
        int newCapacity = newTable.length;
        for (Entry<K,V> e : table) {
            while(null != e) {
                // Remember the successor now; e.next is overwritten below.
                Entry<K,V> next = e.next;
                if (rehash) {
                    e.hash = null == e.key ? 0 : hash(e.key);
                }
/*
           When rehash is false the stored hash is reused and only the mask changes.
           Example: old length 16 (mask 15), new length 32 (mask 31).

                before resize:            |
                     hash : 0000 0101     |  hash : 0001 0101
                     15   : 0000 1111     |  15   : 0000 1111
                    ------------------    |  ------------------
                     &      0000 0101     |  &      0000 0101
                                          |
                after resize:             |
                     hash : 0000 0101     |  hash : 0001 0101
                     31   : 0001 1111     |  31   : 0001 1111
                    ------------------    |  ------------------
                     &      0000 0101     |  &      0001 0101
                                          |
            stays in the chain at index i | moves to the chain at index i + oldTable.length

           So with rehash == false an entry ends up either at its old index or at
           (old index + oldTable.length). Because of head insertion, entries that
           share a new bucket appear in reverse order.
*/
                int i = indexFor(e.hash, newCapacity);
                // Head insertion into the new bucket.
                e.next = newTable[i];
                newTable[i] = e;
                e = next;
            }
        }
    }

	// "头插法 + 移动"操作
	void createEntry(int hash, K key, V value, int bucketIndex) {
        // Head insertion: the new entry points at the old head, then becomes the head.
        final Entry<K,V> oldHead = table[bucketIndex];
        final Entry<K,V> newHead = new Entry<>(hash, key, value, oldHead);
        table[bucketIndex] = newHead;
        size += 1;
    }

	// Entry中hash属性的值就是此方法返回的值
	final int hash(Object k) {
        // hashSeed exists to make the computed hashes more scattered.
        int h = hashSeed;   // hashSeed defaults to 0
        // With a non-zero seed, String keys use the alternative string hash instead.
        if (0 != h && k instanceof String) {
            return sun.misc.Hashing.stringHash32((String) k);
        }
        // XOR: equal bits give 0, different bits give 1.
        h ^= k.hashCode();
        
        /* Why the shift-and-XOR steps below are needed:
                h:    0110 0101
                15:   0000 1111

                h:    1110 0101
                15:   0000 1111

                h:    0000 0101
                15:   0000 1111

           All three AND operations above give the same result, because the mask
           ignores the high bits. Without the mixing below, many keys could end up
           with the same index and produce overly long chains.
         */ 
        // Unsigned right shifts XORed back in, so that the high bits also influence
        // the index computed in indexFor, which spreads entries more evenly.
        h ^= (h >>> 20) ^ (h >>> 12);
        return h ^ (h >>> 7) ^ (h >>> 4);
    }

	/*	返回数组下标。
	  为什么此前要求数组容量为2的幂次方数：使【h&(length-1)】计算出的下标在[0,length-1]范围内。
	
		若h为0101 1101，length为16（0001 0000）
	
		 		h:	 0101 1101
			    15:  0000 1111
		-----------------------
			     &   0000 1101
	*/
	static int indexFor(int h, int length) {
        // Callers pass a power-of-two length, so (length - 1) is an all-ones low-bit mask.
        // The AND keeps the result in [0, length-1] and is cheaper than h % length.
        final int mask = length - 1;
        return h & mask;
    }

	//初始化hashSeed
	final boolean initHashSeedAsNeeded(int capacity) {
        // A non-zero seed means alternative hashing is currently on (the seed defaults to 0).
        final boolean seededNow = hashSeed != 0;
        // Alternative hashing is wanted once the VM has booted and the capacity reaches the
        // threshold. Without the jdk.map.althashing.threshold system property the threshold
        // is Integer.MAX_VALUE, so this is normally false.
        final boolean wantSeed = sun.misc.VM.isBooted()
                && (capacity >= Holder.ALTERNATIVE_HASHING_THRESHOLD);
        // Nothing changes unless the desired state differs from the current one.
        if (seededNow == wantSeed) {
            return false;
        }
        // Switching: generate a fresh seed when turning on, reset to 0 when turning off.
        if (wantSeed) {
            hashSeed = sun.misc.Hashing.randomHashSeed(this);
        } else {
            hashSeed = 0;
        }
        return true;
    }

	// Holds the alternative-hashing capacity threshold, computed once in the static initializer.
	private static class Holder {
        static final int ALTERNATIVE_HASHING_THRESHOLD;
        static {
            // Read the jdk.map.althashing.threshold system property.
            // For experiments it can be set as a VM option, e.g. -Djdk.map.althashing.threshold=8.
            String altThreshold = java.security.AccessController.doPrivileged(
                new sun.security.action.GetPropertyAction(
                    "jdk.map.althashing.threshold"));
            
            int threshold;
            try {
                // Property not set (null): fall back to the default, Integer.MAX_VALUE.
                threshold = (null != altThreshold)
                        ? Integer.parseInt(altThreshold)
                        : ALTERNATIVE_HASHING_THRESHOLD_DEFAULT;

                // -1 is mapped to Integer.MAX_VALUE.
                if (threshold == -1) {
                    threshold = Integer.MAX_VALUE;
                }
                // Any other negative value is rejected.
                if (threshold < 0) {
                    throw new IllegalArgumentException("value must be positive integer.");
                }
            } catch(IllegalArgumentException failed) {
                // Also reached for unparsable text: parseInt's NumberFormatException
                // is an IllegalArgumentException.
                throw new Error("Illegal value for 'jdk.map.althashing.threshold'", failed);
            }
            // Publish the final value.
            ALTERNATIVE_HASHING_THRESHOLD = threshold;
        }
    }


	// get方法
	public V get(Object key) {
        // Null keys take the dedicated bucket-0 lookup.
        if (key == null) {
            return getForNullKey();
        }
        final Entry<K,V> hit = getEntry(key);
        if (hit == null) {
            return null;
        }
        return hit.getValue();
    }
 	final Entry<K,V> getEntry(Object key) {
        // An empty map cannot contain the key.
        if (size == 0) {
            return null;
        }
        final int wanted = (key == null) ? 0 : hash(key);
        // Walk the chain of the bucket the hash maps to.
        Entry<K,V> node = table[indexFor(wanted, table.length)];
        while (node != null) {
            // Cheap hash comparison first, then identity, then equals().
            if (node.hash == wanted) {
                final Object candidate = node.key;
                if (candidate == key || (key != null && key.equals(candidate))) {
                    return node;
                }
            }
            node = node.next;
        }
        return null;
    }
	private V getForNullKey() {
        if (size == 0) {
            return null;
        }
        // The null key, if present, is somewhere in the bucket-0 chain.
        Entry<K,V> node = table[0];
        while (node != null) {
            if (node.key == null) {
                return node.value;
            }
            node = node.next;
        }
        return null;
    }

	// Entry对象
	// One node of a bucket's singly linked chain.
	static class Entry<K,V> implements Map.Entry<K,V> {
        final K key;        // the key; never reassigned
        V value;            // current value; overwritten when the key is put again
        Entry<K,V> next;    // next node in the same bucket, or null at the end of the chain
        int hash;           // hash stored for the key; transfer() rewrites it when rehashing

        // Creates a node with hash h, key k and value v, linked in front of n.
        Entry(int h, K k, V v, Entry<K,V> n) {
            value = v;
            next = n;
            key = k;
            hash = h;
        }
        // ...... (remaining members omitted in this excerpt)
    }

若rehash为false，转移后元素位置：要么原位置链表，要么（原位置+oldTable.length）链表上。只不过链表的顺序反了。

3、多线程操作存在的问题

多线程操作，扩容时存在的问题：出现循环链表。当在此链表上get或put时出现死循环

在多线程并发操作中，尽量避免hashmap扩容（若hashmap所存元素个数在一个已知的范围内，可设置初始大小和加载因子来保证 size < 阈值）。或在更上层代码中做一些并发安全的控制。

  // 多线程put操作时，若2个线程都执行到transfer方法。
  // Same method as shown earlier, repeated for the concurrency discussion.
  // It takes no lock. If two threads run it on the same table at once, they re-link
  // the same Entry objects (e.next is rewritten below), and the accompanying text
  // describes how this can leave a cyclic chain.
  void transfer(Entry[] newTable, boolean rehash) {
        int newCapacity = newTable.length;
        for (Entry<K,V> e : table) {
            while(null != e) {
                // Snapshot of the successor; another thread may already have re-linked it.
                Entry<K,V> next = e.next;
                if (rehash) {
                    e.hash = null == e.key ? 0 : hash(e.key);
                }
                int i = indexFor(e.hash, newCapacity);
                // Head insertion into this thread's own newTable.
                e.next = newTable[i];
                newTable[i] = e;
                e = next;
            }
        }
    }

图示：
在这里插入图片描述

扩容后链表长度可能会变短，能使get效率变高
假设扩容后链表仍在table[i]的位置

若当线程1执行完了，线程2才抢到资源来执行。
在这里插入图片描述

图中虚线可忽略

线程2第一轮循环：
在这里插入图片描述

线程2第二轮循环：
在这里插入图片描述

线程2第三轮循环：出现循环链表。当在此链表上get或put时出现死循环。
在这里插入图片描述

图中虚线可忽略

最终图示：
在这里插入图片描述

4、modCount与并发修改异常

	// Demo of the fail-fast check: removing through the map while a for-each loop
	// (i.e. an iterator) is running over its key set.
	public static void main(String[] args) {
        HashMap<String, String> map = new HashMap<>();
        map.put("k1", "v1");   
        map.put("k2", "v2");
        map.put("k3", "v3");   
        map.put("k4", "v4");
        for(String key : map.keySet()){
            if("k3".equals(key)){
                // Modifies the map behind the iterator's back: modCount changes,
                // but the iterator's expectedModCount does not.
                map.remove(key);
            }
        }
    }	
    // 运行报错：Exception in thread "main" java.util.ConcurrentModificationException

	// 上述代码编译后
	// Desugared form of the for-each loop above.
	public static void main(String[] args) {
        HashMap<String, String> map = new HashMap();
        map.put("k1", "v1");
        map.put("k2", "v2");
        map.put("k3", "v3");
        map.put("k4", "v4");
        Iterator i$ = map.keySet().iterator(); // ends up creating a new KeyIterator()
		// Each of the four puts above added a new key and did modCount++, so modCount = 4.
		// The iterator's constructor copies it: expectedModCount = 4.
        
        while(i$.hasNext()) {
             // next() checks modCount against expectedModCount and throws if they differ.
            String key = (String)i$.next(); // only compares the counters; does not change modCount
            if ("k3".equals(key)) {
                // map.remove increments modCount to 5 while expectedModCount stays 4.
                // The next call to next() sees the mismatch and throws
                // ConcurrentModificationException.
                map.remove(key);   // modCount++
            }
        }
    }

	//怎么解决上述问题：调用迭代器中的remove方法即可
	// Fixed version of the demo: remove through the iterator instead of through the map.
	public static void main(String[] args) {
        HashMap<String, String> map = new HashMap<>();
        map.put("k1", "v1");
        map.put("k2", "v2");
        map.put("k3", "v3");
        map.put("k4", "v4");
        Iterator<String> i$ = map.keySet().iterator();
        while (i$.hasNext()) {
            // next() only compares modCount with expectedModCount; it changes neither.
            String key = i$.next();
            if ("k3".equals(key)) {
                // Iterator.remove() takes no argument: it removes the element returned
                // by the last next(). It does modCount++ and then re-syncs
                // expectedModCount = modCount, so the following next() does not throw.
                i$.remove();
            }
        }
    }

modCount：修改次数

HashMap源码：

	// KeyIterator() extends HashIterator<K>
	private final class KeyIterator extends HashIterator<K> {
        // Advances the shared cursor (which performs the modCount check) and exposes only the key.
        public K next() {
            final Entry<K,V> advanced = nextEntry();
            return advanced.getKey();
        }
    }

	// HashIterator源码
	// Base iterator over the table: walks buckets in index order and each chain front to back.
	private abstract class HashIterator<E> implements Iterator<E> {
        Entry<K,V> next;        // entry the next call to nextEntry() will return
        int expectedModCount;   // snapshot of modCount used for the fail-fast check
        int index;              // index of the next bucket to inspect
        Entry<K,V> current;     // entry last returned; the target of remove()
        HashIterator() {
            // Key step: remember the map's modification count at creation time
            // (modCount itself starts at 0 for a new map).
            expectedModCount = modCount;
            if (size > 0) { // advance to first entry
                Entry[] t = table;
                // Skip empty buckets until the first non-empty one (or the end).
                while (index < t.length && (next = t[index++]) == null)
                    ;
            }
        }
        public final boolean hasNext() {
            return next != null;
        }
        // Key step: every advance first verifies that the map was not modified
        // outside of this iterator.
        final Entry<K,V> nextEntry() {
            if (modCount != expectedModCount)
                throw new ConcurrentModificationException();
            Entry<K,V> e = next;
            if (e == null)
                throw new NoSuchElementException();

            // Chain exhausted: move on to the next non-empty bucket.
            if ((next = e.next) == null) {
                Entry[] t = table;
                while (index < t.length && (next = t[index++]) == null)
                    ;
            }
            current = e;
            return e;
        }
        // Key step: removal through the iterator keeps the two counters in sync.
        public void remove() {
            if (current == null)
                throw new IllegalStateException();
            if (modCount != expectedModCount)
                throw new ConcurrentModificationException();
            Object k = current.key;
            current = null;
            HashMap.this.removeEntryForKey(k); // still HashMap's own removal routine (does modCount++)
            // Re-sync with the count just changed by the removal above, so the next
            // nextEntry() check passes.
            expectedModCount = modCount;
        }
    }

	//hashmap的remove方法
	public V remove(Object key) {
        // Delegate the unlinking; translate "no such entry" into a null result.
        final Entry<K,V> removed = removeEntryForKey(key);
        if (removed == null) {
            return null;
        }
        return removed.value;
    }
	
	// Unlinks and returns the entry for key, or returns null if the key is not present.
	final Entry<K,V> removeEntryForKey(Object key) {
        if (size == 0) {
            return null;
        }
        int hash = (key == null) ? 0 : hash(key);  // compute the hash (a null key uses 0)
        int i = indexFor(hash, table.length);   // compute the bucket index
        Entry<K,V> prev = table[i];
        Entry<K,V> e = prev;
		// Walk the chain in that bucket, keeping prev one step behind e.
        while (e != null) {
            Entry<K,V> next = e.next;
            Object k;
            if (e.hash == hash &&
                ((k = e.key) == key || (key != null && key.equals(k)))) {
                modCount++; // removal is a modification seen by fail-fast iterators
                size--;
                // Unlink the entry: either it is the head of the chain (prev == e)
                // or it sits behind prev.
                if (prev == e)
                    table[i] = next;
                else
                    prev.next = next;
                e.recordRemoval(this);
                return e;
            }
            prev = e;
            e = next;
        }
        // Loop ended without a match, so e is null here.
        return e;
    }

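为什么 HashMap 要设置 modCount？HashMap 不是线程安全的，JDK 开发者知道它在迭代过程中可能被修改（例如多线程操作同一个 HashMap），因此提供了一种快速失败（fail-fast）的错误检测机制：迭代器创建时记录 expectedModCount，之后每次调用 next/remove 都会检查它与 modCount 是否一致，不一致就抛出 ConcurrentModificationException。如上面的例子所示，单线程中在迭代时直接调用 map.remove 同样会触发该检查。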

5、Integer的highestOneBit方法

	// Integer中的静态方法
    // 为什么一共右移31位（1+2+4+8+16）：因为int类型为32bit，最高位的1最多需要铺满其右边的31位。
	public static int highestOneBit(int i) {
        // Smear the highest set bit into every lower position with shifts of 1, 2, 4, 8
        // and 16 (31 positions in total): 001* **** becomes 0011 1111.
        int smeared = i;
        for (int shift = 1; shift <= 16; shift <<= 1) {
            smeared |= (smeared >> shift);
        }
        // Subtracting the value shifted right by one leaves only the top bit:
        // 0011 1111 - 0001 1111 = 0010 0000.
        return smeared - (smeared >>> 1);
    }
    /*
                0010 0000
        >>1 	0001 0000
        |		0011 0000
        >>2		0000 1100
        |		0011 1100
        >>4		0000 0011
        |		0011 1111

        >>>1	0001 1111
        -
        ---------------------
        return 	0010 0000

    */