谈谈对于Java HashMap的一些理解（JDK1.8）

惜许

已于 2023-07-14 17:06:12 修改

阅读量106

点赞数

分类专栏： JAVA基础文章标签： java 哈希算法算法

于 2023-04-27 17:28:46 首次发布

本文链接：https://blog.csdn.net/a734474820/article/details/130411017

版权

JAVA基础专栏收录该内容

18 篇文章 1 订阅

订阅专栏

文章详细介绍了HashMap的内部实现，包括默认容量16，最大容量2^30，默认负载因子0.75f，以及何时转为红黑树。当数组长度不小于64、且某个桶的链表长度超过8时，HashMap会将该链表转换为红黑树以优化查找效率（数组长度小于64时只会扩容）。此外，文章还提到HashMap的hash值计算策略和数组容量为2的幂次方的原因。

摘要由CSDN通过智能技术生成

前言

HashMap结构

HashMap源码相关理解

关键变量以及Node节点定义

    // Default table capacity: 1 << 4 is binary 10000, i.e. 2^4 = 16.
    static final int DEFAULT_INITIAL_CAPACITY = 1 << 4;


    // Maximum table capacity: 1 << 30 = 2^30. int is a 32-bit signed type, so
    // 2^30 is the largest power of two it can hold as a positive value.
    static final int MAXIMUM_CAPACITY = 1 << 30;


    // Default load factor. With the default capacity the resize threshold is
    // 16 * 0.75f = 12 (computed in resize()); putVal() triggers a resize once
    // the number of entries (size) exceeds that threshold.
    // NOTE(review): the original note attributed the value 0.75 to a Poisson
    // distribution calculation. The JDK documentation describes 0.75 as a
    // time/space trade-off and uses the Poisson argument for bin lengths
    // (the treeify threshold) instead -- confirm against the JDK docs.
    static final float DEFAULT_LOAD_FACTOR = 0.75f;


    // List-length threshold for converting a bin to a tree; putVal() compares
    // its node counter against TREEIFY_THRESHOLD - 1 after appending a node.
    static final int TREEIFY_THRESHOLD = 8;


    // Threshold at which a tree bin degrades back to a linked list
    // (per the original note; the code that uses it is not shown here).
    static final int UNTREEIFY_THRESHOLD = 6;


    // Minimum table length for treeification: treeifyBin() resizes instead of
    // building a tree while the table is shorter than this.
    static final int MIN_TREEIFY_CAPACITY = 64;
    // The bucket array; each slot holds the head node of a bin.
    transient Node<K,V>[] table;

    // Entry count above which the next resize happens (capacity * load factor).
    int threshold;

    // Load factor of this map instance.
    final float loadFactor;

    // A bin entry: one key/value mapping plus a link to the next node in the same bin.
    // (Excerpt: the Map.Entry accessor methods are not shown here.)
    static class Node<K,V> implements Map.Entry<K,V> {
        // Hash stored for this node; put() supplies hash(key),
        // i.e. (h = key.hashCode()) ^ (h >>> 16).
        final int hash;
        final K key; // key of this mapping
        V value; // value of this mapping (replaceable, see putVal())
        Node<K,V> next; // next node in the same bin, or null at the tail

        // Plain field-by-field initialization.
        Node(int hash, K key, V value, Node<K,V> next) {
            this.hash = hash;
            this.key = key;
            this.value = value;
            this.next = next;
        }
    }

put方法

  // Associates value with key by delegating to putVal() with the spread hash
  // of key, onlyIfAbsent = false (existing values are overwritten) and
  // evict = true. Returns putVal()'s result: the previous value stored for
  // key, or null if there was no mapping.
  public V put(K key, V value) {
        return putVal(hash(key), key, value, false, true);
    }
    
    // Computes the hash used for bucket selection: the key's hashCode() XORed
    // with itself unsigned-shifted right by 16 bits, so the high 16 bits also
    // influence the low bits that (n - 1) & hash keeps. This spreads keys more
    // evenly over the table. A null key hashes to 0.
    static final int hash(Object key) {
        int h;
        return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);  
    }

	
    // Inserts or updates a mapping.
    //   hash         - spread hash of key (see hash())
    //   key, value   - the mapping to store
    //   onlyIfAbsent - if true, an existing non-null value is left untouched
    //   evict        - passed through to the afterNodeInsertion() hook
    // Returns the previous value for key, or null if a new node was inserted.
    final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
                   boolean evict) {
        // tab: bucket array, p: current node (starts as the bin head),
        // n: table length, i: index of the target bucket.
        Node<K,V>[] tab; Node<K,V> p; int n, i;
        if ((tab = table) == null || (n = tab.length) == 0)
            n = (tab = resize()).length; // table not allocated yet: resize() creates it
        // Bucket index is (n - 1) & hash. Because n is a power of two, n - 1 is
        // all 1-bits in binary, so the mask keeps the low bits of hash and
        // always yields a valid index, spreading entries across the buckets.
        if ((p = tab[i = (n - 1) & hash]) == null)
            // Empty bucket: the new node becomes the bin head.
            tab[i] = newNode(hash, key, value, null);
        else {
            Node<K,V> e; K k;
            if (p.hash == hash &&
                ((k = p.key) == key || (key != null && key.equals(k))))
                e = p; // the head node already holds this key; remember it for the update below
            else if (p instanceof TreeNode)
                // Bin is a tree rather than a list: delegate to the tree insertion.
                e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
            else {
                // Linked-list bin: walk from the head looking for the key or the tail.
                for (int binCount = 0; ; ++binCount) {
                    if ((e = p.next) == null) { // reached the tail: append a new node
                        p.next = newNode(hash, key, value, null);
                        // binCount + 1 nodes were in the bin before the append, so this
                        // fires when the new node makes the list longer than
                        // TREEIFY_THRESHOLD (8). treeifyBin() may still only resize
                        // if the table is shorter than MIN_TREEIFY_CAPACITY.
                        if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                            treeifyBin(tab, hash);
                        break;
                    }
                    // Found a node with the same hash and an equal key: stop here,
                    // e is the node whose value will be replaced.
                    if (e.hash == hash &&
                        ((k = e.key) == key || (key != null && key.equals(k))))
                        break;
                    p = e; // advance, equivalent to p = p.next
                }
            }
            if (e != null) { // existing mapping for key: return its old value
                V oldValue = e.value;
                if (!onlyIfAbsent || oldValue == null)
                    e.value = value; // overwrite with the new value
                afterNodeAccess(e);
                return oldValue;
            }
        }
        // A new node was added: count the structural modification.
        ++modCount;
        if (++size > threshold)
            resize(); // entry count exceeded the threshold: grow the table
        afterNodeInsertion(evict);
        return null;
    }

get方法

    // Returns the value mapped to key, or null when getNode() finds no node
    // for it. A null result can also mean the key is mapped to a null value.
    public V get(Object key) {
        Node<K,V> e;
        return (e = getNode(hash(key), key)) == null ? null : e.value;
    }

    // Looks up the node for key.
    //   hash - spread hash of key (see hash())
    //   key  - the key to find
    // Returns the matching node, or null if the table is empty or no node matches.
    final Node<K,V> getNode(int hash, Object key) {
        Node<K,V>[] tab; Node<K,V> first, e; int n; K k;
        // Locate the bucket with (n - 1) & hash, the same index computation
        // putVal() uses; first is the head node of that bin.
        if ((tab = table) != null && (n = tab.length) > 0 &&
            (first = tab[(n - 1) & hash]) != null) {
            if (first.hash == hash && // always check the head node first
                ((k = first.key) == key || (key != null && key.equals(k))))
                return first;  // the head is the match
            if ((e = first.next) != null) {
                if (first instanceof TreeNode)
                    // Tree bin: delegate to the tree lookup.
                    return ((TreeNode<K,V>)first).getTreeNode(hash, key);
                // List bin: walk the remaining nodes until one matches.
                do {
                    if (e.hash == hash &&
                        ((k = e.key) == key || (key != null && key.equals(k))))
                        return e;
                } while ((e = e.next) != null);
            }
        }
        return null;
    }

扩容方法

 // Allocates the table on first use, or doubles it and redistributes the
 // existing nodes. Returns the new table, or the old one unchanged when the
 // capacity is already at MAXIMUM_CAPACITY.
 final Node<K,V>[] resize() {
        Node<K,V>[] oldTab = table;
        int oldCap = (oldTab == null) ? 0 : oldTab.length; // current capacity (0 if not allocated)
        int oldThr = threshold; // current resize threshold
        int newCap, newThr = 0;
        if (oldCap > 0) {
            if (oldCap >= MAXIMUM_CAPACITY) {
                // Already at (or above) the maximum capacity: cannot grow, so
                // raise the threshold to Integer.MAX_VALUE and keep the table.
                threshold = Integer.MAX_VALUE;
                return oldTab;
            }
            // Capacity is always doubled here (the assignment is inside the
            // condition). The threshold is doubled too, but only when the new
            // capacity is below the maximum and the old capacity was at least 16;
            // otherwise newThr stays 0 and is recomputed below.
            else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
                     oldCap >= DEFAULT_INITIAL_CAPACITY)
                newThr = oldThr << 1;
        }
        // Table not allocated but threshold is positive: use it as the capacity.
        // Presumably a constructor stored the requested initial capacity in
        // threshold; the constructors are not shown here -- confirm.
        else if (oldThr > 0)
            newCap = oldThr;
        else {       // first allocation with defaults: capacity 16, threshold 12
            newCap = DEFAULT_INITIAL_CAPACITY;
            newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
        }
        if (newThr == 0) {
            // Threshold not set above: derive it from the new capacity and the
            // load factor, capped at Integer.MAX_VALUE near the maximum capacity.
            float ft = (float)newCap * loadFactor;
            newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
                      (int)ft : Integer.MAX_VALUE);
        }
        threshold = newThr; // publish the new threshold
        @SuppressWarnings({"rawtypes","unchecked"})
            Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap]; // allocate the new table
        table = newTab;
        if (oldTab != null) {
            // Move every non-empty bin from the old table into the new one.
            for (int j = 0; j < oldCap; ++j) {
                Node<K,V> e;
                if ((e = oldTab[j]) != null) {
                    oldTab[j] = null; // detach the bin from the old table
                    if (e.next == null)
                        // Single node: place it directly at its index in the new table.
                        newTab[e.hash & (newCap - 1)] = e;
                    else if (e instanceof TreeNode)
                        // Tree bin: delegate the redistribution to TreeNode.split().
                        ((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
                    else {
                        // List bin: split into a "lo" list and a "hi" list, appending
                        // at the tail so the relative order of nodes is preserved.
                        Node<K,V> loHead = null, loTail = null; // nodes that stay at index j
                        Node<K,V> hiHead = null, hiTail = null; // nodes that move to index j + oldCap
                        Node<K,V> next;
                        do {
                            next = e.next;
                            // The bit of hash selected by oldCap decides the new index:
                            // 0 means the index is unchanged.
                            if ((e.hash & oldCap) == 0) {
                                if (loTail == null)
                                    loHead = e;
                                else
                                    loTail.next = e;
                                loTail = e;
                            }
                            else {
                                // Bit is 1: the node belongs at index j + oldCap.
                                if (hiTail == null)
                                    hiHead = e;
                                else
                                    hiTail.next = e;
                                hiTail = e;
                            }
                        } while ((e = next) != null);
                        if (loTail != null) {
                            loTail.next = null; // terminate the lo list
                            newTab[j] = loHead; // lo list keeps the old index
                        }
                        if (hiTail != null) {
                            hiTail.next = null; // terminate the hi list
                            newTab[j + oldCap] = hiHead; // hi list goes to old index + old capacity
                        }
                    }
                }
            }
        }
        return newTab;
    }

扩容高低位插入链表示意图

转红黑树

    // Converts the bin selected by hash from a linked list to a tree, but only
    // when the table length is at least MIN_TREEIFY_CAPACITY (64); a missing or
    // shorter table is resized instead.
    final void treeifyBin(Node<K,V>[] tab, int hash) {
        int n, index; Node<K,V> e;
        if (tab == null || (n = tab.length) < MIN_TREEIFY_CAPACITY) // table null or shorter than 64
            resize();
        else if ((e = tab[index = (n - 1) & hash]) != null) {
            // hd / tl: head and tail of the list of replacement TreeNodes.
            TreeNode<K,V> hd = null, tl = null;
            do {
                // Build a TreeNode for each list node and link it to the
                // previous one through prev / next, keeping the original order.
                TreeNode<K,V> p = replacementTreeNode(e, null);
                if (tl == null)
                    hd = p;
                else {
                    p.prev = tl;
                    tl.next = p;
                }
                tl = p;
            } while ((e = e.next) != null);
            // Install the new head in the bucket, then let it build the tree.
            if ((tab[index] = hd) != null)
                hd.treeify(tab);
        }
    }

总结

HashMap默认容量为16，最大容量为2^30，默认负载因子（loadFactor）为0.75f；当元素个数超过“容量 × 负载因子”时触发扩容，扩容后数组容量为原来的2倍；
转红黑树的条件：数组长度大于等于64（MIN_TREEIFY_CAPACITY），并且向数组下标i对应的链表尾部插入新节点后该链表长度超过8（TREEIFY_THRESHOLD），则该链表转红黑树；若此时数组长度小于64，则只进行扩容而不转树；
Node节点Hash值计算规则为 (h = key.hashCode()) ^ (h >>> 16)，即key对应的hashcode 异或 hashcode无符号右移16位的结果，让高16位也参与下标计算，这样计算hash分布更加均匀；key为null时hash值为0；
HashMap容量为2的幂次，因为此时 n-1 换算成二进制低位均为1，(n-1) & hash 等价于 hash 对 n 取模，既能用位运算快速定位下标，又可以保证数据均匀插入，减少hash冲突;
HashMap只允许一个key为null，多个value为null；