HashMap之putVal的理解

大师叨叨

已于 2024-05-20 22:06:22 修改

阅读量234

点赞数 11

文章标签：哈希算法 java

于 2024-04-21 20:24:35 首次发布

本文链接：https://blog.csdn.net/Hello___A__World/article/details/138043841

版权

“桶”(bucket)：哈希表通常由一个数组构成，数组的每个位置（即桶）可以包含一个链表（或树结构，如红黑树）。在这种情况下，数组每个索引位置上存储的是链表的头节点引用。当键值对根据它们的键的哈希码映射到相同的桶时，这些键值对会以链表的形式按顺序存储。如果一个桶中链表的长度变得太长（链表长度超过了 8），那么链表可能会转化为更高效的树结构，如红黑树（自平衡二叉搜索树），以优化查找性能。
总之，桶的概念是灵活的，它可以是存放单个键值对的数组元素，也可以是存放多个键值对（通过链表或树）的容器。这是我个人的理解！

哈希冲突 指的是多个元素映射到了相同的哈希值或桶（数组位置）上，导致它们需要存储在同一个位置。

1、putVal(int hash, K key, V value, boolean onlyIfAbsent, boolean evict)

当向HashMap添加元素时，如果新节点被追加到一个已经有TREEIFY_THRESHOLD（该值为8）个节点的链表末尾（即追加后链表长度超过8），并且HashMap内部数组的容量大于或等于MIN_TREEIFY_CAPACITY（该值为64），则该链表会被转换为红黑树。如果链表长度已满足上述条件，但数组容量小于MIN_TREEIFY_CAPACITY，则不会进行树化，而是调用resize()对哈希表进行扩容。


// Spreads the upper half of a key's hashCode into its lower half: the code is
// XORed with itself shifted right (unsigned) by 16 bits.
// Bucket indexes are later taken as (n - 1) & hash, which only looks at the low
// bits, so mixing the high bits in makes those low bits vary more and helps
// spread entries across buckets.
// Worked example:
//   h              = 99162322 -> 101111010010001100011010010
//   h >>> 16       = 10111101001 (1513)
//   h ^ (h >>> 16) = 99162322 ^ 1513 = 99163451
// A null key always hashes to 0.
static final int hash(Object key) {
    if (key == null) {
        return 0;
    }
    final int code = key.hashCode();
    return code ^ (code >>> 16);
}

/**
 * Stores {@code value} under {@code key}.
 *
 * <p>Delegates to {@code putVal} with the spread hash of the key,
 * {@code onlyIfAbsent = false} and {@code evict = true}.
 *
 * @param key the key
 * @param value the value to put
 * @return whatever {@code putVal} returns for this call
 */
public V put(K key, V value) {
    final int spreadHash = hash(key);
    return putVal(spreadHash, key, value, false, true);
}

/**
 * Implements Map.put and related methods.
 *
 * <p>Flow: make sure the bucket array exists, locate the bucket for
 * {@code hash}, then either drop a new node into an empty bucket or search
 * the bucket (first node, tree, or linked list) for an equal key. An
 * existing mapping is updated in place and its old value returned; a new
 * mapping bumps {@code modCount} and {@code size} and may trigger a resize.
 *
 * @param hash hash for key
 * @param key the key
 * @param value the value to put
 * @param onlyIfAbsent if true, don't change existing value
 * @param evict if false, the table is in creation mode.
 * @return previous value, or null if none
 */
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
               boolean evict) {
    // Work on a local reference to the internal array; when it is missing or
    // empty, resize() supplies the array to use.
    Node<K,V>[] buckets = table;
    if (buckets == null || buckets.length == 0) {
        buckets = resize();
    }
    // Assumes the array length is a power of two (the article attributes this to
    // tableSizeFor(), which is not shown here). Under that assumption
    // (capacity - 1) is a bit mask that keeps only the low bits of the hash,
    // i.e. the non-negative remainder of hash modulo capacity.
    final int capacity = buckets.length;
    final int slot = (capacity - 1) & hash;
    Node<K,V> cursor = buckets[slot];

    if (cursor == null) {
        // Empty bucket: the new node becomes its only entry.
        buckets[slot] = newNode(hash, key, value, null);
    } else {
        // Collision: the bucket at this slot already holds at least one node.
        Node<K,V> match;
        final K headKey = cursor.key;
        if (cursor.hash == hash &&
            (headKey == key || (key != null && key.equals(headKey)))) {
            // The first node of the bucket already carries this key.
            match = cursor;
        } else if (cursor instanceof TreeNode) {
            // Tree bin: the lookup/insertion is delegated to the tree node.
            match = ((TreeNode<K,V>)cursor).putTreeVal(this, buckets, hash, key, value);
        } else {
            // Linked-list bin: walk the chain from the first node.
            int binCount = 0;
            while (true) {
                match = cursor.next;
                if (match == null) {
                    // 1. Reached the tail without finding the key: append a new node.
                    cursor.next = newNode(hash, key, value, null);
                    // binCount is the number of nodes stepped past so far, so this
                    // fires when the chain held TREEIFY_THRESHOLD nodes before the
                    // append ("-1 for 1st": the first node is never counted).
                    if (binCount >= TREEIFY_THRESHOLD - 1) {
                        treeifyBin(buckets, hash);
                    }
                    break;
                }
                // 2. The next node exists: stop if it already holds this key.
                final K nextKey = match.key;
                if (match.hash == hash &&
                    (nextKey == key || (key != null && key.equals(nextKey)))) {
                    break;
                }
                // 3. Otherwise advance one node and keep searching.
                cursor = match;
                ++binCount;
            }
        }
        if (match != null) { // existing mapping for key
            final V previous = match.value;
            // Overwrite unless the caller asked for put-if-absent semantics and
            // a non-null value is already stored.
            if (!onlyIfAbsent || previous == null) {
                match.value = value;
            }
            afterNodeAccess(match); // callback, not shown here
            return previous;
        }
    }
    // A new mapping was added. modCount counts such modifications (presumably
    // what iterators use for fail-fast checks; the iterator code is not shown here).
    ++modCount;
    size += 1;
    if (size > threshold) {
        // The entry count now exceeds the resize threshold: grow the table.
        resize();
    }
    afterNodeInsertion(evict); // callback, not shown here
    return null;
}

2、resize()

/**
 * Initializes or doubles table size.  If null, allocates in
 * accord with initial capacity target held in field threshold.
 * Otherwise, because we are using power-of-two expansion, the
 * elements from each bin must either stay at same index, or move
 * with a power of two offset in the new table.
 *
 * <p>Unless the old capacity has already reached MAXIMUM_CAPACITY (in which
 * case only {@code threshold} is raised and the old array is returned), this
 * method stores a new {@code threshold}, installs a freshly allocated array in
 * {@code table}, and relinks every node of the old array into the new one.
 *
 * @return the table
 */
final Node<K,V>[] resize() {
    Node<K,V>[] oldTab = table; // The current table.
    int oldCap = (oldTab == null) ? 0 : oldTab.length; // Current capacity (0 if no table yet).
    int oldThr = threshold; // Old threshold to determine when to resize.
    int newCap, newThr = 0; // newThr == 0 means "still to be derived from the load factor" below.
    if (oldCap > 0) {
        // MAXIMUM_CAPACITY = 1 << 30 (2^30 = 1,073,741,824)
        if (oldCap >= MAXIMUM_CAPACITY) {
            // Cannot grow any further: raise the threshold to the maximum and keep the old table.
            threshold = Integer.MAX_VALUE;
            return oldTab;
        }
        // The capacity is doubled unconditionally by the assignment inside the condition;
        // the threshold is only doubled when both checks pass, otherwise newThr stays 0
        // and is recomputed from the load factor further down.
        else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
                 oldCap >= DEFAULT_INITIAL_CAPACITY) // DEFAULT_INITIAL_CAPACITY = 1 << 4 = 16
            newThr = oldThr << 1; // double threshold
    } else if (oldThr > 0) { 
        // initial capacity was placed in threshold
        // Presumably reached when the map was created through a constructor that takes an
        // initial capacity (constructors are not shown here) and no table exists yet.
        newCap = oldThr;
    } else {               // zero initial threshold signifies using defaults
        // Reached when there is no table yet (oldCap == 0) and oldThr is not positive --
        // presumably the first insertion into a map created with the no-arg constructor
        // (constructor not shown here). newThr is still 0 at this point.
        newCap = DEFAULT_INITIAL_CAPACITY; // 16
        newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY); // 12
    }
    if (newThr == 0) {
        // No threshold chosen above: derive it as newCap * loadFactor, falling back to
        // Integer.MAX_VALUE when either the capacity or the product reaches MAXIMUM_CAPACITY.
        float ft = (float)newCap * loadFactor;
        newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
                  (int)ft : Integer.MAX_VALUE);
    }
    threshold = newThr; // Set the new threshold for future resizing.
    @SuppressWarnings({"rawtypes","unchecked"})
        Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap]; // Create the new table.
    table = newTab; // Replace the reference in HashMap with the new table.
    if (oldTab != null) {
        // Iterate over the old table and rehash all entries.
        for (int j = 0; j < oldCap; ++j) {
            Node<K,V> e;
            if ((e = oldTab[j]) != null) {
                oldTab[j] = null; // Help GC.
                if (e.next == null) // Only one node on list.
                    // Single-node bucket: place the node directly at its index in the new array.
                    newTab[e.hash & (newCap - 1)] = e;
                else if (e instanceof TreeNode) // Tree bin.
                    // Red-black tree bucket: splitting is delegated to TreeNode.split.
                    // NOTE(review): split is not shown here; the article states that a tree
                    // shrinking to UNTREEIFY_THRESHOLD (6) nodes is turned back into a linked
                    // list -- confirm the exact comparison against the JDK source.
                    ((TreeNode<K,V>)e).split(this, newTab, j, oldCap);// Split nodes into proper bins.
                else { // Preserve order for linked list bins.
                    // The chain is split into two chains that keep the original relative order:
                    //   lo: nodes whose (hash & oldCap) bit is 0 -- they stay at index j.
                    //   hi: nodes whose (hash & oldCap) bit is 1 -- they move to index j + oldCap.
                    // Head is the first node of each chain; Tail is its current last node,
                    // used to append in O(1).
                    Node<K,V> loHead = null, loTail = null;
                    Node<K,V> hiHead = null, hiTail = null;
                    Node<K,V> next;
                    do {
                        // Remember the successor first: e.next may be overwritten when a
                        // later node is appended behind e in its lo/hi chain.
                        next = e.next;
                        // Calculate whether it should stay at same index or move by oldCap.
                        if ((e.hash & oldCap) == 0) {
                        // Index stays the same: append to the lo chain.
                            if (loTail == null)
                                loHead = e;
                            else
                                loTail.next = e;
                            loTail = e;
                        }
                        else {
                            // Index moves by oldCap: append to the hi chain.
                            if (hiTail == null)
                                hiHead = e;
                            else
                                hiTail.next = e;
                            hiTail = e;
                        }
                    } while ((e = next) != null);
                    if (loTail != null) {
                        // Terminate the lo chain (its tail may still point at a hi node).
                        loTail.next = null;
                        newTab[j] = loHead;
                    }
                    if (hiTail != null) {
                        // Terminate the hi chain (its tail may still point at a lo node).
                        hiTail.next = null;
                        // New index = old index + length of the old array (oldCap).
                        newTab[j + oldCap] = hiHead;
                    }
                }
            }
        }
    }
    return newTab;
}

3、treeifyBin(Node<K,V>[] tab, int hash)

/**
 * Replaces all linked nodes in bin at index for given hash unless
 * table is too small, in which case resizes instead.
 *
 * @param tab the bucket array to operate on
 * @param hash hash whose bucket should be converted
 */
final void treeifyBin(Node<K,V>[] tab, int hash) {
    // A missing array, or one shorter than MIN_TREEIFY_CAPACITY, is resized
    // rather than treeified.
    if (tab == null || tab.length < MIN_TREEIFY_CAPACITY) {
        resize();
        return;
    }
    final int index = (tab.length - 1) & hash;
    final Node<K,V> first = tab[index];
    if (first == null) {
        return;
    }
    // Build a chain of tree nodes mirroring the bucket's list order,
    // linked through both prev and next.
    TreeNode<K,V> head = null; // first converted node
    TreeNode<K,V> tail = null; // last converted node so far
    for (Node<K,V> node = first; node != null; node = node.next) {
        final TreeNode<K,V> converted = replacementTreeNode(node, null);
        if (tail == null) {
            head = converted;
        } else {
            converted.prev = tail;
            tail.next = converted;
        }
        tail = converted;
    }
    // Install the converted chain in the bucket, then let its head build the tree.
    tab[index] = head;
    if (head != null) {
        head.treeify(tab);
    }
}