面试必备--手撕HashMap源码

最新推荐文章于 2024-03-24 20:26:42 发布

会写代码的考研小哥

最新推荐文章于 2024-03-24 20:26:42 发布

阅读量216

点赞数

分类专栏：源码解析文章标签： hashmap

本文链接：https://blog.csdn.net/weixin_38499256/article/details/105840568

版权

源码解析专栏收录该内容

7 篇文章 0 订阅

订阅专栏

HashMap在JDK 1.8中的存储结构是数组加链表或红黑树。某个桶位中的链表是否会转换成红黑树，取决于两个条件：散列表数组的长度（容量）是否达到最小树化容量阈值(MIN_TREEIFY_CAPACITY，默认64)，以及该桶位中链表的长度是否达到树化阈值(TREEIFY_THRESHOLD，默认8)。
文章中还会讲到hashmap如何扩容和为什么要这么扩容等硬核知识。
HashMap涉及到的树化操作我会在后面单独写一篇关于红黑树的文章来分析。
好的，那么下面我为大家分析几个常用的HashMap方法。

1、HashMap()

/**
  * Constructs an empty <tt>HashMap</tt> with the specified initial
  * capacity and load factor.
  * 初始化hashmap
  *
  * @param  initialCapacity the initial capacity
  * @param  loadFactor      the load factor
  * @throws IllegalArgumentException if the initial capacity is negative
  *         or the load factor is nonpositive
  */
public HashMap(int initialCapacity, float loadFactor) {
    if (initialCapacity < 0)
        throw new IllegalArgumentException("Illegal initial capacity: " + initialCapacity);
    //如果扩容大小超过了最大值，则直接赋值最大值
    if (initialCapacity > MAXIMUM_CAPACITY)
        initialCapacity = MAXIMUM_CAPACITY;
    if (loadFactor <= 0 || Float.isNaN(loadFactor))
        throw new IllegalArgumentException("Illegal load factor: " + loadFactor);
    //负载因子
    this.loadFactor = loadFactor;
    //扩容阈值
    this.threshold = tableSizeFor(initialCapacity);
}

2、get()

/**
  * Returns the value to which the specified key is mapped,
  * or {@code null} if this map contains no mapping for the key.
  *
  * <p>More formally, if this map contains a mapping from a key
  * {@code k} to a value {@code v} such that {@code (key==null ? k==null :
  * key.equals(k))}, then this method returns {@code v}; otherwise
  * it returns {@code null}.  (There can be at most one such mapping.)
  *
  * <p>A return value of {@code null} does not <i>necessarily</i>
  * indicate that the map contains no mapping for the key; it's also
  * possible that the map explicitly maps the key to {@code null}.
  * The {@link #containsKey containsKey} operation may be used to
  * distinguish these two cases.
  *
  * @see #put(Object, Object)
  */
public V get(Object key) {
    // Look the node up by the spread hash of the key; a missing node maps to null.
    final Node<K,V> found = getNode(hash(key), key);
    if (found == null) {
        return null;
    }
    // Note that the stored value itself may also be null.
    return found.value;
}

/**
  * Implements Map.get and related methods
  *
  * @param hash hash for key
  * @param key the key
  * @return the node, or null if none
  */
final Node<K,V> getNode(int hash, Object key) {
    // Work on a local reference to the table for the whole lookup.
    final Node<K,V>[] buckets = table;
    if (buckets == null || buckets.length == 0) {
        return null;
    }

    // The bin index is the hash masked by (table length - 1).
    final Node<K,V> head = buckets[(buckets.length - 1) & hash];
    if (head == null) {
        return null;
    }

    // Case 1: the first node of the bin is the one we are looking for.
    if (head.hash == hash) {
        final K headKey = head.key;
        if (headKey == key || (key != null && key.equals(headKey))) {
            return head;
        }
    }

    // The bin holds a single node and it did not match.
    Node<K,V> cursor = head.next;
    if (cursor == null) {
        return null;
    }

    // Case 2: the bin is a tree bin, so the search is delegated to the tree node.
    if (head instanceof TreeNode) {
        return ((TreeNode<K,V>) head).getTreeNode(hash, key);
    }

    // Case 3: the bin is a plain linked list; walk it from the second node on.
    while (cursor != null) {
        if (cursor.hash == hash) {
            final K candidate = cursor.key;
            if (candidate == key || (key != null && key.equals(candidate))) {
                return cursor;
            }
        }
        cursor = cursor.next;
    }
    return null;
}

3、put()

/**
  * Associates the specified value with the specified key in this map.
  * If the map previously contained a mapping for the key, the old
  * value is replaced.
  *
  * @param key key with which the specified value is to be associated
  * @param value value to be associated with the specified key
  * @return the previous value associated with <tt>key</tt>, or
  *         <tt>null</tt> if there was no mapping for <tt>key</tt>.
  *         (A <tt>null</tt> return can also indicate that the map
  *         previously associated <tt>null</tt> with <tt>key</tt>.)
  */
public V put(K key, V value) {
    // Spread the key's hash once, then delegate to putVal with
    // onlyIfAbsent = false (existing values are overwritten) and evict = true.
    final int spreadHash = hash(key);
    return putVal(spreadHash, key, value, false, true);
}

/**
  * Implements Map.put and related methods
  *
  * @param hash hash for key
  * @param key the key
  * @param value the value to put
  * @param onlyIfAbsent if true, don't change existing value
  * @param evict if false, the table is in creation mode.
  * @return previous value, or null if none
  *
  * 路由算法：(n - 1) & hash
  */
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
               boolean evict) {
    // tab: local reference to this map's hash table
    // p:   the node currently being examined in the target bin
    // n:   length of the table array
    // i:   bin index computed by the routing expression (n - 1) & hash
    Node<K,V>[] tab; Node<K,V> p; int n, i;

    // Lazy initialization: if the table is still null or empty, resize()
    // is called to create it before anything is inserted.
    if ((tab = table) == null || (n = tab.length) == 0)
        n = (tab = resize()).length;

    // Simplest case: the target bin is empty, so the new node is stored
    // directly in the table slot.
    if ((p = tab[i = (n - 1) & hash]) == null)
        tab[i] = newNode(hash, key, value, null);

    else {
        // e: non-null once a node with the same key as the one being inserted
        //    has been found (i.e. an existing mapping)
        // k: scratch variable holding the key under comparison
        Node<K,V> e; K k;

        // The head of the bin already holds this key: remember it so its
        // value can be replaced below.
        if (p.hash == hash &&
            ((k = p.key) == key || (key != null && key.equals(k))))
            e = p;
        // The bin is a tree bin: insert using the tree node's own routine.
        else if (p instanceof TreeNode)
            e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);

        // The bin is a linked list whose head does not match the key.
        else {
            // Walk the list comparing keys one by one:
            //   match found -> stop, the value is replaced below
            //   end reached -> append a new node at the tail
            for (int binCount = 0; ; ++binCount) {
                // Reached the end of the list without finding the key,
                // so the new node is appended after the last node p.
                if ((e = p.next) == null) {
                    p.next = newNode(hash, key, value, null);
                    // The list has grown long enough to meet the treeify
                    // threshold, so treeifyBin is invoked for this bin.
                    if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                        // Treeify step (implementation not shown here).
                        treeifyBin(tab, hash);
                    break;
                }
                // Found an existing node with the same key: leave the loop
                // with e pointing at it so its value can be replaced.
                if (e.hash == hash &&
                    ((k = e.key) == key || (key != null && key.equals(k))))
                    break;
                // Advance p to the next node and keep walking.
                p = e;
            }
        }

        // Reached with e != null when an existing mapping was found:
        //   1. the head of the bin has the same key, or
        //   2. a node further down the list has the same key, or
        //   3. putTreeVal returned a non-null node.
        // e is then the node already in the map whose value may be replaced.
        if (e != null) { // existing mapping for key
            V oldValue = e.value;
            // Overwrite unless onlyIfAbsent is set and a non-null value exists.
            if (!onlyIfAbsent || oldValue == null)
                e.value = value;
            // Callback hook defined elsewhere; its effect is not visible in
            // this block. NOTE(review): presumably used by subclasses such
            // as LinkedHashMap - confirm.
            afterNodeAccess(e);
            return oldValue;
        }
    }
    // modCount is bumped only for insertions of new nodes; the replacement
    // path above returns before reaching this line.
    ++modCount;
    // A new node was added: increment size and grow the table once the
    // new size exceeds the resize threshold.
    if (++size > threshold)
        resize();
    // Callback hook defined elsewhere; receives the evict flag unchanged.
    afterNodeInsertion(evict);
    return null;
}

4、remove()

/**
  * Removes the mapping for the specified key from this map if present.
  *
  * @param  key key whose mapping is to be removed from the map
  * @return the previous value associated with <tt>key</tt>, or
  *         <tt>null</tt> if there was no mapping for <tt>key</tt>.
  *         (A <tt>null</tt> return can also indicate that the map
  *         previously associated <tt>null</tt> with <tt>key</tt>.)
  */
public V remove(Object key) {
    // matchValue = false: only the key has to match; movable = true.
    final Node<K,V> removed = removeNode(hash(key), key, null, false, true);
    if (removed == null) {
        return null;
    }
    // Hand back the value that was associated with the removed key.
    return removed.value;
}

/**
  * Implements Map.remove and related methods
  *
  * @param hash hash for key
  * @param key the key
  * @param value the value to match if matchValue, else ignored
  * @param matchValue if true only remove if value is equal: when true, both the key and the value must match for the node to be removed; when false, only the key has to match
  * @param movable if false do not move other nodes while removing
  * @return the node, or null if none
  */
final Node<K,V> removeNode(int hash, Object key, Object value,boolean matchValue, boolean movable) {
    // tab:   local reference to this map's hash table
    // p:     current node; starts as the head of the bin and, while walking
    //        a list, trails one node behind e (the predecessor)
    // n:     length of the table array
    // index: bin index computed as (n - 1) & hash
    Node<K,V>[] tab; Node<K,V> p; int n, index;
    if ((tab = table) != null && (n = tab.length) > 0 &&
        (p = tab[index = (n - 1) & hash]) != null) {
        // The routed bin is non-empty: first locate the node, then unlink it.
        // node: the node found for the key (null if none)
        // e:    the node after p while scanning
        Node<K,V> node = null, e; K k; V v;
        // Case 1: the head of the bin is the node to remove.
        if (p.hash == hash &&
            ((k = p.key) == key || (key != null && key.equals(k))))
            node = p;
        else if ((e = p.next) != null) {
            // Case 2: the bin is a tree bin; look the node up in the tree.
            if (p instanceof TreeNode)
                node = ((TreeNode<K,V>)p).getTreeNode(hash, key);
            // Case 3: the bin is a linked list; scan it, keeping p as the
            // predecessor of e so the match can be unlinked afterwards.
            else {
                do {
                    if (e.hash == hash &&
                        ((k = e.key) == key ||
                         (key != null && key.equals(k)))) {
                        node = e;
                        break;
                    }
                    p = e;
                } while ((e = e.next) != null);
            }
        }

        // Proceed with removal only if a node was found and, when matchValue
        // is set, its value also matches. The || chain short-circuits:
        //   matchValue == false -> condition already true, value is ignored
        //   matchValue == true  -> try reference equality on the value first,
        //                          then fall back to value.equals(v)
        if (node != null && (!matchValue || (v = node.value) == value || (value != null && value.equals(v)))) {
            // Case 1: the node is a tree node; use the tree removal routine.
            if (node instanceof TreeNode)
                ((TreeNode<K,V>)node).removeTreeNode(this, tab, movable);
            // Case 2: the node is the head of the bin (p was never advanced);
            // its successor becomes the new head.
            else if (node == p)
                tab[index] = node.next;
            // Case 3: the node is inside the list; p is its predecessor, so
            // link p past the removed node.
            else
                p.next = node.next;

            ++modCount;
            --size;
            // Callback hook defined elsewhere; its effect is not visible here.
            afterNodeRemoval(node);
            return node;
        }
    }
    return null;
}

5、resize()

/**
  * Initializes or doubles table size.  If null, allocates in
  * accord with initial capacity target held in field threshold.
  * Otherwise, because we are using power-of-two expansion, the
  * elements from each bin must either stay at same index, or move
  * with a power of two offset in the new table.
  *
  * 为什么需要扩容？
  * 为了解决哈希冲突导致的链化影响查询效率的问题，扩容会缓解该问题
  *
  * @return the table
  */
final Node<K,V>[] resize() {
    // oldTab: the hash table before resizing
    Node<K,V>[] oldTab = table;
    // oldCap: length of the table before resizing (0 if not yet allocated)
    int oldCap = (oldTab == null) ? 0 : oldTab.length;
    // oldThr: resize threshold before resizing
    int oldThr = threshold;
    // newCap: length of the table after resizing
    // newThr: threshold that will trigger the next resize
    int newCap, newThr = 0;

    // The table has already been allocated: this is a regular growth step.
    if (oldCap > 0) {
        // The table is already at (or beyond) the maximum capacity: do not
        // grow, and raise the threshold to Integer.MAX_VALUE.
        if (oldCap >= MAXIMUM_CAPACITY) {
            threshold = Integer.MAX_VALUE;
            return oldTab;
        }
        // Double the capacity via a left shift. If the doubled capacity is
        // still below MAXIMUM_CAPACITY and the old capacity is at least
        // DEFAULT_INITIAL_CAPACITY, the threshold is simply doubled as well.
        else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY && oldCap >= DEFAULT_INITIAL_CAPACITY)
            newThr = oldThr << 1; // double threshold
    }

    // oldCap == 0: the table has not been allocated yet, but a positive
    // value was stored in threshold, e.g. by the
    // HashMap(initialCapacity, loadFactor) constructor.
    // NOTE(review): the original author also lists HashMap(initialCapacity)
    // and HashMap(map) with a non-empty map as reaching this branch; those
    // constructors are not visible here - confirm.
    else if (oldThr > 0) // initial capacity was placed in threshold
        newCap = oldThr;

    // oldCap == 0 and oldThr == 0: fall back to the defaults.
    // NOTE(review): presumably the no-arg HashMap() case - confirm.
    else {               // zero initial threshold signifies using defaults
        newCap = DEFAULT_INITIAL_CAPACITY;
        newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
    }

    // Every path above that left newThr unassigned (still 0) ends up here:
    // derive newThr from newCap and loadFactor, capping at Integer.MAX_VALUE.
    if (newThr == 0) {
        float ft = (float)newCap * loadFactor;
        newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
                  (int)ft : Integer.MAX_VALUE);
    }

    threshold = newThr;
    // Allocate the new table array with the new capacity.
    @SuppressWarnings({"rawtypes","unchecked"})
    Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
    table = newTab;

    if (oldTab != null) {

        for (int j = 0; j < oldCap; ++j) {
            // e: the node currently being moved
            Node<K,V> e;
            if ((e = oldTab[j]) != null) {
                // Drop the old table's reference to the bin so the old
                // array no longer keeps the nodes reachable.
                oldTab[j] = null;
                // Case 1: the bin holds a single node (no collision);
                // compute its index in the new table and store it there.
                if (e.next == null)
                    newTab[e.hash & (newCap - 1)] = e;
                // Case 2: the bin is a tree bin; splitting is delegated.
                else if (e instanceof TreeNode)
                    ((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
                // Case 3: the bin is a linked list; split it into two lists.
                else { // preserve order
                    // "lo" list: nodes that stay at the same index j in the
                    // new table.
                    Node<K,V> loHead = null, loTail = null;
                    // "hi" list: nodes that move to index j + oldCap in the
                    // new table.
                    Node<K,V> hiHead = null, hiTail = null;
                    // next: the node following e in the original list
                    Node<K,V> next;
                    do {
                        next = e.next;
                        //hash -> .... 1 1111
                        //hash -> .... 0 1111
                        //oldCap -> 0b 1 0000
                        // The bit of the hash selected by oldCap decides the
                        // target list: 0 -> lo list.
                        if ((e.hash & oldCap) == 0) {
                            if (loTail == null)
                                // First node of the lo list becomes its head.
                                loHead = e;
                            else
                                // Append to the tail of the lo list.
                                loTail.next = e;
                            // Advance the tail pointer to the appended node.
                            loTail = e;
                        }
                        // (e.hash & oldCap) != 0 -> hi list.
                        else {
                            if (hiTail == null)
                                // First node of the hi list becomes its head.
                                hiHead = e;
                            else
                                // Append to the tail of the hi list.
                                hiTail.next = e;
                            // Advance the tail pointer to the appended node.
                            hiTail = e;
                        }
                    } while ((e = next) != null);

                    if (loTail != null) {
                        // loTail is the last node of the lo list; its next
                        // field may still point at a node from the old list
                        // order, so it is cleared.
                        loTail.next = null;
                        // Install the lo list at the same index in the new table.
                        newTab[j] = loHead;
                    }
                    if (hiTail != null) {
                        // hiTail is the last node of the hi list; its next
                        // field may still point at a node from the old list
                        // order, so it is cleared.
                        hiTail.next = null;
                        // Install the hi list at index j + oldCap in the new table.
                        newTab[j + oldCap] = hiHead;
                    }
                }
            }
        }
    }
    return newTab;
}

6、hash()

/**
  * Computes key.hashCode() and spreads (XORs) higher bits of hash
  * to lower.  Because the table uses power-of-two masking, sets of
  * hashes that vary only in bits above the current mask will
  * always collide. (Among known examples are sets of Float keys
  * holding consecutive whole numbers in small tables.)  So we
  * apply a transform that spreads the impact of higher bits
  * downward. There is a tradeoff between speed, utility, and
  * quality of bit-spreading. Because many common sets of hashes
  * are already reasonably distributed (so don't benefit from
  * spreading), and because we use trees to handle large sets of
  * collisions in bins, we just XOR some shifted bits in the
  * cheapest possible way to reduce systematic lossage, as well as
  * to incorporate impact of the highest bits that would otherwise
  * never be used in index calculations because of table bounds.
  *
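  * Note: the bucket index is computed as (n - 1) & hash, and the table
  * length n is almost always smaller than 2^16, so without extra mixing only
  * the low 16 bits (or even fewer) of hashCode() would influence the index.
  * JDK 1.7 mixed the bits with several shift-and-XOR rounds in its hash();
  * JDK 1.8 reduces this to a single step: h >>> 16 moves the high 16 bits
  * down, and XOR-ing the result with h folds them into the low 16 bits,
  * so the high bits also take part and the indices are spread more evenly.
  * XOR (^) is used rather than & or | because & biases the result bits
  * toward 0 and | biases them toward 1, while ^ keeps them balanced.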
  */
static final int hash(Object key) {
    // A null key always hashes to 0, which routes it to the bin at index 0.
    if (key == null) {
        return 0;
    }
    // Fold the upper 16 bits of hashCode() into the lower 16 bits with XOR so
    // that the high bits also influence the (n - 1) & hash index computation.
    final int rawHash = key.hashCode();
    final int upperBits = rawHash >>> 16;
    return rawHash ^ upperBits;
}