Rust HashMap 源码分析

Ilqjx

已于 2022-06-12 11:52:52 修改

阅读量612

点赞数

分类专栏： Rust 文章标签： rust

于 2022-06-03 18:13:07 首次发布

本文链接：https://blog.csdn.net/qq_44069788/article/details/125023795

版权

Rust 专栏收录该内容

15 篇文章 4 订阅

订阅专栏

Rust HashMap 源码分析

1 HashMap 数据结构
2 扩容

1 HashMap 数据结构

A hash map implemented with quadratic probing and SIMD lookup.

/// Outer hash map type: a thin wrapper whose only field is the inner `base` map.
///
/// `S` is the hasher-state type parameter and defaults to `RandomState`.
pub struct HashMap<K, V, S = RandomState> {
    // The wrapped map that stores the data; `insert` on this type forwards to it.
    // NOTE(review): `base` is presumably an alias for hashbrown's `hash_map`
    // module (the page's trailing summary shows `use hashbrown::hash_map as base;`).
    base: base::HashMap<K, V, S>,
}

/// Default value of the `S` type parameter of the wrapper `HashMap`.
///
/// Consists of two `u64` words, `k0` and `k1`.
// NOTE(review): these are presumably the two keys that seed the hasher — confirm.
pub struct RandomState {
    k0: u64,
    k1: u64,
}

/// Inner map type: a hash builder plus a raw table of `(K, V)` pairs.
///
/// `S` defaults to `DefaultHashBuilder` and the allocator `A` defaults to `Global`.
pub struct HashMap<K, V, S = DefaultHashBuilder, A: Allocator + Clone = Global> {
    // Passed to `make_insert_hash` / `make_hasher` to hash keys on insert.
    pub(crate) hash_builder: S,
    // Storage for the entries; each element is a `(K, V)` tuple.
    pub(crate) table: RawTable<(K, V), A>,
}

/// Typed view over the untyped `RawTableInner`.
///
/// The element type `T` appears only in `marker`; all bookkeeping fields live
/// in `table`.
pub struct RawTable<T, A: Allocator + Clone = Global> {
    // Type-erased table state (mask, control pointer, counters, allocator).
    table: RawTableInner<A>,
    // Tell dropck that we own instances of T.
    marker: PhantomData<T>,
}

/// Untyped table state shared by every `RawTable<T, A>`.
///
/// The first four fields are one word each (`usize` / `NonNull<u8>`); they are
/// the values the article's `transmute` demo prints after the two
/// `RandomState` words.
struct RawTableInner<A> {
    // Mask to get an index from a hash value. The value is one less than the
    // number of buckets in the table.
    bucket_mask: usize,

    // Memory layout of the allocation:
    // [Padding], T1, T2, ..., Tlast, C1, C2, ...
    //                                ^ points here
    // i.e. `ctrl` points at the first control byte, which comes right after
    // the element slots.
    ctrl: NonNull<u8>,

    // Number of elements that can be inserted before we need to grow the table
    growth_left: usize,

    // Number of elements in the table, only really used by len()
    items: usize,

    // Allocator handle; `prepare_resize` clones it to allocate the new table.
    alloc: A,
}

重点关注 struct RawTableInner<A> 的前四个字段：
1. bucket_mask: 哈希桶的数量减一，用作从哈希值计算桶下标的掩码。
2. ctrl: 指针，指向堆上哈希表内存中位于数据区之后的控制字节（ctrl）区的起始位置。
3. growth_left: 在需要扩容之前还可以插入的元素个数（下面的例子中等于 capacity - len，即容量 - 长度）。
4. items: 表中元素的个数，即 len (长度)。

use std::collections::HashMap;

// Demo: insert one entry, then reinterpret the map's bytes as six `usize`
// words to peek at its internal fields.
// NOTE(review): this relies on the in-memory field order of the map and on
// `usize` being 64 bits wide (the two `u64` words of `RandomState` each fill
// one slot). Rust does not guarantee the layout of such structs, so treat the
// output as illustrative only.
fn main() {
    let mut map = HashMap::new();
    map.insert("hello", "world");

    // `transmute` only compiles if `map` is exactly as large as `[usize; 6]`.
    let data: [usize; 6] = unsafe { std::mem::transmute(map) };
    // Sample output:
    // [4096496360859545168, 8063513060474661978, 3, 2940168701456, 2, 1]
    // The first two values are the two u64 words of RandomState (k0, k1).
    // bucket_mask: 3 (4 buckets - 1), ctrl: 2940168701456,
    // growth_left (cap - len): 2, items (len): 1
    println!("{:?}", data);
}

2 扩容

桶的数量以 2 的 n 次幂的方式进行扩容，对应的容量依次为：0，3 (2² - 1)，7 (2³ - 1)，14 (2⁴ - 2⁴ * 12.5%)，28 (2⁵ - 2⁵ * 12.5%)…

/// Outer-layer `insert`: forwards the key/value pair to the wrapped `base`
/// map and returns whatever that call returns.
pub fn insert(&mut self, k: K, v: V) -> Option<V> {
	self.base.insert(k, v)
}

/// Inserts `k`/`v` into the map.
///
/// Returns `Some(previous_value)` when an entry equivalent to `k` was already
/// present (its value is replaced in place), and `None` when a new entry was
/// added to the raw table.
pub fn insert(&mut self, k: K, v: V) -> Option<V> {
    // Hash the key once; the same hash drives both the lookup and the insert.
    let hash = make_insert_hash::<K, S>(&self.hash_builder, &k);

    match self.table.get_mut(hash, equivalent_key(&k)) {
        // Key already present: swap the new value in and hand back the old one.
        Some((_, slot)) => Some(mem::replace(slot, v)),
        // Key absent: store a fresh `(k, v)` pair. The hasher closure is passed
        // along so the raw table can hash stored entries if it has to grow.
        None => {
            let rehash = make_hasher::<K, _, V, S>(&self.hash_builder);
            self.table.insert(hash, (k, v), rehash);
            None
        }
    }
}

/// Raw-table `insert`: stores `value` in a slot chosen for `hash` and returns
/// the bucket it was written to.
///
/// `hasher` is only forwarded to `reserve`, i.e. it is used when the table has
/// to grow before the value can be stored.
pub fn insert(&mut self, hash: u64, value: T, hasher: impl Fn(&T) -> u64) -> Bucket<T> {
 unsafe {
        let mut index = self.table.find_insert_slot(hash);

        // We can avoid growing the table once we have reached our load
        // factor if we are replacing a tombstone. This works since the
        // number of EMPTY slots does not change in this case.
        let old_ctrl = *self.table.ctrl(index);
        if unlikely(self.table.growth_left == 0 && special_is_empty(old_ctrl)) {
            // The table grows inside this call.
            self.reserve(1, hasher);
            // The slot has to be looked up again after `reserve`.
            index = self.table.find_insert_slot(hash);
        }

        // Update the table's bookkeeping for the slot before writing the value.
        self.table.record_item_insert_at(index, old_ctrl, hash);

        let bucket = self.bucket(index);
        bucket.write(value);
        bucket
    }
}

/// Makes room for `additional` more insertions.
///
/// Returns immediately when `additional` does not exceed the table's remaining
/// `growth_left` budget; otherwise calls `reserve_rehash` with
/// `Fallibility::Infallible`.
#[cfg_attr(feature = "inline-more", inline)]
pub fn reserve(&mut self, additional: usize, hasher: impl Fn(&T) -> u64) {
    // Fast path: the current table can still absorb the request.
    if additional <= self.table.growth_left {
        return;
    }

    // The table grows inside `reserve_rehash`.
    // A plain `match` is used instead of `Result::unwrap_or_else`, which
    // bloats LLVM IR.
    match self.reserve_rehash(additional, hasher, Fallibility::Infallible) {
        Ok(()) => {}
        // SAFETY: the code treats an error as impossible here, presumably
        // because `Fallibility::Infallible` never yields `Err` — confirm
        // against `Fallibility`.
        Err(_) => unsafe { hint::unreachable_unchecked() },
    }
}

/// Ensures the table can hold `additional` more items, either by rehashing in
/// place or by resizing.
///
/// Returns `Err(fallibility.capacity_overflow())` when `items + additional`
/// overflows `usize`; otherwise returns `Ok(())` after an in-place rehash, or
/// the result of `resize`.
fn reserve_rehash(
    &mut self,
    additional: usize,
    hasher: impl Fn(&T) -> u64,
    fallibility: Fallibility,
) -> Result<(), TryReserveError> {
    // `Option::ok_or_else` is avoided on purpose because it bloats LLVM IR.
    let required = if let Some(total) = self.table.items.checked_add(additional) {
        total
    } else {
        return Err(fallibility.capacity_overflow());
    };

    let full_capacity = bucket_mask_to_capacity(self.table.bucket_mask);

    if required > full_capacity / 2 {
        // The table grows inside `resize`.
        // Conservatively resize to at least the next size up, to avoid
        // churning deletes into frequent rehashes.
        let target = usize::max(required, full_capacity + 1);
        return self.resize(target, hasher, fallibility);
    }

    // Plenty of spare capacity is locked up in DELETED entries: rehash in
    // place without re-allocating.
    self.rehash_in_place(hasher);
    Ok(())
}

/// Moves every element into a freshly allocated table sized for `capacity`,
/// then swaps that table in.
///
/// Returns the error from `prepare_resize` if the new table cannot be set up;
/// otherwise returns `Ok(())`. `hasher` is called once per existing element and
/// may panic.
fn resize(
 &mut self,
    capacity: usize,
    hasher: impl Fn(&T) -> u64,
    fallibility: Fallibility,
) -> Result<(), TryReserveError> {
    unsafe {
        // The new, larger table is allocated inside `prepare_resize`; the
        // returned value is a scope guard wrapping that table.
        let mut new_table =
            self.table
                .prepare_resize(TableLayout::new::<T>(), capacity, fallibility)?;

        // Copy all elements to the new table.
        for item in self.iter() {
            // This may panic.
            let hash = hasher(item.as_ref());

            // We can use a simpler version of insert() here since:
            // - there are no DELETED entries.
            // - we know there is enough space in the table.
            // - all elements are unique.
            let (index, _) = new_table.prepare_insert_slot(hash);
            new_table.bucket(index).copy_from_nonoverlapping(&item);
        }

        // We successfully copied all elements without panicking. Now replace
        // self with the new table. The old table will have its memory freed but
        // the items will not be dropped (since they have been moved into the
        // new table).
        // After the swap the guard holds the OLD table, so the guard's cleanup
        // closure (see `prepare_resize`) runs on the old table when `new_table`
        // goes out of scope.
        mem::swap(&mut self.table, &mut new_table);

        Ok(())
    }
}

/// Allocates a new table for `capacity` elements and wraps it in a scope guard.
///
/// The new table's counters are pre-set as if all of `self.items` had already
/// been moved in (`items` copied over, `growth_left` reduced by the same
/// amount); the caller is expected to copy the elements itself. Any error from
/// `RawTableInner::fallible_with_capacity` is propagated.
///
/// The guard's closure calls `free_buckets(table_layout)` on whatever table it
/// holds when it runs, unless that table is the empty singleton.
unsafe fn prepare_resize(
 	&self,
    table_layout: TableLayout,
    capacity: usize,
    fallibility: Fallibility,
) -> Result<crate::scopeguard::ScopeGuard<Self, impl FnMut(&mut Self)>, TryReserveError> {
    debug_assert!(self.items <= capacity);
	
    // This is the actual growth step: allocate and initialize the new table.
    let mut new_table = RawTableInner::fallible_with_capacity(
        self.alloc.clone(),
        table_layout,
        capacity,
        fallibility,
    )?;
    new_table.growth_left -= self.items;
    new_table.items = self.items;

    // The hash function may panic, in which case we simply free the new
    // table without dropping any elements that may have been copied into
    // it.
    //
    // This guard is also used to free the old table on success: the caller
    // (`resize`) swaps the old table into the guard with `mem::swap` once all
    // elements have been copied.
    Ok(guard(new_table, move |self_| {
        if !self_.is_empty_singleton() {
            self_.free_buckets(table_layout);
        }
    }))
}

为什么容量不是 2 的整数次幂？
当桶的数量为 1/2/4/8 时，会留一个空桶。当桶的数量大于 8 时，会留下 12.5% 的空桶。
e.g. 容量为 7（8 个桶）的表已满时，插入第 8 个元素会触发一次扩容，桶的数量变为 16，扩容后的容量为 14 (16 - 16 * 12.5%)。

/// Converts a bucket mask (bucket count minus one) into the number of elements
/// the table may hold before it has to grow.
///
/// * Masks below 8 (tables with 1/2/4/8 buckets): one slot is always kept
///   empty, so the capacity equals the mask itself.
/// * Larger tables: 12.5% of the buckets are kept empty, i.e. the capacity is
///   7/8 of the bucket count.
fn bucket_mask_to_capacity(bucket_mask: usize) -> usize {
    // Small tables: bucket count - 1, which is exactly the mask.
    if bucket_mask < 8 {
        return bucket_mask;
    }

    // Keep in mind that the bucket mask is one less than the bucket count.
    let buckets = bucket_mask + 1;
    (buckets / 8) * 7
}

Ilqjx

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Rust HashMap 源码分析

Rust HashMap 源码分析1 HashMap 数据结构1 HashMap 数据结构use hashbrown::hash_map as base;#[derive(Clone)]pub struct RandomState { k0: u64, k1: u64,}pub struct HashMap<K, V, S = RandomState> { base: base::HashMap<K, V, S>,}HashMap 有三个
复制链接

扫一扫