传统的 map 若基于大小为 N 的数组实现,需要 N × len(obj) 字节。在下图中,长度为 16 的数组只存储了 6 个元素,浪费了 10 个槽位。
sparseMap 使用位图(bitmap)记录每个槽位是否被占用,例如下图对应的二进制串为 0100|1001|0000|1011(共 6 个 1,对应 6 个已存储的元素)。无论槽位是空的还是满的,每个槽位都仅需要 1 bit 来记录。
查找代码具体实现
// Searches for `key`, whose hash is `hash`, by probing buckets starting at
// bucket_for_hash(hash). Returns an iterator to the matching element, or
// cend() when the probe sequence ends without a match.
const_iterator find_impl(const K &key, std::size_t hash) const
{
    std::size_t ibucket = bucket_for_hash(hash); // hash % m_mod

    for (std::size_t probe = 0;;)
    {
        // Which sparse bucket holds this slot: ibucket >> BUCKET_SHIFT
        const std::size_t bucket_idx = sparse_array::sparse_ibucket(ibucket);
        // Position of the slot inside that sparse bucket:
        // ibucket & sparse_array::BUCKET_MASK
        const auto slot = sparse_array::index_in_sparse_bucket(ibucket);
        const auto bucket = m_first_or_empty_sparse_bucket + bucket_idx;

        if (!bucket->has_value(slot))
        {
            // Termination: the slot holds no value and is not marked as
            // deleted, or probe has already reached m_bucket_count.
            if (!bucket->has_deleted_value(slot) || probe >= m_bucket_count)
            {
                return cend();
            }
        }
        else
        {
            // The slot holds a value; it is a hit only if the keys compare equal.
            auto value_it = bucket->value(slot);
            if (compare_keys(key, KeySelect()(*value_it)))
            {
                return const_iterator(m_sparse_buckets.cbegin() + bucket_idx, value_it);
            }
        }

        // No match at this slot: advance to the next bucket in the probe sequence.
        ++probe;
        ibucket = next_bucket(ibucket, probe);
    }
}
获取对应槽位中数据的实际存储位置(偏移量):
popcount 用于统计一个数值的二进制表示中有多少个 1。
// Converts a slot index within the bitmap into the offset of that slot's
// value in the actual storage.
//
// The offset is the number of set bits in m_bitmap_vals strictly below
// `index`. `index` must be less than BITMAP_NB_BITS (asserted).
size_type index_to_offset(size_type index) const noexcept
{
    tsl_sh_assert(index < BITMAP_NB_BITS);
    // Build the mask in the bitmap's own type: the literal `1` is an int, so
    // `1 << index` is undefined once index reaches the width of int.
    using bitmap_t = decltype(m_bitmap_vals);
    const bitmap_t below_index_mask =
        (static_cast<bitmap_t>(1) << index) - static_cast<bitmap_t>(1);
    // Count the set bits before `index`, i.e. the offset in real memory.
    return popcount(m_bitmap_vals & below_index_mask);
}
参考文章:
Smerity.com: How Google Sparsehash achieves two bits of overhead per entry using sparsetable
代码:
https://github.com/Tessil/sparse-map/blob/master/include/tsl/sparse_hash.h