folly::AtomicHashMap实现分析

最新推荐文章于 2024-08-12 17:12:34 发布

junkangchen

最新推荐文章于 2024-08-12 17:12:34 发布

阅读量1.2k

点赞数 1

分类专栏： linux c++

本文链接：https://blog.csdn.net/cjk_cynosure/article/details/109151780

版权

linux c++ 专栏收录该内容

7 篇文章

订阅专栏

官方介绍

Efficiently thread safe for inserts (main point of this stuff),wait-free for lookups. 插入和查找都是wait-free的

You can erase from this container, but the cell containing the key will not be free or reclaimed.可以删除但是实际上内存不会删除

clear()函数为把全部元素清楚，只能一个线程调用

AtomicHashMap概览

AtomicHashMap由哪些组成呢？

std::atomic<SubMap*> subMaps_[kNumSubMaps_];// submap是AtomicHashArray  kNumSubMaps_=16

std::atomic<uint32_t> numMapsAllocated_;

可以看到一个AtomicHashMap由16个数组组成，结构体AtomicHashArray，AtomicHashArray的组成如下：

  const size_t capacity_;
  const size_t maxEntries_;
  const KeyT kEmptyKey_;//表示空key
  const KeyT kLockedKey_;//表示该槽位正在执行操作，lock住
  const KeyT kErasedKey_;//表示已经被删除的key
  
  ThreadCachedInt<uint64_t> numEntries_; // Successful key inserts
  ThreadCachedInt<uint64_t> numPendingEntries_; // Used by insertInternal
  std::atomic<int64_t> isFull_; // Used by insertInternal
  std::atomic<int64_t> numErases_; // Successful key erases

  value_type cells_[0]; // This must be the last field of this class

cells_是该类最后的成员，&cells 是该数组存元素的数组指针，具体可以看下面的初始化流程。

解决冲突的方法，是线性探测。为啥不用开链法，实现比较复杂，而且线性探测，cache局部性好

AtomicHashMap初始化

看看构造函数

template <
    typename KeyT,
    typename ValueT,
    typename HashFcn,
    typename EqualFcn,
    typename Allocator,
    typename ProbeFcn,
    typename KeyConvertFcn>
AtomicHashMap<
    KeyT,
    ValueT,
    HashFcn,
    EqualFcn,
    Allocator,
    ProbeFcn,
    KeyConvertFcn>::AtomicHashMap(size_t finalSizeEst, const Config& config)
    : kGrowthFrac_(
          config.growthFactor < 0 ? 1.0f - config.maxLoadFactor
                                  : config.growthFactor) {
  CHECK(config.maxLoadFactor > 0.0f && config.maxLoadFactor < 1.0f);
  subMaps_[0].store(
      SubMap::create(finalSizeEst, config).release(),
      std::memory_order_relaxed);
  auto subMapCount = kNumSubMaps_;
  FOR_EACH_RANGE (i, 1, subMapCount) {
    subMaps_[i].store(nullptr, std::memory_order_relaxed);
  }
  numMapsAllocated_.store(1, std::memory_order_relaxed);
}

subMap就是AtomicHashArray，构造函数会调用create先创建一个数组，然后将subMaps_其他槽位先置为空指针。

看下create过程

template <
    class KeyT,
    class ValueT,
    class HashFcn,
    class EqualFcn,
    class Allocator,
    class ProbeFcn,
    class KeyConvertFcn>
typename AtomicHashArray<
    KeyT,
    ValueT,
    HashFcn,
    EqualFcn,
    Allocator,
    ProbeFcn,
    KeyConvertFcn>::SmartPtr
AtomicHashArray<
    KeyT,
    ValueT,
    HashFcn,
    EqualFcn,
    Allocator,
    ProbeFcn,
    KeyConvertFcn>::create(size_t maxSize, const Config& c) {
  CHECK_LE(c.maxLoadFactor, 1.0);
  CHECK_GT(c.maxLoadFactor, 0.0);
  CHECK_NE(c.emptyKey, c.lockedKey);
  size_t capacity = size_t(maxSize / c.maxLoadFactor);
  size_t sz = sizeof(AtomicHashArray) + sizeof(value_type) * capacity;

  auto const mem = Allocator().allocate(sz);
  try {
    new (mem) AtomicHashArray(
        capacity,
        c.emptyKey,
        c.lockedKey,
        c.erasedKey,
        c.maxLoadFactor,
        c.entryCountThreadCacheSize);
  } catch (...) {
    Allocator().deallocate(mem, sz);
    throw;
  }

  SmartPtr map(static_cast<AtomicHashArray*>((void*)mem));

  FOR_EACH_RANGE (i, 0, map->capacity_) {
    cellKeyPtr(map->cells_[i])
        ->store(map->kEmptyKey_, std::memory_order_relaxed);
  }
  return map;
}

分配内存mem，其大小是sizeof(AtomicHashArray) + sizeof(value_type) * capacity;将new出来的AtomicHashArray 绑定在mem上，然后将数组每个元素置为 kEmptyKey_ 空key.

查找流程

typename AtomicHashMap<
    KeyT,
    ValueT,
    HashFcn,
    EqualFcn,
    Allocator,
    ProbeFcn,
    KeyConvertFcn>::iterator
AtomicHashMap<
    KeyT,
    ValueT,
    HashFcn,
    EqualFcn,
    Allocator,
    ProbeFcn,
    KeyConvertFcn>::find(LookupKeyT k) {
  SimpleRetT ret = findInternal<LookupKeyT, LookupHashFcn, LookupEqualFcn>(k);
  if (!ret.success) {
    return end();
  }
  SubMap* subMap = subMaps_[ret.i].load(std::memory_order_relaxed);
  return iterator(this, ret.i, subMap->makeIter(ret.j));
}

// findInternal --
template <
    typename KeyT,
    typename ValueT,
    typename HashFcn,
    typename EqualFcn,
    typename Allocator,
    typename ProbeFcn,
    typename KeyConvertFcn>
template <class LookupKeyT, class LookupHashFcn, class LookupEqualFcn>
typename AtomicHashMap<
    KeyT,
    ValueT,
    HashFcn,
    EqualFcn,
    Allocator,
    ProbeFcn,
    KeyConvertFcn>::SimpleRetT
AtomicHashMap<
    KeyT,
    ValueT,
    HashFcn,
    EqualFcn,
    Allocator,
    ProbeFcn,
    KeyConvertFcn>::findInternal(const LookupKeyT k) const {
  SubMap* const primaryMap = subMaps_[0].load(std::memory_order_relaxed);
  typename SubMap::SimpleRetT ret =
      primaryMap
          ->template findInternal<LookupKeyT, LookupHashFcn, LookupEqualFcn>(k);
  if (LIKELY(ret.idx != primaryMap->capacity_)) {
    return SimpleRetT(0, ret.idx, ret.success);
  }
  const unsigned int numMaps =
      numMapsAllocated_.load(std::memory_order_acquire);
  FOR_EACH_RANGE (i, 1, numMaps) {
    // Check each map successively.  If one succeeds, we're done!
    SubMap* thisMap = subMaps_[i].load(std::memory_order_relaxed);
    ret =
        thisMap
            ->template findInternal<LookupKeyT, LookupHashFcn, LookupEqualFcn>(
                k);
    if (LIKELY(ret.idx != thisMap->capacity_)) {
      return SimpleRetT(i, ret.idx, ret.success);
    }
  }
  // Didn't find our key...
  return SimpleRetT(numMaps, 0, false);
}

可见，挨个执行 submap（即是数组）的findInternal函数，找到key

checkLegalKeyIfKey<LookupKeyT>(key_in);

  for (size_t idx = keyToAnchorIdx<LookupKeyT, LookupHashFcn>(key_in),
              numProbes = 0;
       ;
       idx = ProbeFcn()(idx, numProbes, capacity_)) {
    const KeyT key = acquireLoadKey(cells_[idx]);
    if (LIKELY(LookupEqualFcn()(key, key_in))) {
      return SimpleRetT(idx, true);
    }
    if (UNLIKELY(key == kEmptyKey_)) {
      // if we hit an empty element, this key does not exist
      return SimpleRetT(capacity_, false);
    }
    // NOTE: the way we count numProbes must be same in find(), insert(),
    // and erase(). Otherwise it may break probing.
    ++numProbes;
    if (UNLIKELY(numProbes >= capacity_)) {
      // probed every cell...fail
      return SimpleRetT(capacity_, false);
    }
  }

findInternal函数主要是从hash(key) %capacity_位置开始遍历数组，如果找到想等的key,则返回，遍历到空的key 就返回空

注意到，这里有三个函数，LookupHashFcn，ProbeFcn，LookupEqualFcn，这个三个由用户创建hashmap的时候自定义

LookupHashFcn是哈希函数，ProbeFcn是idx跳转函数，目前有2个，一个线性跳转 idx++，一个二次跳转，每次idx+=numProbe跳转。LookupEqualFcn比较key是否想等。

atomicHashmap的value_type必须是std::pair<,>

 typedef std::pair<const KeyT, ValueT> value_type;

insert流程

insert调用了emplace函数，再调用insertInternal函数

emplace(LookupKeyT k, ArgTs&&... vCtorArgs) {

SimpleRetT ret = insertInternal<

LookupKeyT,

LookupHashFcn,

LookupEqualFcn,

LookupKeyToKeyFcn>(k, std::forward<ArgTs>(vCtorArgs)...);

SubMap* subMap = subMaps_[ret.i].load(std::memory_order_relaxed);

return std::make_pair(

iterator(this, ret.i, subMap->makeIter(ret.j)), ret.success);

}

尝试在已经分配的每个数组insert,如果Insert的时候最后个数组满了，就要判断能否分配新的数组。

如果可以，有tryLockMap锁住map,tryLockMap是对数组指针CAS操作，比较是不是空值然后赋值上kLockedPtr_，只有一个线程会CAS成功，其他线程进入一个自旋锁，直到新数组分配空间完毕（nextMapIdx==numMapsAllocated_.load）的时候

insertInternal(LookupKeyT key, ArgTs&&... vCtorArgs) {
beginInsertInternal:
  auto nextMapIdx = // this maintains our state
      numMapsAllocated_.load(std::memory_order_acquire);
  typename SubMap::SimpleRetT ret;
  FOR_EACH_RANGE (i, 0, nextMapIdx) {
    // insert in each map successively.  If one succeeds, we're done!
    SubMap* subMap = subMaps_[i].load(std::memory_order_relaxed);
    ret = subMap->template insertInternal<
        LookupKeyT,
        LookupHashFcn,
        LookupEqualFcn,
        LookupKeyToKeyFcn>(key, std::forward<ArgTs>(vCtorArgs)...);
    if (ret.idx == subMap->capacity_) {
      continue; // map is full, so try the next one
    }
    // Either collision or success - insert in either case
    return SimpleRetT(i, ret.idx, ret.success);
  }
SubMap* primarySubMap = subMaps_[0].load(std::memory_order_relaxed);
  if (nextMapIdx >= kNumSubMaps_ ||
      primarySubMap->capacity_ * kGrowthFrac_ < 1.0) {
    // Can't allocate any more sub maps.
    throw AtomicHashMapFullError();
  }

  if (tryLockMap(nextMapIdx)) {
    // Alloc a new map and shove it in.  We can change whatever
    // we want because other threads are waiting on us...
    size_t numCellsAllocated = (size_t)(
        primarySubMap->capacity_ *
        std::pow(1.0 + kGrowthFrac_, nextMapIdx - 1));
    size_t newSize = size_t(numCellsAllocated * kGrowthFrac_);
    DCHECK(
        subMaps_[nextMapIdx].load(std::memory_order_relaxed) ==
        (SubMap*)kLockedPtr_);
    // create a new map using the settings stored in the first map

    Config config;
    config.emptyKey = primarySubMap->kEmptyKey_;
    config.lockedKey = primarySubMap->kLockedKey_;
    config.erasedKey = primarySubMap->kErasedKey_;
    config.maxLoadFactor = primarySubMap->maxLoadFactor();
    config.entryCountThreadCacheSize =
        primarySubMap->getEntryCountThreadCacheSize();
    subMaps_[nextMapIdx].store(
        SubMap::create(newSize, config).release(), std::memory_order_relaxed);

    // Publish the map to other threads.
    numMapsAllocated_.fetch_add(1, std::memory_order_release);
    DCHECK_EQ(
        nextMapIdx + 1, numMapsAllocated_.load(std::memory_order_relaxed));
  } else {
    // If we lost the race, we'll have to wait for the next map to get
    // allocated before doing any insertion here.
    detail::atomic_hash_spin_wait([&] {
      return nextMapIdx >= numMapsAllocated_.load(std::memory_order_acquire);
    });
  }

  // Relaxed is ok here because either we just created this map, or we
  // just did a spin wait with an acquire load on numMapsAllocated_.
  SubMap* loadedMap = subMaps_[nextMapIdx].load(std::memory_order_relaxed);
  DCHECK(loadedMap && loadedMap != (SubMap*)kLockedPtr_);
  ret = loadedMap->insertInternal(key, std::forward<ArgTs>(vCtorArgs)...);
  if (ret.idx != loadedMap->capacity_) {
    return SimpleRetT(nextMapIdx, ret.idx, ret.success);
  }
  // We took way too long and the new map is already full...try again from
  // the top (this should pretty much never happen).
  goto beginInsertInternal;
}

下面是数组的internal的过程。遍历数组，遍历方法和find一致，idx = ProbeFcn()(idx, numProbes, capacity_);

这段代码毕竟复杂我用个伪代码来解释吧

if key是空key{
	++numPendingEntries_;//numPendingEntries_是在这个数组等待线程数
	if isFull_{
		--numPendingEntries_;
		自旋锁等待，直到numPendingEntries_==0
		isFull_.store(NO_PENDING_INSERTS, std::memory_order_release);
		if key是空key{
			return SimpleRetT(capacity_, false);//返回capacity_代表这个数组已经满了
		}
	}else{
		if tryLockCell 成功{
			插入value
			--numPendingEntries_;
            ++numEntries_; 
            //maxEntries_ = maxLoadFactor*capacity ,maxLoadFactor默认是0.8
			如果numEntries_ >= maxEntries_{
				isFull_.store(NO_NEW_INSERTS, std::memory_order_relaxed);
			}
			返回 SimpleRetT(idx, true);//idx是位置，true代表插入成功
		}
		--numPendingEntries_;
	}
}
	
if key是kLockedKey_,{
	自旋，直到key不是kLockedKey_
}
if key与要插入的key相等{
	直接返回该pair，因为hashmap不会覆盖旧值,让业务去该pair的值
	return SimpleRetT(idx, false);//代表插入失败
}
++numProbes;
//代表插满了
if (UNLIKELY(numProbes >= capacity_)) {
      return SimpleRetT(capacity_, false);
}
增加idx，遍历下个item

insertInternal(LookupKeyT key_in, ArgTs&&... vCtorArgs) {
  const short NO_NEW_INSERTS = 1;
  const short NO_PENDING_INSERTS = 2;
  checkLegalKeyIfKey<LookupKeyT>(key_in);

  size_t idx = keyToAnchorIdx<LookupKeyT, LookupHashFcn>(key_in);
  size_t numProbes = 0;
  for (;;) {
    DCHECK_LT(idx, capacity_);
    value_type* cell = &cells_[idx];
    if (relaxedLoadKey(*cell) == kEmptyKey_) {
      // NOTE: isFull_ is set based on numEntries_.readFast(), so it's
      // possible to insert more than maxEntries_ entries. However, it's not
      // possible to insert past capacity_.
      ++numPendingEntries_;
      if (isFull_.load(std::memory_order_acquire)) {
        --numPendingEntries_;

        // Before deciding whether this insert succeeded, this thread needs to
        // wait until no other thread can add a new entry.

        // Correctness assumes isFull_ is true at this point. If
        // another thread now does ++numPendingEntries_, we expect it
        // to pass the isFull_.load() test above. (It shouldn't insert
        // a new entry.)
        detail::atomic_hash_spin_wait([&] {
          return (isFull_.load(std::memory_order_acquire) !=
                  NO_PENDING_INSERTS) &&
              (numPendingEntries_.readFull() != 0);
        });
        isFull_.store(NO_PENDING_INSERTS, std::memory_order_release);

        if (relaxedLoadKey(*cell) == kEmptyKey_) {
          // Don't insert past max load factor
          return SimpleRetT(capacity_, false);
        }
      } else {
        // An unallocated cell. Try once to lock it. If we succeed, insert here.
        // If we fail, fall through to comparison below; maybe the insert that
        // just beat us was for this very key....
        if (tryLockCell(cell)) {
          KeyT key_new;
          // Write the value - done before unlocking
          try {
            key_new = LookupKeyToKeyFcn()(key_in);
            typedef
                typename std::remove_const<LookupKeyT>::type LookupKeyTNoConst;
            constexpr bool kAlreadyChecked =
                std::is_same<KeyT, LookupKeyTNoConst>::value;
            if (!kAlreadyChecked) {
              checkLegalKeyIfKey(key_new);
            }
            DCHECK(relaxedLoadKey(*cell) == kLockedKey_);
            // A const mapped_type is only constant once constructed, so cast
            // away any const for the placement new here.
            using mapped = typename std::remove_const<mapped_type>::type;
            new (const_cast<mapped*>(&cell->second))
                ValueT(std::forward<ArgTs>(vCtorArgs)...);
            unlockCell(cell, key_new); // Sets the new key
          } catch (...) {
            // Transition back to empty key---requires handling
            // locked->empty below.
            unlockCell(cell, kEmptyKey_);
            --numPendingEntries_;
            throw;
          }
          // An erase() can race here and delete right after our insertion
          // Direct comparison rather than EqualFcn ok here
          // (we just inserted it)
          DCHECK(
              relaxedLoadKey(*cell) == key_new ||
              relaxedLoadKey(*cell) == kErasedKey_);
          --numPendingEntries_;
          ++numEntries_; // This is a thread cached atomic increment :)
          if (numEntries_.readFast() >= maxEntries_) {
            isFull_.store(NO_NEW_INSERTS, std::memory_order_relaxed);
          }
          return SimpleRetT(idx, true);
        }
        --numPendingEntries_;
      }
    }
    DCHECK(relaxedLoadKey(*cell) != kEmptyKey_);
    if (kLockedKey_ == acquireLoadKey(*cell)) {
      detail::atomic_hash_spin_wait(
          [&] { return kLockedKey_ == acquireLoadKey(*cell); });
    }

    const KeyT thisKey = acquireLoadKey(*cell);
    if (LookupEqualFcn()(thisKey, key_in)) {
      // Found an existing entry for our key, but we don't overwrite the
      // previous value.
      return SimpleRetT(idx, false);
    } else if (thisKey == kEmptyKey_ || thisKey == kLockedKey_) {
      // We need to try again (i.e., don't increment numProbes or
      // advance idx): this case can happen if the constructor for
      // ValueT threw for this very cell (the rethrow block above).
      continue;
    }

    // NOTE: the way we count numProbes must be same in find(),
    // insert(), and erase(). Otherwise it may break probing.
    ++numProbes;
    if (UNLIKELY(numProbes >= capacity_)) {
      // probed every cell...fail
      return SimpleRetT(capacity_, false);
    }

    idx = ProbeFcn()(idx, numProbes, capacity_);
  }
}