一致性hash C++实现
知识标签: Consistent hashing, C++
这两篇关于Consistent hashing的文章不错:
理想化的 Redis 集群
一致性hash和solr千万级数据分布式搜索引擎中的应用
该代码是我在别人github上找到的源码,自己添加了一些注释
一致性哈希的功能被封装在模板类consistent_hash_map中
consistent_hash_map.hpp
consistent_hash_map.hpp如下(文件名需与下面测试代码中的 #include "consistent_hash_map.hpp" 保持一致):
#include <map>
#include <string>
#include <list>
#include <functional>
#include <algorithm>
#ifndef __CONSISTENT_HASH_H__
#define __CONSISTENT_HASH_H__
// Consistent-hash ring, implemented as a thin wrapper around std::map.
//
// Template parameters:
//   T     - the node type stored on the ring.
//   Hash  - unary function object: takes a T and returns an integer hash value.
//           That value is the key the ring is ordered by. Hash must declare a
//           nested result_type naming the integer type it returns.
//   Alloc - allocator passed to the underlying std::map.
template <typename T,
          typename Hash,
          typename Alloc = std::allocator<std::pair<const typename Hash::result_type, T> > >
class consistent_hash_map
{
public:
    // Integer type returned by the hash function; used as the ring key.
    typedef typename Hash::result_type size_type;
    // Nodes are kept in a std::map ordered by their hash value.
    typedef std::map<size_type, T, std::less<size_type>, Alloc> map_type;
    // std::pair<const size_type, T>: first is the node's hash, second is the node.
    typedef typename map_type::value_type value_type;
    typedef value_type& reference;
    typedef const value_type& const_reference;
    typedef typename map_type::iterator iterator;
    typedef typename map_type::const_iterator const_iterator;
    typedef typename map_type::reverse_iterator reverse_iterator;
    typedef typename map_type::const_reverse_iterator const_reverse_iterator;
    typedef Alloc allocator_type;

public:
    consistent_hash_map() {}
    ~consistent_hash_map() {}

public:
    // Number of nodes currently on the ring.
    std::size_t size() const
    {
        return nodes_.size();
    }

    // True when the ring holds no nodes.
    bool empty() const
    {
        return nodes_.empty();
    }

    // Inserts a node under the key hasher_(node).
    // Returns (iterator, true) on success, where the iterator points at the new
    // entry. Returns (iterator, false) when an entry with the same hash already
    // exists - either the same node or a different node whose hash collides -
    // in which case the iterator points at the existing entry.
    std::pair<iterator, bool> insert(const T& node)
    {
        size_type hash = hasher_(node);
        return nodes_.insert(value_type(hash, node));
    }

    // Removes the entry the iterator points at.
    void erase(iterator it)
    {
        nodes_.erase(it);
    }

    // Removes the entry stored under hasher_(node) and returns how many entries
    // were removed (0 or 1). The lookup is by hash only: the stored node is not
    // compared with `node`, so a different node with a colliding hash would be
    // removed as well.
    std::size_t erase(const T& node)
    {
        size_type hash = hasher_(node);
        return nodes_.erase(hash);
    }

    // Maps the hash of a data key to the node responsible for it: the first
    // node whose hash is >= `hash`, wrapping around to the node with the
    // smallest hash when no such node exists. Returns end() on an empty ring.
    iterator find(size_type hash)
    {
        if (nodes_.empty())
        {
            return nodes_.end();
        }
        // First entry whose key is >= hash.
        iterator it = nodes_.lower_bound(hash);
        if (it == nodes_.end())
        {
            // Past the largest hash: wrap around to the start of the ring.
            it = nodes_.begin();
        }
        return it;
    }

    // Const overload of find(); same lookup rule, usable on a const ring.
    const_iterator find(size_type hash) const
    {
        if (nodes_.empty())
        {
            return nodes_.end();
        }
        const_iterator it = nodes_.lower_bound(hash);
        if (it == nodes_.end())
        {
            it = nodes_.begin();
        }
        return it;
    }

    // Iteration over the nodes in ascending (begin/end) or descending
    // (rbegin/rend) hash order.
    iterator begin() { return nodes_.begin(); }
    iterator end() { return nodes_.end(); }
    reverse_iterator rbegin() { return nodes_.rbegin(); }
    reverse_iterator rend() { return nodes_.rend(); }

    // Read-only iteration for const rings.
    const_iterator begin() const { return nodes_.begin(); }
    const_iterator end() const { return nodes_.end(); }
    const_reverse_iterator rbegin() const { return nodes_.rbegin(); }
    const_reverse_iterator rend() const { return nodes_.rend(); }

private:
    Hash hasher_;     // computes the ring key of a node
    map_type nodes_;  // hash value -> node, ordered by hash value
};
#endif
test.cpp只是简单实现consistent hashing, 并没实现虚拟节点,关于虚拟节点的例子在下面testVnode.cpp中
test.cpp
test.cpp如下:
#include <iostream>
#include <string>
#include <boost/functional/hash.hpp>
#include <stdint.h> // for uint32_t
#include <boost/format.hpp>
#include <boost/crc.hpp> //for crc_optimal
#include "consistent_hash_map.hpp"
struct crc32_hasher
{
uint32_t operator()(const std::string& node)
{
//定义crc_optimal对象
boost::crc_32_type ret;
//处理字符串,生成CRC序列
ret.process_bytes(node.c_str(),node.size());
//checksum()返回CRC序列
return ret.checksum();
}
typedef uint32_t result_type;
};
// Demo driver for consistent_hash_map without virtual nodes: inserts three
// nodes, prints the ring, looks up a few key hashes, then removes the nodes
// one by one until the ring is empty. argc/argv are unused.
int main(int argc, char const *argv[])
{
typedef consistent_hash_map<std::string,crc32_hasher> consistent_hash_t;
consistent_hash_t consistent_hash_;
// Format object used to build the node addresses "192.168.1.<i>"
boost::format node_fmt("192.168.1.%1%");
// Insert the nodes 192.168.1.0, 192.168.1.1 and 192.168.1.2
for(std::size_t i=0;i<3;++i)
{
std::string node = boost::str(node_fmt % i);
consistent_hash_.insert(node);
std::cout<<boost::format("add node: %1%") % node << std::endl;
}
// Print every node on the ring with its hash value
{
std::cout<<"========================================================="<<std::endl;
for(consistent_hash_t::iterator it = consistent_hash_.begin();it != consistent_hash_.end(); ++it)
{
std::cout<<boost::format("node: %1%,%2%") % it->second % it->first << std::endl;
}
}
// Print the node each of the following data-key hash values maps to
{
consistent_hash_t::iterator it;
it = consistent_hash_.find(290235110); // 290235110 stands for the hash value of a data key
std::cout<<boost::format("node:%1%,%2%") % it->second % it->first << std::endl;
}
{
consistent_hash_t::iterator it;
it = consistent_hash_.find(2286285664);
std::cout<<boost::format("node:%1%,%2%") % it->second % it->first << std::endl;
}
{
consistent_hash_t::iterator it;
it = consistent_hash_.find(4282565578);
std::cout<<boost::format("node:%1%,%2%") % it->second % it->first << std::endl;
}
std::cout<<"========================================================="<<std::endl;
{// Remove 192.168.1.1 by value, then print the nodes that are left
std::string node = boost::str(node_fmt % 1);
consistent_hash_.erase(node);
for(consistent_hash_t::iterator it = consistent_hash_.begin();it != consistent_hash_.end(); ++it)
{
std::cout<<boost::format("node:%1%,%2%") % it->second % it->first << std::endl;
}
}
std::cout<<"========================================================="<<std::endl;
// Look up a key hash and remove, by iterator, the node that owns it.
// NOTE(review): `it` is dereferenced without an end() check; this relies on
// the ring still being non-empty here (assuming the earlier inserts succeeded).
{
consistent_hash_t::iterator it;
it = consistent_hash_.find(4282565578);
std::cout<<boost::format("node:%1%,%2%") % it->second % it->first << std::endl;
std::cout<<"-------------------------------------------"<<std::endl;
consistent_hash_.erase(it);
for(consistent_hash_t::iterator it = consistent_hash_.begin();it != consistent_hash_.end(); ++it)
{
std::cout<<boost::format("node:%1%,%2%") % it->second % it->first << std::endl;
}
}
std::cout<<"========================================================="<<std::endl;
// Two more lookups, then remove the node returned by the second one and
// print whatever is left on the ring.
// NOTE(review): same unchecked dereference as above - presumably exactly one
// node remains at this point, so both lookups resolve to it; confirm.
{
std::cout<<"-------------------------------------------"<<std::endl;
consistent_hash_t::iterator it;
it = consistent_hash_.find(4282565578);
std::cout<<boost::format("node:%1%,%2%") % it->second % it->first << std::endl;
std::cout<<"-------------------------------------------"<<std::endl;
it = consistent_hash_.find(4282565576);
std::cout<<boost::format("node:%1%,%2%") % it->second % it->first << std::endl;
std::cout<<"-------------------------------------------"<<std::endl;
consistent_hash_.erase(it);
for(consistent_hash_t::iterator it = consistent_hash_.begin();it != consistent_hash_.end(); ++it)
{
std::cout<<boost::format("node:%1%,%2%") % it->second % it->first << std::endl;
}
std::cout<<"-------------------------------------------"<<std::endl;
}
std::cout<<"========================================================="<<std::endl;
// find() returns end() on an empty ring, so the result is checked here
// before use instead of being dereferenced.
{
std::cout<<"-------------------------------------------"<<std::endl;
consistent_hash_t::iterator it;
it = consistent_hash_.find(4282565578);
if(it == consistent_hash_.end())
{
std::cout<<"not found, consistent_hash is empty"<<std::endl;
}
}
return 0;
}
编译:g++ test.cpp -o test
运行:./test
test结果
结果如下:
testVnode.cpp
testVnode.cpp如下:
#include <stdint.h>
#include <iostream>
#include <string>
#include <boost/functional/hash.hpp>
#include <boost/format.hpp>
#include <boost/crc.hpp>
#include "consistent_hash_map.hpp"
// Addresses of the physical hosts. Both the strings and the table slots are
// const: the table is only ever read, so it should not be reassignable.
const char* const nodes[] = {
    "192.168.1.100",
    "192.168.1.101",
    "192.168.1.102",
    "192.168.1.103",
    "192.168.1.104" };
// A virtual node: replica number `vnode_id` of the physical host nodes[node_id].
struct vnode_t
{
    // Zero both ids so that a default-constructed vnode_t is in a defined
    // state; left uninitialized, to_str() would index nodes[] with an
    // indeterminate value.
    vnode_t() : node_id(0), vnode_id(0) {}

    // n: index of the physical host in nodes[]; v: replica number on that host.
    vnode_t(std::size_t n, std::size_t v) : node_id(n), vnode_id(v) {}

    // Returns the label "<host address>-<vnode id>".
    // node_id must be a valid index into nodes[]; it is not range-checked.
    std::string to_str() const
    {
        return boost::str(boost::format("%1%-%2%") % nodes[node_id] % vnode_id);
    }

    std::size_t node_id;   // index of the physical host in nodes[]
    std::size_t vnode_id;  // replica number of this virtual node on its host
};
struct crc32_hasher
{
uint32_t operator()(const vnode_t& node)
{
boost::crc_32_type ret;
std::string vnode = node.to_str();
std::cout<<"vnode:"<<vnode<<std::endl;
ret.process_bytes(vnode.c_str(),vnode.size());
return ret.checksum();
}
typedef uint32_t result_type;
};
int main(int argc, char const *argv[])
{
typedef consistent_hash_map<vnode_t,crc32_hasher> consistent_hash_t;
consistent_hash_t consistent_hash_;
for(std::size_t i=0;i<5;++i)
{//每个节点插入100个虚拟节点
for(std::size_t j=0;j<100;j++)
{
consistent_hash_.insert(vnode_t(i,j));
}
}
//遍历consistent_hash中的所有的vnode,统计每个虚拟节点的key的数量和每个主机包含key的数量
{
std::cout<<"========================================================="<<std::endl;
//sums统计每个主机可以包含的key数量
std::size_t sums[] = {0,0,0,0,0};
//处理圆环的链接点处第一个vnode
consistent_hash_t::iterator i = consistent_hash_.begin();
consistent_hash_t::reverse_iterator j = consistent_hash_.rbegin();
// 计算第一个节点包含的key数量
// static_cast<uint32_t>(-1)源码为UINT32_MAX, 但无法通过编译,替代之
std::size_t n = i->first + static_cast<uint32_t>(-1) - j->first;
std::cout<<boost::format("vnode:%1%,hash:%2%,contains:%3%")
% i->second.to_str() % i->first % n << std::endl;
sums[i->second.node_id] += n;
uint32_t priv = i->first;
uint32_t cur;
consistent_hash_t::iterator end = consistent_hash_.end();
// 处理圆环中间的vnode
while(++i != end)
{
cur = i->first;
n = cur - priv;
std::cout<<boost::format("vnode:%1%,hash:%2%,contains:%3%")
% i->second.to_str() % cur % n << std::endl;
sums[i->second.node_id] += n;
priv = cur;
}
for(std::size_t i=0;i<5;++i)
{
std::cout<<boost::format("node:%1% contains:%2%") % nodes[i] % sums[i] <<std::endl;
}
}
//查找某个hash值对应的vnode 和 主机
{
consistent_hash_t::iterator it;
it = consistent_hash_.find(290235110);
std::cout<<boost::format("node:%1%,vnode:%2%,hash:%3%")
% nodes[it->second.node_id] % it->second.vnode_id % it->first << std::endl;
}
return 0;
}
testVnode结果自己测