背景:
一致性Hash用于分布式缓存系统,将Key值映射到具体机器Ip上,并且增加和删除1台机器的数据移动量较小,对现网影响较小
实现:
1 Hash环:将节点的Hash值映射到一个Hash环中,每个Key顺时针第一个找到的节点,就是这个Key被路由到的机器
2 "虚拟节点":将节点虚拟成多个"虚拟节点"分布在Hash环上,使得分布更均匀,扩缩容影响较小
一致性Hash用于分布式缓存系统,将Key值映射到具体机器Ip上,并且增加和删除1台机器的数据移动量较小,对现网影响较小
实现:
1 Hash环:将节点的Hash值映射到一个Hash环中,每个Key顺时针第一个找到的节点,就是这个Key被路由到的机器
2 "虚拟节点":将节点虚拟成多个"虚拟节点"分布在Hash环上,使得分布更均匀,扩缩容影响较小
代码实例:
/*
* @ 一致性Hash模拟测试
* @ 结论:模拟4台机器扩容1台,遍历Key[0,999983]
- 一致性Hash需移动181161个Key,约占18%(1/5左右,符合预期效果)
- 取模Hash需移动799984个Key,约占80%
* @ 2014.05.30
*/
#include <stdint.h>
#include <iostream>
#include <string.h>
#include <sstream>
#include <map>
#include <vector>
using namespace std;
#define HASH_MOD (999983)
template <class T>
string ToStr(const T &t)
{
stringstream stream;
stream << t;
return stream.str();
}
uint32_t APHash(string &sKey)
{
char *key = (char*)sKey.c_str();
unsigned int hash = 0;
for (int i=0; *key; i++)
{
if ((i & 1) == 0) {
hash ^= ((hash<<7)^(*key++)^(hash>>3));
} else {
hash ^= (~((hash<<11)^(*key++)^(hash>>5)));
}
}
return hash%HASH_MOD;
}
class CMyConHash
{
public:
/* 添加一台机器(IP) */
void AddIp(const string &sIp)
{
// 每个IP分配128个虚拟节点,原因:结合APHash实验结果分布较均匀
for (int i = 0; i < 128; i ++)
{
string sCode = sIp + ToStr(i) + "#Hash";
uint32_t uVirKey = APHash(sCode);
mapVirKey2Ip[uVirKey] = sIp;
mapIp2VirKey[sIp].push_back(uVirKey);
}
}
/* 删除一台机器(IP) */
void DelIp(const string &sIp)
{
if (mapIp2VirKey.count(sIp) == 0) {
cout << "DelIp Err: mapIp2VirKey Don`t Has Ip=" << sIp << endl;
return;
}
vector<uint32_t> vecVirKey = mapIp2VirKey[sIp];
for (int i = 0; i < vecVirKey.size(); i ++)
{
uint32_t uVirKey = vecVirKey[i];
if (mapVirKey2Ip[uVirKey] == sIp) {
// 得判断下,有可能2个IP虚拟节点相等后覆盖了
mapVirKey2Ip.erase(uVirKey);
}
}
mapIp2VirKey.erase(sIp);
}
/* 路由:给每个Key找到负责的机器(IP) */
int FindIp(uint32_t uKey, string &sIp)
{
if (mapVirKey2Ip.size() == 0) {
cout << "FindIp Err: mapVirKey2Ip.size() == 0" << endl;
return -1;
}
bool bFind = false;
uint32_t uVirKey;
map<uint32_t, string>::iterator iter;
// 遍历std::map是按Key大小顺序输出(区别std::tr1::unordered_map)
for(iter = mapVirKey2Ip.begin(); iter != mapVirKey2Ip.end(); iter ++)
{
uVirKey = iter->first;
if (uVirKey > uKey%HASH_MOD) {
sIp = iter->second;
bFind = true;
break;
}
}
if (!bFind) {
// 找不到比Key小的虚拟节点,故使用最小的虚拟节点(环)
iter = mapVirKey2Ip.begin();
uVirKey = iter->first;
sIp = iter->second;
}
//cout << "FindIp Suc:" << uKey%HASH_MOD << "=>" << uVirKey << "," << sIp << endl;
return 0;
}
/* 打印各个IP负责的Key区域大小,影响因素:1 Hash函数 2 虚拟节点个数 */
/* 4台机器的情况,相对还是较均匀:
Ip=202.168.14.241,Cnt=251649
Ip=202.168.14.242,Cnt=257902
Ip=202.168.14.243,Cnt=245945
Ip=202.168.14.244,Cnt=235516 */
void EchoIpState()
{
map<string, uint32_t> mapIpCnt;
map<uint32_t, string>::iterator iter = mapVirKey2Ip.end();
iter --;
uint32_t uPreKey = iter->first;
string sPreIp = iter->second;
do {
iter --;
uint32_t uVirKey = iter->first;
string sIp = iter->second;
if (mapIpCnt.count(sPreIp) == 0) {
mapIpCnt[sPreIp] = uPreKey-uVirKey;
} else {
mapIpCnt[sPreIp] += uPreKey-uVirKey;
}
uPreKey = uVirKey;
sPreIp = sIp;
} while (iter != mapVirKey2Ip.begin());
cout << "Ip Size=" << mapIpCnt.size() << endl;
map<string, uint32_t>::iterator iter1;
for(iter1 = mapIpCnt.begin(); iter1 != mapIpCnt.end(); iter1 ++)
{
cout << "Ip=" << iter1->first << ",Cnt=" << iter1->second << endl;
}
}
private:
map< uint32_t, string > mapVirKey2Ip;
map< string, vector<uint32_t> > mapIp2VirKey;
};
class CMyModHash
{
public:
void AddIp(const string &sIp)
{
vecIpList.push_back(sIp);
}
void FindIp(uint32_t uKey, string &sIp)
{
sIp = vecIpList[uKey%vecIpList.size()];
}
void EchoIpState()
{
cout << "Ip Cnt=" << vecIpList.size() << endl;
}
private:
vector<string> vecIpList;
};
int main()
{
CMyConHash oMyHash;
// CMyModHash oMyHash;
// 模拟初始化4台机器
oMyHash.AddIp("202.168.14.241");
oMyHash.AddIp("202.168.14.242");
oMyHash.AddIp("202.168.14.243");
oMyHash.AddIp("202.168.14.244");
oMyHash.EchoIpState();
// 保存下各个Key路由的机器
string sIp, arrKeyIp[HASH_MOD];
for (uint32_t key = 0; key < HASH_MOD; key ++)
{
oMyHash.FindIp(key, sIp);
arrKeyIp[key] = sIp;
}
// 模拟添加1台机器
oMyHash.AddIp("202.168.14.245");
oMyHash.EchoIpState();
// 判断多少Key对应数据需要移动机器
uint32_t uCnt = 0;
for (uint32_t key = 0; key < HASH_MOD; key ++)
{
oMyHash.FindIp(key, sIp);
if (arrKeyIp[key] != sIp) {
uCnt ++;
}
}
cout << "Key Sum=" << HASH_MOD << " , Need To Move:" << uCnt << endl;
return 0;
}