本文只是为了给自己定一个起点,最终的目标是实现一个内网环境下真实可用的 P2P 网络。涉及到的相关理论和算法原理后面会慢慢补充,也希望通过这个小项目,能让自己对 P2P 和分布式系统有一个初步而形象的认识,望共勉!
源码文件
murmurhash3.h
#ifndef _MURMURHASH3_H_
#define _MURMURHASH3_H_
#include <stdint.h>
/*
* MurmurHash: a non-cryptographic hash algorithm with very high performance.
* According to the original author's note it is much faster than traditional
* hashes such as CRC32, MD5 and SHA-1 (the latter two are cryptographic hashes,
* which are inherently more complex and therefore unavoidably slower),
* and it has a low collision rate.
* http://murmurhash.googlepages.com/
*
* Parameters:
*   key  - pointer to the bytes to hash
*   len  - number of bytes to hash, starting at key
*   seed - initial hash state; preferably a prime number (defaults to 17)
* Returns the 32-bit hash value.
*/
uint32_t murmur3_32(const char *key, uint32_t len, uint32_t seed = 17);
#endif
murmurhash3.cpp
#include "murmurhash3.h"
/*
 * Computes the 32-bit MurmurHash3 value of a byte buffer.
 *
 * key  : pointer to the bytes to hash; it is not dereferenced when len == 0
 * len  : number of bytes to read starting at key
 * seed : initial value of the hash state
 *
 * Returns the 32-bit hash.
 *
 * Each 4-byte block is assembled byte by byte in little-endian order instead
 * of being read through a uint32_t pointer. This avoids unaligned loads and
 * strict-aliasing violations, and makes the result independent of the host
 * byte order. All block arithmetic is unsigned so large lengths cannot
 * overflow a signed int.
 */
uint32_t murmur3_32(const char *key, uint32_t len, uint32_t seed)
{
    static const uint32_t c1 = 0xcc9e2d51;
    static const uint32_t c2 = 0x1b873593;
    static const uint32_t r1 = 15;
    static const uint32_t r2 = 13;
    static const uint32_t m = 5;
    static const uint32_t n = 0xe6546b64;

    const uint8_t *data = reinterpret_cast<const uint8_t *>(key);
    const uint32_t nblocks = len / 4;
    uint32_t hash = seed;

    /* Body: mix every complete 4-byte block into the hash state. */
    for (uint32_t i = 0; i < nblocks; i++)
    {
        const uint8_t *p = data + i * 4u;
        uint32_t k = static_cast<uint32_t>(p[0])
                   | (static_cast<uint32_t>(p[1]) << 8)
                   | (static_cast<uint32_t>(p[2]) << 16)
                   | (static_cast<uint32_t>(p[3]) << 24);
        k *= c1;
        k = (k << r1) | (k >> (32 - r1));
        k *= c2;
        hash ^= k;
        hash = ((hash << r2) | (hash >> (32 - r2))) * m + n;
    }

    /* Tail: mix the remaining 1..3 bytes, if any. */
    const uint8_t *tail = data + nblocks * 4u;
    uint32_t k1 = 0;
    switch (len & 3)
    {
    case 3:
        k1 ^= static_cast<uint32_t>(tail[2]) << 16;
        /* fall through */
    case 2:
        k1 ^= static_cast<uint32_t>(tail[1]) << 8;
        /* fall through */
    case 1:
        k1 ^= tail[0];
        k1 *= c1;
        k1 = (k1 << r1) | (k1 >> (32 - r1));
        k1 *= c2;
        hash ^= k1;
        break;
    default:
        break;
    }

    /* Finalization: fold in the length and avalanche the bits. */
    hash ^= len;
    hash ^= (hash >> 16);
    hash *= 0x85ebca6b;
    hash ^= (hash >> 13);
    hash *= 0xc2b2ae35;
    hash ^= (hash >> 16);
    return hash;
}
consistent_hash.h
#ifndef __CONSISTENT_HASH_H__
#define __CONSISTENT_HASH_H__
#include <map>
using namespace std;
// Consistent-hash ring that maps string keys onto machine indices.
// Each machine is represented on the ring by several virtual nodes.
class ConsistentHash
{
public:
    // node_num: number of real machines; virtual_node_num: virtual nodes per machine.
    ConsistentHash(int node_num, int virtual_node_num);
    ~ConsistentHash();

    // Places the virtual nodes of machines 0 .. node_num-1 on the ring.
    void Initialize();

    // Returns the index of the machine responsible for `key`.
    size_t GetServerIndex(const char* key);

    // Removes the virtual nodes of machine `index` from the ring.
    void DeleteNode(int index);

    // Adds machine `index` and its virtual nodes to the ring.
    void AddNewNode(int index);

private:
    // Virtual nodes: the map key is the hash value, the mapped value is the machine index.
    std::map<uint32_t, size_t> server_nodes_;
    // Number of real machine nodes.
    int node_num_;
    // Number of virtual nodes associated with each machine node.
    int virtual_node_num_;
};
#endif
consistent_hash.cpp
#include <map>
#include <string.h>
#include <sstream>
#include "consistent_hash.h"
#include "murmurhash3.h"
using namespace std;
// Stores the machine count and the per-machine virtual node count.
// No virtual node is placed on the ring here.
ConsistentHash::ConsistentHash(int node_num, int virtual_node_num)
    : node_num_(node_num),
      virtual_node_num_(virtual_node_num)
{
}
// Drops every virtual node; clearing an already empty map is a no-op.
ConsistentHash::~ConsistentHash()
{
    server_nodes_.clear();
}
void ConsistentHash::Initialize()
{
for(int i=0; i<node_num_; ++i)
{
for(int j=0; j<virtual_node_num_; ++j)
{
stringstream node_key;
node_key<<"SHARD-"<<i<<"-NODE-"<<j;
uint32_t partition = murmur3_32(node_key.str().c_str(), strlen(node_key.str().c_str()));
server_nodes_.insert(pair<uint32_t, size_t>(partition, i));
}
}
}
// Returns the index of the machine responsible for `key`.
//
// key: NUL-terminated string; its bytes (without the terminator) are hashed.
//
// The key's hash is located on the ring and the first virtual node whose hash is
// greater than or equal to it is chosen; if there is none, the lookup wraps
// around to the first virtual node on the ring.
//
// Returns static_cast<size_t>(-1) when `key` is NULL or the ring holds no
// virtual node, instead of dereferencing an invalid pointer or iterator.
size_t ConsistentHash::GetServerIndex(const char* key)
{
    if(key == NULL || server_nodes_.empty())
    {
        return static_cast<size_t>(-1);
    }
    const uint32_t partition = murmur3_32(key, static_cast<uint32_t>(strlen(key)));
    // Walk clockwise to the first virtual node at or after the key's position.
    map<uint32_t, size_t>::iterator it = server_nodes_.lower_bound(partition);
    if(it == server_nodes_.end())
    {
        // Past the largest hash on the ring: wrap around to the beginning.
        it = server_nodes_.begin();
    }
    return it->second;
}
//删除真实节点下的所有虚拟节点
void ConsistentHash::DeleteNode(const int index)
{
for(int j=0; j<virtual_node_num_; ++j)
{
stringstream node_key;
node_key<<"SHARD-"<<index<<"-NODE-"<<j;
uint32_t partition = murmur3_32(node_key.str().c_str(), strlen(node_key.str().c_str()));
map<uint32_t,size_t>::iterator it = server_nodes_.find(partition);
if(it != server_nodes_.end())
{
server_nodes_.erase(it);
}
}
}
//添加节点和其对应的虚拟节点
void ConsistentHash::AddNewNode(const int index)
{
for(int j=0; j<virtual_node_num_; ++j)
{
stringstream node_key;
node_key<<"SHARD-"<<index<<"-NODE-"<<j;
uint32_t partition = murmur3_32(node_key.str().c_str(), strlen(node_key.str().c_str()));
server_nodes_.insert(pair<uint32_t, size_t>(partition, index));
}
}
main.cpp
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sstream>
#include <string>
#include <vector>
#include "consistent_hash.h"
#include "murmurhash3.h"
#include "color.h"
using namespace std;
int main(int argc, char const *argv[])
{
if(argc != 3)
{
printf(L_RED"usage:"YELLOW" ./main sample_count data_count\n"NONE);
return 0;
}
int sample_count = atoi(argv[1]);
int data_count = atoi(argv[2]);
int node_num = 5; //实际节点数量
int virtual_num = 100; //虚拟节点数量
ConsistentHash* consistent_hash = new ConsistentHash(node_num, virtual_num);
consistent_hash->Initialize();
printf("consistent hash initialize success, node_num=%d, virtual_num=%d\n", node_num, virtual_num);
vector<int> result(node_num,0);//节点存放数据数目统计
vector<int> data_index(data_count,-1);//数据存放节点位置,下标是数据值i,它存放在data_index[i]上
srand(time(NULL));
for(int i=0; i<sample_count; ++i)
{
int value = (rand()+getpid()) % data_count;
stringstream ss;
ss<<value;
const char* key = ss.str().c_str();
size_t index = consistent_hash->GetServerIndex(key);
result[index]++;
if(data_index[value] < 0 || (int)index != data_index[value])
{
printf("key = %s, index = %lu\n", key, index);
}
data_index[value] = index;
}
int error_index = 3;
consistent_hash->DeleteNode(error_index);
printf("node error,index = %d\n", error_index);
for(int i=0; i<sample_count; ++i)
{
int value = (rand()+getpid()) % data_count;
stringstream ss;
ss<<value;
const char* key = ss.str().c_str();
size_t index = consistent_hash->GetServerIndex(key);
result[index]++;
if(data_index[value] < 0 || (int)index != data_index[value])
{
printf("key = %s, index = %lu\n", key, index);
}
data_index[value] = index;
}
consistent_hash->AddNewNode(error_index);
printf("node recover,index = %d\n", error_index);
for(int i=0; i<sample_count; ++i)
{
int value = (rand()+getpid()) % data_count;
stringstream ss;
ss<<value;
const char* key = ss.str().c_str();
size_t index = consistent_hash->GetServerIndex(key);
result[index]++;
if(data_index[value] < 0 || (int)index != data_index[value])
{
printf("key = %s, index = %lu\n", key, index);
}
data_index[value] = index;
}
for(int i=0;i<node_num; ++i)
{
printf("index = %d, data_count = %d\n", i, result[i]);
}
return 0;
}