作业一:
用你熟悉的编程语言实现一致性 hash 算法。
编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。
实验条件:物理节点数 10 个,元素 100 万,key 长度 10 byte。下面是虚拟节点数与标准差的关系:
- key 的长度对标准差没有影响
- hash 函数对标准差的影响不大。对比 MD5 和 CRC32 两种 hash 函数,CRC32 稍好于 MD5
- 虚拟节点个数对标准差影响很大,见下表。
虚拟节点 | 标准差 |
---|---|
100 | 6005 |
200 | 6342 |
400 | 5748 |
800 | 3059 |
1600 | 1856 |
4800 | 1265 |
9600 | 1131 |
20000 | 755 |
虚拟节点数从 9600 开始,出现虚拟节点的 key(hash 值)冲突
作业二:
根据当周学习情况,完成一篇学习总结
这周最大的收获就是作业一。最近刚研究一致性 Hash,参考了一个开源的实现,从代码层面理解了一致性 Hash 的原理。而且,在完成作业的过程中,对原有代码进行了很大的重构,对某些细节理解得更加深刻了。
本周其他内容属于科普性质,可以参考老师的《大规模技术架构》这本书。显然老师对本周的内容自己并没有过多的实践,所以讲的内容几乎都是市面上的科普文。
附录
import lombok.extern.slf4j.Slf4j;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
import java.util.zip.CRC32;
/**
 * Consistent-hash ring that routes object keys to physical nodes through virtual nodes.
 *
 * <p>Each physical node is represented on the ring by a number of {@link VirtualNode}s, every one
 * placed at the hash of its virtual-node key. An object is routed to the physical node owning the
 * first virtual node at or after the object's hash, wrapping around to the first ring entry.
 *
 * <p>This class performs no synchronization of its own.
 *
 * @param <K> type of the keys stored on the physical nodes
 * @param <V> type of the values stored on the physical nodes
 * @author liuwenxue
 * @date 2020-06-18
 */
@Slf4j
public class ConsistentHash<K, V> {
    /** Ring position (hash value) mapped to the virtual node placed at that position. */
    private final NavigableMap<Long, VirtualNode> ring = new TreeMap<>();
    /** Registered physical nodes, indexed by {@link Node#getKey()}. */
    private final Map<String, Node<K, V>> physicalNodeMap = new HashMap<>();
    /** Hash used both for placing virtual nodes and for locating object keys. */
    private final HashFunction hashFunction;

    /**
     * Creates a ring that uses {@link MD5Hash} as its hash function.
     *
     * @param pNodes     physical nodes to register; {@code null} is treated as "none"
     * @param vNodeCount number of virtual nodes created per physical node
     */
    public ConsistentHash(List<Node<K, V>> pNodes, int vNodeCount) {
        this(pNodes, vNodeCount, new MD5Hash());
    }

    /**
     * Creates a ring with an explicit hash function.
     *
     * @param pNodes       physical nodes to register; {@code null} is treated as "none"
     * @param vNodeCount   number of virtual nodes created per physical node
     * @param hashFunction hash function used for ring positions
     * @throws NullPointerException if {@code hashFunction} is {@code null}
     */
    public ConsistentHash(List<Node<K, V>> pNodes, int vNodeCount, HashFunction hashFunction) {
        this.hashFunction = Objects.requireNonNull(hashFunction, "Hash Function is null");
        if (pNodes != null) {
            for (Node<K, V> pNode : pNodes) {
                addNode(pNode, vNodeCount);
            }
        }
    }

    /**
     * Adds a physical node to the ring with weight 1.
     *
     * @param pNode      physical node
     * @param vNodeCount number of virtual nodes to create
     */
    public void addNode(Node<K, V> pNode, int vNodeCount) {
        addNode(pNode, 1, vNodeCount);
    }

    /**
     * Adds a physical node to the ring, creating {@code vNodeCount * weight} virtual nodes.
     *
     * <p>A virtual node whose hash is already occupied on the ring is skipped with a warning, so
     * the node may end up with fewer virtual nodes than requested.
     *
     * @param pNode      physical node
     * @param weight     multiplier applied to {@code vNodeCount}
     * @param vNodeCount base number of virtual nodes
     * @throws IllegalArgumentException if {@code vNodeCount} or {@code weight} is negative, if
     *                                  their product does not fit in an {@code int}, or if a node
     *                                  with the same key is already registered
     * @throws NullPointerException     if {@code pNode} is {@code null}
     */
    public void addNode(Node<K, V> pNode, int weight, int vNodeCount) {
        if (vNodeCount < 0) {
            throw new IllegalArgumentException("illegal virtual node counts :" + vNodeCount);
        }
        if (weight < 0) {
            throw new IllegalArgumentException("illegal weight :" + weight);
        }
        Objects.requireNonNull(pNode, "physical node is null");
        // Multiply in long so an overflow is reported instead of silently yielding no replicas.
        long requested = (long) vNodeCount * weight;
        if (requested > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("too many virtual nodes requested :" + requested);
        }
        String nodeKey = pNode.getKey();
        if (physicalNodeMap.containsKey(nodeKey)) {
            throw new IllegalArgumentException("conflict physical node " + nodeKey);
        }
        physicalNodeMap.put(nodeKey, pNode);
        // Offset for replica indexes; counts virtual nodes already on the ring for this key.
        int existingReplicas = getExistingReplicas(pNode);
        int replicas = (int) requested;
        for (int i = 0; i < replicas; i++) {
            VirtualNode vNode = new VirtualNode(nodeKey, weight, i + existingReplicas);
            long position = hashFunction.hash(vNode.getKey());
            // Keep the first occupant of a ring position; later colliding virtual nodes are dropped.
            if (ring.putIfAbsent(position, vNode) != null) {
                log.warn("virtual node hash conflict, skipped: key={}, hash={}", vNode.getKey(), position);
            }
        }
    }

    /**
     * Removes a physical node and all of its virtual nodes from the ring.
     *
     * @param pNode physical node
     */
    public void removeNode(Node<K, V> pNode) {
        String nodeKey = pNode.getKey();
        physicalNodeMap.remove(nodeKey);
        ring.values().removeIf(vNode -> vNode.isVirtualNodeOf(nodeKey));
    }

    /**
     * Returns all registered physical nodes.
     *
     * @return read-only live view of the physical nodes
     */
    public Collection<Node<K, V>> getAllPhysicalNodes() {
        return Collections.unmodifiableCollection(physicalNodeMap.values());
    }

    /**
     * Returns all virtual nodes in ring order.
     *
     * @return read-only live view of the virtual nodes
     */
    public Collection<VirtualNode> getAllVirtualNodes() {
        return Collections.unmodifiableCollection(ring.values());
    }

    /**
     * Prints the key of every virtual node to standard output.
     */
    public void dumpAllVirtualNodes() {
        System.out.println("all virtual nodes:");
        for (VirtualNode virtualNode : ring.values()) {
            System.out.println(virtualNode.getKey());
        }
    }

    /**
     * Collects the virtual nodes that belong to one physical node.
     *
     * @param pNode physical node
     * @return new list of that node's virtual nodes, in ring order
     */
    public List<VirtualNode> getVirtualNodes(Node<K, V> pNode) {
        String nodeKey = pNode.getKey();
        List<VirtualNode> virtualNodes = new ArrayList<>();
        for (VirtualNode virtualNode : ring.values()) {
            if (virtualNode.isVirtualNodeOf(nodeKey)) {
                virtualNodes.add(virtualNode);
            }
        }
        return virtualNodes;
    }

    /**
     * Prints the keys of one physical node's virtual nodes to standard output.
     *
     * @param pNode physical node
     */
    public void dumpVirtualNodes(Node<K, V> pNode) {
        System.out.println("virtual nodes of node" + pNode.getKey());
        for (VirtualNode virtualNode : getVirtualNodes(pNode)) {
            System.out.println(virtualNode.getKey());
        }
    }

    /**
     * Finds the physical node responsible for an object key.
     *
     * @param objectKey key of the object
     * @return the owning physical node, or {@code null} when the ring is empty
     */
    public Node<K, V> routeNode(String objectKey) {
        if (ring.isEmpty()) {
            return null;
        }
        long hash = hashFunction.hash(objectKey);
        // First virtual node at or after the hash; wrap to the start of the ring if none.
        Map.Entry<Long, VirtualNode> owner = ring.ceilingEntry(hash);
        if (owner == null) {
            owner = ring.firstEntry();
        }
        return physicalNodeMap.get(owner.getValue().getPhysicalNodeId());
    }

    /**
     * Counts the virtual nodes currently on the ring for one physical node.
     *
     * @param pNode physical node
     * @return number of virtual nodes owned by {@code pNode}
     */
    public int getExistingReplicas(Node<K, V> pNode) {
        String nodeKey = pNode.getKey();
        int replicas = 0;
        for (VirtualNode vNode : ring.values()) {
            if (vNode.isVirtualNodeOf(nodeKey)) {
                replicas++;
            }
        }
        return replicas;
    }

    /**
     * Reads a value from the node that owns {@code key}. Routing uses the decimal string of
     * {@code key.hashCode()}.
     *
     * @param key object key
     * @return the value returned by the owning node, or {@code null} when the ring is empty
     */
    public V get(K key) {
        Node<K, V> node = routeNode(String.valueOf(key.hashCode()));
        return node == null ? null : node.get(key);
    }

    /**
     * Stores a value on the node that owns {@code key}. Routing uses the decimal string of
     * {@code key.hashCode()}.
     *
     * @param key   object key
     * @param value value to store
     * @throws IllegalStateException if no physical node is available
     */
    public void put(K key, V value) {
        Node<K, V> node = routeNode(String.valueOf(key.hashCode()));
        if (node == null) {
            throw new IllegalStateException("no physical node available to store the key");
        }
        node.put(key, value);
    }

    /**
     * Calls {@link Node#reset()} on every registered physical node.
     */
    public void reset() {
        for (Node<K, V> node : physicalNodeMap.values()) {
            node.reset();
        }
    }

    /**
     * Hash function built from the first four bytes of the MD5 digest, giving an unsigned
     * 32-bit value. The digest instance is reused across calls without synchronization.
     */
    public static class MD5Hash implements HashFunction {
        MessageDigest instance;

        /**
         * @throws IllegalStateException if the platform provides no MD5 implementation
         */
        public MD5Hash() {
            try {
                instance = MessageDigest.getInstance("MD5");
            } catch (NoSuchAlgorithmException e) {
                // Fail at construction instead of with a NullPointerException on first use.
                throw new IllegalStateException("MD5 digest algorithm is not available", e);
            }
        }

        @Override
        public long hash(String key) {
            instance.reset();
            // Fixed charset so the same key hashes identically on every platform.
            instance.update(key.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            byte[] digest = instance.digest();
            long h = 0;
            for (int i = 0; i < 4; i++) {
                h = (h << 8) | (digest[i] & 0xFF);
            }
            return h;
        }
    }

    /**
     * Hash function based on CRC-32. The checksum instance is reused across calls without
     * synchronization.
     */
    public static class CRC32Hash implements HashFunction {
        CRC32 crc32;

        public CRC32Hash() {
            crc32 = new CRC32();
        }

        @Override
        public long hash(String key) {
            // CRC32 accumulates over successive update() calls; reset so each key hashes alone.
            crc32.reset();
            crc32.update(key.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            return crc32.getValue();
        }
    }
}
/**
 * Strategy that maps a string key to a {@code long} hash value.
 *
 * <p>It declares a single abstract method, so it can be supplied as a lambda or method reference.
 *
 * @author liuwenxue
 * @date 2020-06-18
 */
@FunctionalInterface
public interface HashFunction {
    /**
     * Computes the hash of a key.
     *
     * @param key key to hash
     * @return hash value of {@code key}
     */
    long hash(String key);
}
/**
 * A storage node that holds key/value pairs and can be identified by a string key.
 *
 * @param <K> type of the stored keys
 * @param <V> type of the stored values
 * @author : liuwenxue
 * @date :2020/06/18 23:49
 * @path : com.wexueliu.bizhidao.consistenthash.Node
 */
public interface Node<K, V> {
    /**
     * Returns the identifier of this node.
     *
     * @return node identifier
     */
    String getKey();

    /**
     * Looks up the value stored under a key. What is returned for an absent key is defined by
     * the implementation.
     *
     * @param key key to look up
     * @return the value associated with {@code key}
     */
    V get(K key);

    /**
     * Stores a value under a key.
     *
     * @param key   key to store under
     * @param value value to store
     */
    void put(K key, V value);

    /**
     * Reports how many entries this node holds, as counted by the implementation.
     *
     * @return number of entries
     */
    long size();

    /**
     * Resets this node; implementations are expected to discard their stored entries.
     */
    void reset();
}
import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.RemovalListener;
import java.util.concurrent.TimeUnit;
/**
 * {@link Node} implementation that keeps its entries in an in-process Caffeine cache and is
 * identified by {@code ip:port}.
 *
 * @param <K> type of the stored keys
 * @param <V> type of the stored values
 * @author liuwenxue
 * @date 2020-07-07
 */
public class PhysicalNode<K, V> implements Node<K, V> {
    // Bounded to 1,100,000 entries with a 10-hour expire-after-write; the removal listener
    // prints every removed entry to standard output.
    private final Cache<K, V> cache = Caffeine.newBuilder().maximumSize(1100000).expireAfterWrite(10, TimeUnit.HOURS)
            //.weakKeys().weakValues()
            .removalListener(
                    (RemovalListener<? super Object, ? super Object>) (k, v, cause) -> {
                        System.out.println("k=" + k + ";value" + v);
                    }).build();
    // Address parts; fixed at construction because they form the node key.
    private final String ip;
    private final int port;

    /**
     * @param ip   address part of the node key
     * @param port port part of the node key
     */
    PhysicalNode(String ip, int port) {
        this.ip = ip;
        this.port = port;
    }

    /**
     * @return the cached value for {@code key}, as returned by the cache's {@code getIfPresent}
     */
    @Override
    public V get(K key) {
        return cache.getIfPresent(key);
    }

    /** Stores {@code value} under {@code key} in the cache. */
    @Override
    public void put(K key, V value) {
        cache.put(key, value);
    }

    /**
     * @return the cache's {@code estimatedSize()}
     */
    @Override
    public long size() {
        return cache.estimatedSize();
    }

    /**
     * @return the node key in the form {@code ip:port}
     */
    @Override
    public String getKey() {
        return ip + ":" + port;
    }

    /** Invalidates every entry of the cache. */
    @Override
    public void reset() {
        cache.invalidateAll();
    }
}
/**
 * One replica of a physical node on the hash ring.
 *
 * @author liuwenxue
 * @date 2020-06-18
 */
public class VirtualNode {
    /** Identifier of the physical node this replica stands for. */
    final String physicalNodeId;
    /** Weight of the owning physical node, recorded as given. */
    final int weight;
    /** Index of this replica among the replicas of its physical node. */
    final int replicaIndex;

    /**
     * @param physicalNode identifier of the owning physical node
     * @param weight       weight of the owning physical node
     * @param replicaIndex index of this replica
     */
    public VirtualNode(String physicalNode, int weight, int replicaIndex) {
        this.physicalNodeId = physicalNode;
        this.weight = weight;
        this.replicaIndex = replicaIndex;
    }

    /**
     * Builds the key of this virtual node.
     *
     * @return {@code replicaIndex-weight-physicalNodeId}
     */
    public String getKey() {
        StringBuilder key = new StringBuilder();
        key.append(replicaIndex).append('-');
        key.append(weight).append('-');
        key.append(physicalNodeId);
        return key.toString();
    }

    /**
     * Tells whether this replica belongs to the given physical node.
     *
     * @param pNodeId identifier of a physical node
     * @return {@code true} when the identifiers are equal
     */
    public boolean isVirtualNodeOf(String pNodeId) {
        return this.physicalNodeId.equals(pNodeId);
    }

    /**
     * @return identifier of the owning physical node
     */
    public String getPhysicalNodeId() {
        return this.physicalNodeId;
    }

    /**
     * @return weight recorded for this replica
     */
    public int getWeight() {
        return this.weight;
    }
}
测试代码
import java.security.SecureRandom;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Load-distribution experiment for {@link ConsistentHash}: fills a 10-node ring with random keys
 * and prints the standard deviation of the per-node entry counts.
 *
 * @author liuwenxue
 * @date 2020-07-07
 */
public class Test {
    /** Number of physical nodes placed on the ring in every run. */
    private static final int PHYSICAL_NODE_COUNT = 10;
    /** Single-byte characters, so a key of {@code len} characters is {@code len} bytes long. */
    private static final String KEY_ALPHABET =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
    /** Shared generator; creating one per key is needlessly expensive. */
    private static final SecureRandom RANDOM = new SecureRandom();

    public static void main(String[] args) {
        int[] md5VirtualCounts = {100, 200, 400, 800, 1600, 4800, 9600, 20000};
        for (int virtualCount : md5VirtualCounts) {
            test(virtualCount, 1000000, new ConsistentHash.MD5Hash(), 10);
        }
        int[] crc32VirtualCounts = {9600, 20000};
        for (int virtualCount : crc32VirtualCounts) {
            test(virtualCount, 1000000, new ConsistentHash.CRC32Hash(), 10);
        }
    }

    /**
     * Runs one experiment and prints the total entry count and the standard deviation.
     *
     * @param virtualCount number of virtual nodes per physical node
     * @param totalElement number of random keys to store
     * @param hashFunction hash function used by the ring
     * @param keyLen       length of each random key, in characters
     */
    public static void test(int virtualCount, int totalElement, HashFunction hashFunction, int keyLen) {
        List<Node<String, String>> physicalNodes = new ArrayList<>();
        for (int i = 1; i <= PHYSICAL_NODE_COUNT; i++) {
            Node<String, String> node = new PhysicalNode<>("10.1.1." + i, 80);
            physicalNodes.add(node);
        }
        // Pass the requested hash function through; the two-argument constructor always uses MD5.
        ConsistentHash<String, String> consistentHash =
                new ConsistentHash<>(physicalNodes, virtualCount, hashFunction);
        for (int i = 0; i < totalElement; i++) {
            consistentHash.put(randomKey(keyLen), "");
        }
        List<Long> nodeSizes = new ArrayList<>();
        for (Node<String, String> node : consistentHash.getAllPhysicalNodes()) {
            nodeSizes.add(node.size());
        }
        System.out.println(sqrtList(nodeSizes));
    }

    /**
     * Computes the population standard deviation of the given sizes and prints their total.
     *
     * @param nodeSizes per-node entry counts
     * @return the standard deviation, or {@code 0} for an empty list
     */
    private static double sqrtList(List<Long> nodeSizes) {
        int elementSize = nodeSizes.size();
        long total = 0;
        for (long size : nodeSizes) {
            total += size;
        }
        System.out.println("total size " + total);
        if (elementSize == 0) {
            return 0;
        }
        // Floating-point mean: integer division would truncate and bias the deviation.
        double avg = (double) total / elementSize;
        double squaredDeviations = 0;
        for (long size : nodeSizes) {
            double deviation = size - avg;
            squaredDeviations += deviation * deviation;
        }
        return Math.sqrt(squaredDeviations / elementSize);
    }

    /**
     * Generates a random alphanumeric key.
     *
     * @param len number of characters in the key
     * @return random key of length {@code len}
     */
    private static String randomKey(int len) {
        // String.valueOf(byte[]) would yield the array's identity string, not its content.
        StringBuilder key = new StringBuilder(len);
        for (int i = 0; i < len; i++) {
            key.append(KEY_ALPHABET.charAt(RANDOM.nextInt(KEY_ALPHABET.length())));
        }
        return key.toString();
    }
}