前言
Nacos核心功能点
服务注册:Nacos Client会通过发送REST请求的方式向Nacos Server注册自己的服务,提供自身的元数据,比如ip地址、端口等信息。 Nacos Server接收到注册请求后,就会把这些元数据信息存储在一个双层的内存Map中。
服务心跳:在服务注册后,Nacos Client会维护一个定时心跳来持续通知Nacos Server,说明服务一直处于可用状态,防止被剔除。默认 5s发送一次心跳。
服务同步:Nacos Server集群之间会互相同步服务实例,用来保证服务信息的一致性。
服务发现:服务消费者(Nacos Client)在调用服务提供者的服务时,会发送一个REST请求给Nacos Server,获取上面注册的服务清单,并且缓存在Nacos Client本地;同时会在Nacos Client本地开启一个定时任务,定时拉取服务端最新的注册表信息并更新到本地缓存。
服务健康检查:Nacos Server会开启一个定时任务用来检查注册服务实例的健康情况。对于超过15s没有收到客户端心跳的实例,会将它的healthy属性置为false(客户端服务发现时不会发现);如果某个实例超过30s没有收到心跳,则直接剔除该实例(被剔除的实例如果恢复发送心跳则会重新注册)。
本文主要研究的是客户端调用接口/instance进行服务注册后,服务端是怎么做的
nacos的源码的导入
下载nacos源码,本文选择Tag 1.1.4版本
git clone https://github.com/alibaba/nacos
直接运行console模块里的 com.alibaba.nacos.Nacos.java
增加启动vm参数,注意nacos.home目录要改成你自己源码对应的distribution模块目录
-Dnacos.standalone=true -Dnacos.home=/Users/yuchaolei/Desktop/learnning/nacos/distribution
在执行nacos源码或项目时,会提示istio依赖不存在问题。此问题有个原因:
checkstyle检查导致的问题,执行maven命令时,忽略掉checkstyle。问题现象如下:
解决方法如下:
mvn clean package -Dmaven.test.skip=true -Dcheckstyle.skip=true
客户端调用/instance接口后,会进入nacos-naming包下的InstanceController
服务注册
/**
 * REST controller mapped under {@code UtilsAndCommons.NACOS_NAMING_CONTEXT + "/instance"}.
 *
 * <p>Only the registration endpoint is shown; the remaining members are omitted in this excerpt.
 */
@RestController
@RequestMapping(UtilsAndCommons.NACOS_NAMING_CONTEXT + "/instance")
public class InstanceController {

    // ... other members omitted ...

    /**
     * Handles a POST to the instance endpoint by registering the instance described by the request.
     *
     * @param request the incoming HTTP request carrying the service name, the (optional)
     *                namespace id and the instance attributes
     * @return the literal string {@code "ok"} once the registration call has returned
     * @throws Exception propagated from parameter extraction, instance parsing or registration
     */
    @CanDistro
    @PostMapping
    public String register(HttpServletRequest request) throws Exception {
        // The service name is read via WebUtils.required; the namespace falls back to the default id.
        final String serviceName = WebUtils.required(request, CommonParams.SERVICE_NAME);
        final String namespaceId =
            WebUtils.optional(request, CommonParams.NAMESPACE_ID, Constants.DEFAULT_NAMESPACE_ID);

        // The instance is parsed from the same request and handed to the service manager.
        serviceManager.registerInstance(namespaceId, serviceName, parseInstance(request));

        return "ok";
    }

    // ... other members omitted ...
}
@Component
@DependsOn("nacosApplicationContext")
public class ServiceManager implements RecordListener<Service> {
/**
* Map<namespace, Map<group::serviceName, Service>>
*/
//这是服务的存放位置
private Map<String, Map<String, Service>> serviceMap = new ConcurrentHashMap<>();
public void registerInstance(String namespaceId, String serviceName, Instance instance) throws NacosException {
//实例如果是第一次创建,那么会创建它,放到缓存serviceMap中,并为他开启一个定时任务,进行 服务健康检查
createEmptyService(namespaceId, serviceName, instance.isEphemeral());
//获取实例
Service service = getService(namespaceId, serviceName);
if (service == null) {throw new NacosException(“”);}
addInstance(namespaceId, serviceName, instance.isEphemeral(), instance);
}
public void addInstance(String namespaceId, String serviceName, boolean ephemeral, Instance... ips) throws NacosException {
//根据ephemeral判断是否是临时实例,true是临时实例,false是持久化实例
//然后生成key
String key = KeyBuilder.buildInstanceListKey(namespaceId, serviceName, ephemeral);
Service service = getService(namespaceId, serviceName);
synchronized (service) {
List<Instance> instanceList = addIpAddresses(service, ephemeral, ips);
Instances instances = new Instances();
instances.setInstanceList(instanceList);
consistencyService.put(key, instances);
}
}
}
这里注入的consistencyService实际上是DelegateConsistencyServiceImpl,它会根据key的类型把请求委托给具体的实现:
如果是临时实例,委托给DistroConsistencyServiceImpl(阿里自己实现的AP模式的Distro协议)。
如果是持久化实例,委托给RaftConsistencyServiceImpl(阿里自己实现的CP模式的简单Raft协议)。
临时实例的服务注册
/**
 * Consistency service used for ephemeral instance data (excerpt: write path only).
 */
@org.springframework.stereotype.Service("distroConsistencyService")
public class DistroConsistencyServiceImpl implements EphemeralConsistencyService {

    /**
     * Stores the record locally, then queues the key with the task dispatcher.
     *
     * @param key   datum key of the record
     * @param value the record to store
     * @throws NacosException declared by the interface
     */
    @Override
    public void put(String key, Record value) throws NacosException {
        // Step 1: apply the change to the local in-memory store and notify listeners.
        onPut(key, value);
        // Step 2: queue the key with taskDispatcher. The article notes this is what later
        // syncs the data to the other Nacos server nodes; that part is not shown here.
        taskDispatcher.addTask(key);
    }

    /**
     * Writes an ephemeral instance list into the data store and, when listeners are
     * registered for the key, enqueues a CHANGE notification.
     *
     * @param key   datum key of the record
     * @param value the record; cast to {@code Instances} when the key is an ephemeral list key
     */
    public void onPut(String key, Record value) {
        if (KeyBuilder.matchEphemeralInstanceListKey(key)) {
            Datum<Instances> ephemeralDatum = new Datum<>();
            ephemeralDatum.key = key;
            ephemeralDatum.value = (Instances) value;
            ephemeralDatum.timestamp.incrementAndGet();
            dataStore.put(key, ephemeralDatum);
        }

        // Only keys that somebody listens to produce a notification task.
        if (listeners.containsKey(key)) {
            notifier.addTask(key, ApplyAction.CHANGE);
        }
    }
}
阻塞队列Notifier
往阻塞队列里放入注册实例数据:这些操作本身并不需要在写入之后立即生效,采用这种异步方式对提升操作性能有很大帮助。
这种方式牺牲了一点点实例注册的实时性,但是极大地增加了框架的吞吐量。
@org.springframework.stereotype.Service("distroConsistencyService")
public class DistroConsistencyServiceImpl implements EphemeralConsistencyService {
public class Notifier implements Runnable {
private ConcurrentHashMap<String, String> services = new ConcurrentHashMap<>(10 * 1024);
private BlockingQueue<Pair> tasks = new LinkedBlockingQueue<Pair>(1024 * 1024);
public void addTask(String datumKey, ApplyAction action) {
if (services.containsKey(datumKey) && action == ApplyAction.CHANGE) {
return;
}
if (action == ApplyAction.CHANGE) {
services.put(datumKey, StringUtils.EMPTY);
}
tasks.add(Pair.with(datumKey, action));
}
public int getTaskSize() {
return tasks.size();
}
@Override
public void run() {
while (true) {
try {
Pair pair = tasks.take();
if (pair == null) {
continue;
}
String datumKey = (String) pair.getValue0();
ApplyAction action = (ApplyAction) pair.getValue1();
services.remove(datumKey);
int count = 0;
if (!listeners.containsKey(datumKey)) {
continue;
}
for (RecordListener listener : listeners.get(datumKey)) {
count++;
try {
if (action == ApplyAction.CHANGE) { //如果需要更新
listener.onChange(datumKey, dataStore.get(datumKey).value);
continue;
}
if (action == ApplyAction.DELETE) {//如果需要删除
listener.onDelete(datumKey);
continue;
}
} catch (Throwable e) {
Loggers.DISTRO.error(");
}
}
if (Loggers.DISTRO.isDebugEnabled()) {
Loggers.DISTRO.debug();
}
} catch (Throwable e) {
Loggers.DISTRO.error(");
}
}
}
}
}
这里我们研究下listener.onChange(datumKey, dataStore.get(datumKey).value);
实例是这样进行更新的
public class Service extends com.alibaba.nacos.api.naming.pojo.Service implements Record, RecordListener<Instances> {
private Map<String, Cluster> clusterMap = new HashMap<>(); //里面放的是集群实例
@Override
public void onChange(String key, Instances value) throws Exception {
for (Instance instance : value.getInstanceList()) {
if (instance == null) {
// Reject this abnormal instance list:
throw new RuntimeException("got null instance " + key);
}
//权重的最大值是10000.0D
if (instance.getWeight() > 10000.0D) {
instance.setWeight(10000.0D);
}
//权重的最小值是0.01D
if (instance.getWeight() < 0.01D && instance.getWeight() > 0.0D) {
instance.setWeight(0.01D);
}
}
//更新实例
updateIPs(value.getInstanceList(), KeyBuilder.matchEphemeralInstanceListKey(key));
recalculateChecksum();
}
public void updateIPs(Collection<Instance> instances, boolean ephemeral) {
//为了防止读写井发冲突,大量的运用了 Coplon Write思想防止井发读写冲突,
//具体做法就是把原内存结构复制一份,操作完最后再合井回真正的注册表内存里去
Map<String, List<Instance>> ipMap = new HashMap<>(clusterMap.size());//创建一个副本
for (String clusterName : clusterMap.keySet()) {
ipMap.put(clusterName, new ArrayList<>()); //把现缓存中实例放进去
}
//开始遍历副本
for (Instance instance : instances) {
try {
if (instance == null) {
Loggers.SRV_LOG.error("[NACOS-DOM] received malformed ip: null");
continue;
}
if (StringUtils.isEmpty(instance.getClusterName())) {
instance.setClusterName(UtilsAndCommons.DEFAULT_CLUSTER_NAME);
}
//如果实例中不在缓存副本中,就把实例添加到副本中
if (!clusterMap.containsKey(instance.getClusterName())) {
Loggers.SRV_LOG.warn(");
Cluster cluster = new Cluster(instance.getClusterName(), this);
//添加健康检查任务
cluster.init();
getClusterMap().put(instance.getClusterName(), cluster);
}
List<Instance> clusterIPs = ipMap.get(instance.getClusterName());
if (clusterIPs == null) {
clusterIPs = new LinkedList<>();
ipMap.put(instance.getClusterName(), clusterIPs);
}
clusterIPs.add(instance);
} catch (Exception e) {
Loggers.SRV_LOG.error("[NACOS-DOM] failed to process ip: " + instance, e);
}
}
for (Map.Entry<String, List<Instance>> entry : ipMap.entrySet()) {
//make every ip mine
List<Instance> entryIPs = entry.getValue();
clusterMap.get(entry.getKey()).updateIPs(entryIPs, ephemeral);
}
setLastModifiedMillis(System.currentTimeMillis());
getPushService().serviceChanged(this);
StringBuilder stringBuilder = new StringBuilder();
for (Instance instance : allIPs()) {
stringBuilder.append(instance.toIPAddr()).append("_").append(instance.isHealthy()).append(",");
}
Loggers.EVT_LOG.info("[IP-UPDATED] namespace: {}, service: {}, ips: {}",
getNamespaceId(), getName(), stringBuilder.toString());
}
}
//待完成