Nacos 服务端服务注册源码解析
- 根据上文客户端发送到服务端的请求,找到对应的方法
InstanceController#register
,ServiceManager
: 管理所有的服务,其中 serviceMap (Map(namespace, Map(group::serviceName, Service))) 以双层 map 结构存储服务列表;namespace:表示环境,如 prod、uat、dev;group:表示哪个产品,如 sales、seckill;serviceName:服务名,一般是项目名。- Service: Nacos服务器端服务,存储了集群信息和实例列表信息
- 调用
ServiceManager#registerInstance
: 在AP模式下注册实例。
/**
 * Registers an instance under the given namespace and service.
 *
 * <p>An (initially empty) service entry is created first, the service is then looked up again, and
 * finally the instance is added to it.
 *
 * @param namespaceId namespace the service belongs to
 * @param serviceName name of the service
 * @param instance    instance to register; its ephemeral flag is forwarded to the helpers
 * @throws NacosException with {@code INVALID_PARAM} when the service cannot be found after the
 *                        creation step, or when one of the helpers fails
 */
public void registerInstance(String namespaceId, String serviceName, Instance instance) throws NacosException {
    // Make sure a service entry exists before anything is attached to it.
    createEmptyService(namespaceId, serviceName, instance.isEphemeral());

    // Hierarchy: service -> list of clusters -> list of instances.
    // The service was created just above, so a missing service here is reported as an error.
    if (getService(namespaceId, serviceName) == null) {
        final String message = "service not found, namespace: " + namespaceId + ", service: " + serviceName;
        throw new NacosException(NacosException.INVALID_PARAM, message);
    }

    // Attach the instance to the service.
    addInstance(namespaceId, serviceName, instance.isEphemeral(), instance);
}
- 调用
ServiceManager#createServiceIfAbsent
:创建空的服务,将服务放入 serviceMap 中,结构为Map(namespaceId, Map(group::serviceName, Service))。 首先将 service 放入 serviceMap 中,这个是注册表,表示了 nacos 上所有的服务注册信息。
/**
 * Stores the service in {@code serviceMap} under its namespace id and name.
 *
 * <p>The per-namespace map is created on first use; the existence check is repeated inside the
 * {@code putServiceLock} monitor so that only one thread creates it.
 *
 * @param service service to store
 */
public void putService(Service service) {
    final String namespaceId = service.getNamespaceId();

    // Check, lock, check again: create the inner map for this namespace only once.
    if (!serviceMap.containsKey(namespaceId)) {
        synchronized (putServiceLock) {
            if (!serviceMap.containsKey(namespaceId)) {
                serviceMap.put(namespaceId, new ConcurrentSkipListMap<>());
            }
        }
    }

    serviceMap.get(namespaceId).put(service.getName(), service);
}
service#init()
: 初始化服务:进行心跳检测consistencyService#listen
, 实际作用的是一个代理类DelegateConsistencyServiceImpl
,这个类中注册了 service 变更的监听器,当 service 中的内容被改变时,会执行 service#onChange,修改 service 中的数据。
// Register `service` as a listener on both instance-list keys of this service: once for the key built
// with `true` and once for the key built with `false` as the third argument of buildInstanceListKey
// (that argument is the ephemeral flag, see addInstance).
// Per the surrounding write-up, consistencyService is a DelegateConsistencyServiceImpl (delegation pattern).
// A registered listener later has onChange / onDelete invoked when a queued task for its key is handled
// (see Notifier#handle further down in this document).
consistencyService
.listen(KeyBuilder.buildInstanceListKey(service.getNamespaceId(), service.getName(), true), service);
consistencyService
.listen(KeyBuilder.buildInstanceListKey(service.getNamespaceId(), service.getName(), false), service);
- 接下来就是关键的代码:添加服务实例到 nacos 中,上一步提到了serviceMap ,它的结构是 service -> clusterMap(String, cluster) -> ephemeralInstances(临时实例列表)/persistentInstances(持久化实例列表), 每个服务中存在多个集群,每个集群中存在着多个实例列表。结构如下,摘自官网
/**
 * Adds the given instances to a service and writes the resulting instance list to the consistency service.
 *
 * @param namespaceId namespace the service belongs to
 * @param serviceName name of the service
 * @param ephemeral   whether the instances are ephemeral; selects which instance-list key is used
 * @param ips         instances to add
 * @throws NacosException if computing or storing the new instance list fails
 */
public void addInstance(String namespaceId, String serviceName, boolean ephemeral, Instance... ips)
        throws NacosException {
    // Key layout according to the original notes:
    //   ephemeral:  com.alibaba.nacos.naming.iplist.ephemeral.<namespaceId>##<serviceName>
    //   persistent: com.alibaba.nacos.naming.iplist.<namespaceId>##<serviceName>
    final String instanceListKey = KeyBuilder.buildInstanceListKey(namespaceId, serviceName, ephemeral);

    // Look the service up in the registry.
    final Service target = getService(namespaceId, serviceName);

    // Computing and publishing the list happens while holding the service's monitor.
    synchronized (target) {
        // Full, up-to-date instance list including the new ips.
        final List<Instance> merged = addIpAddresses(target, ephemeral, ips);

        final Instances payload = new Instances();
        payload.setInstanceList(merged);

        // Per the write-up this resolves to DelegateConsistencyServiceImpl#put.
        consistencyService.put(instanceListKey, payload);
    }
}
- 获取到对应 service 后,执行
addIpAddresses
方法,
/**
 * Returns the instance list that results from adding {@code ips} to {@code service}.
 * Delegates to updateIpAddresses with the UPDATE_INSTANCE_ACTION_ADD action.
 */
private List<Instance> addIpAddresses(Service service, boolean ephemeral, Instance... ips) throws NacosException {
return updateIpAddresses(service, UtilsAndCommons.UPDATE_INSTANCE_ACTION_ADD, ephemeral, ips);
}
/**
 * Computes the instance list of a service after applying {@code action} with the given instances.
 *
 * <p>The starting point is the list stored in the consistency service (if any), reconciled with the
 * instances currently held by the service; the given instances are then removed from or put into
 * that set, keyed by their datum key.
 *
 * @param service   service whose instance list is recomputed
 * @param action    update action; REMOVE deletes the given instances, anything else adds/overwrites them
 * @param ephemeral whether the ephemeral or the persistent instance list is targeted
 * @param ips       instances to apply
 * @return a new list holding the resulting instances
 * @throws NacosException           if reading the stored data fails
 * @throws IllegalArgumentException if the action is ADD and the resulting list is empty
 */
public List<Instance> updateIpAddresses(Service service, String action, boolean ephemeral, Instance... ips)
        throws NacosException {
    // Stored instance list for this service and ephemeral flag, if any.
    Datum datum = consistencyService
            .get(KeyBuilder.buildInstanceListKey(service.getNamespaceId(), service.getName(), ephemeral));

    // Instances the service currently holds for the requested kind (ephemeral or persistent).
    List<Instance> currentIPs = service.allIPs(ephemeral);
    Map<String, Instance> instancesByAddr = new HashMap<>(currentIPs.size());
    Set<String> knownInstanceIds = Sets.newHashSet();
    for (Instance current : currentIPs) {
        instancesByAddr.put(current.toIpAddr(), current);
        knownInstanceIds.add(current.getInstanceId());
    }

    Map<String, Instance> merged;
    if (datum == null || datum.value == null) {
        // Nothing stored yet: start from an empty map.
        merged = new HashMap<>(ips.length);
    } else {
        // Reconcile the stored list with the instances currently held by the service.
        merged = setValid(((Instances) datum.value).getInstanceList(), instancesByAddr);
    }

    // Apply every instance passed in.
    for (Instance incoming : ips) {
        // Create the instance's cluster on the fly when the service does not know it yet.
        if (!service.getClusterMap().containsKey(incoming.getClusterName())) {
            Cluster created = new Cluster(incoming.getClusterName(), service);
            created.init();
            service.getClusterMap().put(incoming.getClusterName(), created);
            Loggers.SRV_LOG
                    .warn("cluster: {} not found, ip: {}, will create new cluster with default configuration.",
                            incoming.getClusterName(), incoming.toJson());
        }

        if (UtilsAndCommons.UPDATE_INSTANCE_ACTION_REMOVE.equals(action)) {
            // Removal: drop the entry and move on.
            merged.remove(incoming.getDatumKey());
            continue;
        }

        // Add/overwrite: the incoming instance wins, but keeps the id of an existing entry if there is one.
        Instance previous = merged.get(incoming.getDatumKey());
        incoming.setInstanceId(previous != null
                ? previous.getInstanceId()
                : incoming.generateInstanceId(knownInstanceIds));
        merged.put(incoming.getDatumKey(), incoming);
    }

    if (merged.isEmpty() && UtilsAndCommons.UPDATE_INSTANCE_ACTION_ADD.equals(action)) {
        throw new IllegalArgumentException(
                "ip list can not be empty, service: " + service.getName() + ", ip list: " + JacksonUtils
                        .toJson(merged.values()));
    }

    return new ArrayList<>(merged.values());
}
- 接下来就是最后一步,将最新的实例表,注册到nacos中, 核心就是调用了 DistroConsistencyServiceImpl#put
/**
 * Stores {@code value} under {@code key} by forwarding the call to the consistency service
 * that mapConsistencyService selects for the key.
 */
public void put(String key, Record value) throws NacosException {
// Original note: the key decides between in-memory (ephemeral) and persistent storage;
// ephemeral keys go to DistroConsistencyServiceImpl, all others to persistentConsistencyServiceDelegate.
// NOTE(review): mapConsistencyService is not shown here; confirm the routing rule against its source.
mapConsistencyService(key).put(key, value);
}
/**
 * Stores the record locally and then asks the Distro protocol to sync the change.
 *
 * @param key   datum key
 * @param value record to store
 */
public void put(String key, Record value) {
    // Local handling first: store the datum and queue a change notification.
    onPut(key, value);

    // Then schedule a CHANGE sync for this key, delayed by half the configured task-dispatch period.
    // Per the write-up, this is what propagates the data to the other Nacos servers in a cluster.
    final DistroKey distroKey = new DistroKey(key, KeyBuilder.INSTANCE_LIST_KEY_PREFIX);
    distroProtocol.sync(distroKey, DataOperation.CHANGE, globalConfig.getTaskDispatchPeriod() / 2);
}
DistroConsistencyServiceImpl#onPut
: 添加新的实例记录
/**
 * Records a new value for {@code key} and queues a change notification when someone listens on it.
 *
 * @param key   datum key
 * @param value new record; cast to {@code Instances} when the key is an ephemeral instance-list key
 */
public void onPut(String key, Record value) {
    // Only ephemeral instance-list keys are kept in dataStore; the notifier reads them back later.
    if (KeyBuilder.matchEphemeralInstanceListKey(key)) {
        Datum<Instances> record = new Datum<>();
        record.key = key;
        record.value = (Instances) value;
        record.timestamp.incrementAndGet();
        dataStore.put(key, record);
    }

    // Without a listener for this key there is nobody to notify.
    if (listeners.containsKey(key)) {
        notifier.addTask(key, DataOperation.CHANGE);
    }
}
- 调用
notifier#addTask
: 将任务添加到阻塞队列中,根据 action 区分执行服务变更还是服务移除。
/**
 * Queues a notification task for the given datum key.
 *
 * <p>A CHANGE task is skipped when the key is already marked in {@code services}; otherwise the key
 * is marked before the task is queued. Tasks with any other action are always queued.
 *
 * @param datumKey key of the changed datum
 * @param action   operation to notify listeners about
 */
public void addTask(String datumKey, DataOperation action) {
    if (action == DataOperation.CHANGE) {
        // A marked key means a CHANGE for it is still pending, so do not queue a duplicate.
        if (services.containsKey(datumKey)) {
            return;
        }
        services.put(datumKey, StringUtils.EMPTY);
    }

    // Hand the task to the queue; Notifier#run picks it up from there.
    tasks.offer(Pair.with(datumKey, action));
}
- tasks 阻塞队列中的任务是什么时候被取出执行的呢?
当DistroConsistencyServiceImpl
初始化完成后, 会执行 init 方法,因为@PostConstruct
注解修饰的方法会在 Bean 初始化完成后被调用。
/**
 * Submits the {@code notifier} for execution.
 * The method carries {@code @PostConstruct}, i.e. it is meant to run once the object has been initialized.
 */
@PostConstruct
public void init() {
// Hand the notifier task over right after initialization.
GlobalExecutor.submitDistroNotifyTask(notifier);
}
/**
 * Submits the given task to {@code DISTRO_NOTIFY_EXECUTOR}.
 *
 * @param runnable task to execute
 */
public static void submitDistroNotifyTask(Runnable runnable) {
DISTRO_NOTIFY_EXECUTOR.submit(runnable);
}
class Notifier implements Runnable
,其中包含了执行的 run 方法
/**
 * Endless consumer loop: takes queued tasks one by one and hands each to {@code handle}.
 * Any failure is logged and the loop keeps going.
 */
public void run() {
    Loggers.DISTRO.info("distro notifier started");

    while (true) {
        try {
            // take() waits until a task is available (assuming tasks is a BlockingQueue),
            // so the loop does not spin while the queue is empty.
            handle(tasks.take());
        } catch (Throwable e) {
            Loggers.DISTRO.error("[NACOS-DISTRO] Error while handling notifying task", e);
        }
    }
}
/**
 * Processes one queued task by notifying every listener registered for its datum key.
 *
 * <p>CHANGE tasks invoke {@code onChange} with the value currently held in {@code dataStore};
 * DELETE tasks invoke {@code onDelete}. A failing listener is logged and does not stop the others.
 *
 * @param pair datum key (value0) and the operation to notify about (value1)
 */
private void handle(Pair<String, DataOperation> pair) {
    try {
        final String datumKey = pair.getValue0();
        final DataOperation action = pair.getValue1();

        // Unmark the key before notifying, so a later CHANGE for it can be queued again.
        services.remove(datumKey);

        // Nothing to do when nobody listens on this key.
        if (!listeners.containsKey(datumKey)) {
            return;
        }

        int notified = 0;
        for (RecordListener listener : listeners.get(datumKey)) {
            notified++;
            try {
                if (action == DataOperation.CHANGE) {
                    // Data changed: pass the current value to the listener.
                    listener.onChange(datumKey, dataStore.get(datumKey).value);
                } else if (action == DataOperation.DELETE) {
                    // Data removed.
                    listener.onDelete(datumKey);
                }
            } catch (Throwable e) {
                Loggers.DISTRO.error("[NACOS-DISTRO] error while notifying listener of key: {}", datumKey, e);
            }
        }

        if (Loggers.DISTRO.isDebugEnabled()) {
            Loggers.DISTRO
                    .debug("[NACOS-DISTRO] datum change notified, key: {}, listener count: {}, action: {}",
                            datumKey, notified, action.name());
        }
    } catch (Throwable e) {
        Loggers.DISTRO.error("[NACOS-DISTRO] Error while handling notifying task", e);
    }
}
- 执行服务变更时,会执行
Service#onChange
, 修改注册表,其中核心方法为updateIPs(value.getInstanceList(), KeyBuilder.matchEphemeralInstanceListKey(key))
,
/**
 * Replaces the instance lists of this service's clusters with the given instances.
 *
 * <p>The instances are grouped by cluster name (unknown clusters are created on the fly), each
 * cluster is then updated with its group, the last-modified time is refreshed, a service-changed
 * push is triggered and the resulting ip list is logged.
 *
 * @param instances complete set of instances to distribute over the clusters
 * @param ephemeral whether the ephemeral or the persistent lists are updated
 */
public void updateIPs(Collection<Instance> instances, boolean ephemeral) {
    // One (initially empty) bucket per known cluster, so every existing cluster gets updated below.
    Map<String, List<Instance>> instancesByCluster = new HashMap<>(clusterMap.size());
    for (String knownCluster : clusterMap.keySet()) {
        instancesByCluster.put(knownCluster, new ArrayList<>());
    }

    // Sort every usable instance into the bucket of its cluster.
    for (Instance instance : instances) {
        try {
            if (instance == null) {
                Loggers.SRV_LOG.error("[NACOS-DOM] received malformed ip: null");
                continue;
            }

            // Instances without a cluster name go to the default cluster.
            if (StringUtils.isEmpty(instance.getClusterName())) {
                instance.setClusterName(UtilsAndCommons.DEFAULT_CLUSTER_NAME);
            }

            if (!clusterMap.containsKey(instance.getClusterName())) {
                Loggers.SRV_LOG
                        .warn("cluster: {} not found, ip: {}, will create new cluster with default configuration.",
                                instance.getClusterName(), instance.toJson());
                Cluster created = new Cluster(instance.getClusterName(), this);
                created.init();
                getClusterMap().put(instance.getClusterName(), created);
            }

            // Buckets for clusters created just above do not exist yet; add them lazily.
            instancesByCluster.computeIfAbsent(instance.getClusterName(), unused -> new LinkedList<>())
                    .add(instance);
        } catch (Exception e) {
            Loggers.SRV_LOG.error("[NACOS-DOM] failed to process ip: " + instance, e);
        }
    }

    // Hand every cluster its new instance list.
    for (Map.Entry<String, List<Instance>> bucket : instancesByCluster.entrySet()) {
        List<Instance> clusterInstances = bucket.getValue();
        clusterMap.get(bucket.getKey()).updateIps(clusterInstances, ephemeral);
    }

    setLastModifiedMillis(System.currentTimeMillis());

    // Let the push service know that this service changed.
    getPushService().serviceChanged(this);

    // Log the resulting "ip_healthy," list.
    StringBuilder summary = new StringBuilder();
    for (Instance current : allIPs()) {
        summary.append(current.toIpAddr()).append("_").append(current.isHealthy()).append(",");
    }
    Loggers.EVT_LOG.info("[IP-UPDATED] namespace: {}, service: {}, ips: {}", getNamespaceId(), getName(),
            summary.toString());
}
-
遍历 service 中所有的集群,然后将把每个集群的实例列表进行修改。
-
调用 Cluster#updateIps 更新实例列表:采用copyOnWrite的思想解决并发下读写问题。
-
调用
Cluster#updatedIps(Collection<Instance> newInstance, Collection<Instance> oldInstance)
, 筛选出需要更改的实例列表。
-
调用
Cluster#subtract
, 筛选出新增实例列表和需要被移除的实例列表,将这两种实例列表都分别从健康检查列表中新增或者移除。 最后将最新的实例列表复制到临时节点实例列表ephemeralInstances
或者持久化实例列表persistentInstances
-
调用
PushService#serviceChanged
:给订阅了这个服务的客户端推送最新的服务实例信息,核心在PushService#onApplicationEvent
/**
 * Publishes a {@code ServiceChangeEvent} for the service unless a push for it is already pending.
 *
 * @param service service whose instances changed
 */
public void serviceChanged(Service service) {
    // Merge change events to reduce the push frequency: an entry in futureMap means a push for this
    // service is already scheduled, so no further event is published.
    final boolean pushAlreadyPending = futureMap
            .containsKey(UtilsAndCommons.assembleFullServiceName(service.getNamespaceId(), service.getName()));

    if (!pushAlreadyPending) {
        // Announce the service state change.
        this.applicationContext.publishEvent(new ServiceChangeEvent(this, service));
    }
}
- 调用
PushService#onApplicationEvent
: 先是去clientMap这个订阅客户端map中获取这个服务的一堆订阅客户端。然后遍历这堆订阅者,先从缓存中获取,默认是不启用这个缓存的,没有的话,就自己准备这个数据,放入缓存一份, 最后是调用udpPush进行推送。
/**
 * Reacts to a service change by scheduling a delayed UDP push to the clients subscribed to the service.
 *
 * <p>The scheduled task looks up the subscribed clients, drops zombie clients, prepares (or reuses
 * from a per-run cache) the payload for each remaining client and pushes it. The returned future is
 * stored in {@code futureMap} under the full service name and removed again when the task finishes.
 *
 * @param event event carrying the changed service
 */
public void onApplicationEvent(ServiceChangeEvent event) {
    Service service = event.getService();
    String serviceName = service.getName();
    String namespaceId = service.getNamespaceId();

    // The push runs with a delay of 1000 ms.
    Future future = GlobalExecutor.scheduleUdpSender(() -> {
        try {
            Loggers.PUSH.info(serviceName + " is changed, add it to push queue.");

            // Clients subscribed to this service.
            ConcurrentMap<String, PushClient> clients = clientMap
                    .get(UtilsAndCommons.assembleFullServiceName(namespaceId, serviceName));
            if (MapUtils.isEmpty(clients)) {
                return;
            }

            Map<String, Object> cache = new HashMap<>(16);
            long lastRefTime = System.nanoTime();

            for (PushClient client : clients.values()) {
                // Zombie clients are removed instead of being pushed to.
                if (client.zombie()) {
                    Loggers.PUSH.debug("client is zombie: " + client.toString());
                    clients.remove(client.toString());
                    continue;
                }

                Receiver.AckEntry ackEntry;
                Loggers.PUSH.debug("push serviceName: {} to client: {}", serviceName, client.toString());

                // Cache key for the payload of this client.
                String key = getPushCacheKey(serviceName, client.getIp(), client.getAgent());
                byte[] compressData = null;
                Map<String, Object> data = null;

                // The cache is only consulted when the configured push cache time is at least 20000.
                if (switchDomain.getDefaultPushCacheMillis() >= 20000 && cache.containsKey(key)) {
                    org.javatuples.Pair pair = (org.javatuples.Pair) cache.get(key);
                    compressData = (byte[]) (pair.getValue0());
                    data = (Map<String, Object>) pair.getValue1();
                    Loggers.PUSH.debug("[PUSH-CACHE] cache hit: {}:{}", serviceName, client.getAddrStr());
                }

                if (compressData != null) {
                    ackEntry = prepareAckEntry(client, compressData, data, lastRefTime);
                } else {
                    ackEntry = prepareAckEntry(client, prepareHostsData(client), lastRefTime);
                    if (ackEntry != null) {
                        // Remember the freshly prepared payload for later clients with the same key.
                        cache.put(key, new org.javatuples.Pair<>(ackEntry.origin.getData(), ackEntry.data));
                    }
                }

                Loggers.PUSH.info("serviceName: {} changed, schedule push for: {}, agent: {}, key: {}",
                        client.getServiceName(), client.getAddrStr(), client.getAgent(),
                        (ackEntry == null ? null : ackEntry.key));

                // Finally push the data to the client.
                udpPush(ackEntry);
            }
        } catch (Exception e) {
            // The exception is passed as the trailing throwable argument, so it needs no placeholder.
            Loggers.PUSH.error("[NACOS-PUSH] failed to push serviceName: {} to client", serviceName, e);
        } finally {
            // Clear the pending marker so that later changes can schedule a new push.
            futureMap.remove(UtilsAndCommons.assembleFullServiceName(namespaceId, serviceName));
        }
    }, 1000, TimeUnit.MILLISECONDS);

    // Mark the push as pending; serviceChanged skips publishing while this entry exists.
    futureMap.put(UtilsAndCommons.assembleFullServiceName(namespaceId, serviceName), future);
}