目录
Nacos服务发现实现逻辑
com.alibaba.cloud.nacos.discovery.NacosDiscoveryClient#getInstances
/**
 * Looks up the instances registered under the given service id.
 *
 * <p>The lookup is delegated to {@code serviceDiscovery.getInstances(serviceId)}; the original
 * walkthrough notes that this yields the healthy instances of the service.
 *
 * @param serviceId id of the service to resolve
 * @return the instances reported by {@code serviceDiscovery}
 * @throws RuntimeException wrapping any exception raised by the lookup
 */
public List<ServiceInstance> getInstances(String serviceId) {
    final List<ServiceInstance> resolved;
    try {
        resolved = serviceDiscovery.getInstances(serviceId);
    }
    catch (Exception cause) {
        // Surface every failure as an unchecked exception, keeping the cause attached.
        throw new RuntimeException(
                "Can not get hosts from nacos server. serviceId: " + serviceId, cause);
    }
    return resolved;
}
com.alibaba.cloud.nacos.discovery.NacosServiceDiscovery#getInstances
/**
 * Resolves the instances of a service through the Nacos naming service.
 *
 * @param serviceId name of the service to look up
 * @return the Nacos instances converted by {@code hostToServiceInstanceList}
 * @throws NacosException if the naming-service query fails
 */
public List<ServiceInstance> getInstances(String serviceId) throws NacosException {
    // Group taken from the discovery properties (original note: defaults to DEFAULT_GROUP).
    final String groupName = discoveryProperties.getGroup();
    // Per the original note, the boolean argument restricts the query to healthy instances.
    final boolean healthyOnly = true;
    return hostToServiceInstanceList(
            discoveryProperties.namingServiceInstance()
                    .selectInstances(serviceId, groupName, healthyOnly),
            serviceId);
}
com.alibaba.nacos.client.naming.NacosNamingService#selectInstances(java.lang.String, java.lang.String, java.util.List<java.lang.String>, boolean, boolean)
/**
 * Selects the instances of a service, optionally filtered by health.
 *
 * @param serviceName service name (without group)
 * @param groupName   group the service belongs to
 * @param clusters    cluster names; joined with "," before being passed on
 * @param healthy     forwarded to {@code selectInstances(ServiceInfo, boolean)}
 * @param subscribe   {@code true} to go through {@code hostReactor.getServiceInfo} (the original
 *                    note describes this as local cache first, server on a miss; default mode);
 *                    {@code false} to query the server directly
 * @return the instances selected from the resolved {@code ServiceInfo}
 * @throws NacosException if the lookup fails
 */
public List<Instance> selectInstances(String serviceName, String groupName, List<String> clusters, boolean healthy, boolean subscribe) throws NacosException {
    final String groupedName = NamingUtils.getGroupedName(serviceName, groupName);
    final String clusterList = StringUtils.join(clusters, ",");

    // Subscribe mode uses the host reactor's cached view; otherwise ask the server directly.
    final ServiceInfo resolved = subscribe
            ? hostReactor.getServiceInfo(groupedName, clusterList)
            : hostReactor.getServiceInfoDirectlyFromServer(groupedName, clusterList);

    return selectInstances(resolved, healthy);
}
这里先从默认的订阅模式开始介绍
com.alibaba.nacos.client.naming.core.HostReactor#getServiceInfo
/**
 * Returns the service information for {@code serviceName}/{@code clusters}, loading it from
 * the server on first use.
 *
 * <p>Flow: when the failover switch is on, the failover copy is returned. Otherwise the local
 * cache is consulted through {@code getServiceInfo0}. On a miss, a placeholder entry is stored
 * in {@code serviceInfoMap}, the service is marked in {@code updatingMap} and
 * {@code updateServiceNow} is called. If the entry exists but the service is marked in
 * {@code updatingMap}, the caller waits up to {@code UPDATE_HOLD_INTERVAL} on the
 * cached object. Finally a periodic update task is registered if absent.
 *
 * <p>If the calling thread is interrupted while waiting, the interruption is logged and the
 * thread's interrupt flag is restored so callers can still observe it.
 *
 * @param serviceName service name (the caller shown in this walkthrough passes the grouped name)
 * @param clusters    comma-joined cluster names
 * @return the failover copy when failover is on, otherwise the current {@code serviceInfoMap} entry
 */
public ServiceInfo getServiceInfo(final String serviceName, final String clusters) {
    NAMING_LOGGER.debug("failover-mode: " + failoverReactor.isFailoverSwitch());
    // Cache key derived from service name and clusters.
    String key = ServiceInfo.getKey(serviceName, clusters);
    // Failover check. Per the original note: when failover support is active, a thread named
    // com.alibaba.nacos.naming.failover periodically reads the switch file
    // 00-00---000-VIPSRV_FAILOVER_SWITCH-000---00-00; content "1" turns the switch on, and
    // service data is then served from FailoverReactor's cached copy (disaster-recovery backup).
    if (failoverReactor.isFailoverSwitch()) {
        return failoverReactor.getService(key);
    }
    // Look in the local cache first.
    ServiceInfo serviceObj = getServiceInfo0(serviceName, clusters);
    if (null == serviceObj) {
        // Cache miss: register a placeholder so concurrent callers find an entry.
        serviceObj = new ServiceInfo(serviceName, clusters);
        serviceInfoMap.put(serviceObj.getKey(), serviceObj);
        updatingMap.put(serviceName, new Object());
        // Fetch the latest instance list from the server and refresh the local cache.
        updateServiceNow(serviceName, clusters);
        updatingMap.remove(serviceName);
    } else if (updatingMap.containsKey(serviceName)) {
        if (UPDATE_HOLD_INTERVAL > 0) {
            // hold a moment waiting for update finish
            synchronized (serviceObj) {
                try {
                    serviceObj.wait(UPDATE_HOLD_INTERVAL);
                } catch (InterruptedException e) {
                    NAMING_LOGGER.error("[getServiceInfo] serviceName:" + serviceName + ", clusters:" + clusters, e);
                    // Catching InterruptedException clears the flag; restore it for callers.
                    Thread.currentThread().interrupt();
                }
            }
        }
    }
    // Make sure a periodic refresh task exists for this service.
    scheduleUpdateIfAbsent(serviceName, clusters);
    return serviceInfoMap.get(serviceObj.getKey());
}
这里先追踪到updateServiceNow方法
com.alibaba.nacos.client.naming.core.HostReactor#updateServiceNow
/**
 * Pulls the current instance list of a service from the server and feeds it into the local cache.
 *
 * <p>Failures are logged and not propagated. Whatever the outcome, threads waiting on the
 * entry that was cached before the call are woken up.
 *
 * @param serviceName service to refresh
 * @param clusters    comma-joined cluster names
 */
public void updateServiceNow(String serviceName, String clusters) {
    // Captured up front: this is the object other callers may be waiting on.
    final ServiceInfo cachedBefore = getServiceInfo0(serviceName, clusters);
    try {
        // Server query (original note: API path /nacos/v1/ns/instance/list).
        final String responseJson =
                serverProxy.queryList(serviceName, clusters, pushReceiver.getUDPPort(), false);
        if (StringUtils.isNotEmpty(responseJson)) {
            // Merge the response into the cache.
            processServiceJSON(responseJson);
        }
    } catch (Exception failure) {
        NAMING_LOGGER.error("[NA] failed to update serviceName: " + serviceName, failure);
    } finally {
        if (cachedBefore != null) {
            synchronized (cachedBefore) {
                cachedBefore.notifyAll();
            }
        }
    }
}
com.alibaba.nacos.client.naming.core.HostReactor#processServiceJSON
/**
 * Parses a service JSON payload and merges it into the local cache.
 *
 * <p>The payload is ignored (and the previously cached entry, possibly null, is returned) when
 * it has no hosts or fails validate(). Otherwise the new data replaces the cached entry. When a
 * previous entry exists, hosts are compared by toInetAddr() to find added, removed and modified
 * instances; if any are found, a change event is dispatched and the data is written to the disk
 * cache. When no previous entry exists, the event is always dispatched and the data always written.
 *
 * @param json service data as JSON
 * @return the parsed ServiceInfo, or the previously cached entry when the payload is ignored
 */
public ServiceInfo processServiceJSON(String json) {
// Newly received service data.
ServiceInfo serviceInfo = JSON.parseObject(json, ServiceInfo.class);
// Previously cached service data (null if none).
ServiceInfo oldService = serviceInfoMap.get(serviceInfo.getKey());
if (serviceInfo.getHosts() == null || !serviceInfo.validate()) {
//empty or error push, just ignore
return oldService;
}
boolean changed = false;
if (oldService != null) {
// NOTE(review): older-than-cached data only triggers a warning; it is still stored below.
if (oldService.getLastRefTime() > serviceInfo.getLastRefTime()) {
NAMING_LOGGER.warn("out of date data received, old-t: " + oldService.getLastRefTime()
+ ", new-t: " + serviceInfo.getLastRefTime());
}
// A cached entry exists: replace it in serviceInfoMap with the new data.
serviceInfoMap.put(serviceInfo.getKey(), serviceInfo);
// Index the old instances by toInetAddr().
Map<String, Instance> oldHostMap = new HashMap<String, Instance>(oldService.getHosts().size());
for (Instance host : oldService.getHosts()) {
oldHostMap.put(host.toInetAddr(), host);
}
// Index the new instances by toInetAddr().
Map<String, Instance> newHostMap = new HashMap<String, Instance>(serviceInfo.getHosts().size());
for (Instance host : serviceInfo.getHosts()) {
newHostMap.put(host.toInetAddr(), host);
}
Set<Instance> modHosts = new HashSet<Instance>();
Set<Instance> newHosts = new HashSet<Instance>();
Set<Instance> remvHosts = new HashSet<Instance>();
List<Map.Entry<String, Instance>> newServiceHosts = new ArrayList<Map.Entry<String, Instance>>(
newHostMap.entrySet());
// Walk the new instances to find modified and added ones.
for (Map.Entry<String, Instance> entry : newServiceHosts) {
// The instance.
Instance host = entry.getValue();
// Its toInetAddr() key (original note: ip + port).
String key = entry.getKey();
// Present in the old set but with a different toString(): modified.
if (oldHostMap.containsKey(key) && !StringUtils.equals(host.toString(),
oldHostMap.get(key).toString())) {
modHosts.add(host);
continue;
}
// Absent from the old set: newly added.
if (!oldHostMap.containsKey(key)) {
newHosts.add(host);
}
}
// Walk the old instances to find removed ones.
for (Map.Entry<String, Instance> entry : oldHostMap.entrySet()) {
Instance host = entry.getValue();
String key = entry.getKey();
if (newHostMap.containsKey(key)) {
continue;
}
// Absent from the new set: removed. (This check is always true here because of the continue above.)
if (!newHostMap.containsKey(key)) {
remvHosts.add(host);
}
}
if (newHosts.size() > 0) {
changed = true;
NAMING_LOGGER.info("new ips(" + newHosts.size() + ") service: "
+ serviceInfo.getKey() + " -> " + JSON.toJSONString(newHosts));
}
if (remvHosts.size() > 0) {
changed = true;
NAMING_LOGGER.info("removed ips(" + remvHosts.size() + ") service: "
+ serviceInfo.getKey() + " -> " + JSON.toJSONString(remvHosts));
}
if (modHosts.size() > 0) {
changed = true;
NAMING_LOGGER.info("modified ips(" + modHosts.size() + ") service: "
+ serviceInfo.getKey() + " -> " + JSON.toJSONString(modHosts));
}
serviceInfo.setJsonFromServer(json);
// If any instance was added, removed or modified: dispatch a change event and write the data to disk.
if (newHosts.size() > 0 || remvHosts.size() > 0 || modHosts.size() > 0) {
eventDispatcher.serviceChanged(serviceInfo);
DiskCache.write(serviceInfo, cacheDir);
}
} else {
// No cached entry: store the new data, dispatch a change event and write the data to disk.
changed = true;
NAMING_LOGGER.info("init new ips(" + serviceInfo.ipCount() + ") service: " + serviceInfo.getKey() + " -> " + JSON
.toJSONString(serviceInfo.getHosts()));
serviceInfoMap.put(serviceInfo.getKey(), serviceInfo);
eventDispatcher.serviceChanged(serviceInfo);
serviceInfo.setJsonFromServer(json);
DiskCache.write(serviceInfo, cacheDir);
}
MetricsMonitor.getServiceInfoMapSizeMonitor().set(serviceInfoMap.size());
if (changed) {
NAMING_LOGGER.info("current ips:(" + serviceInfo.ipCount() + ") service: " + serviceInfo.getKey() +
" -> " + JSON.toJSONString(serviceInfo.getHosts()));
}
return serviceInfo;
}
这里介绍下事件分发eventDispatcher.serviceChanged和文件写入DiskCache.write这两个方法,先从事件分发开始说。
从eventDispatcher.serviceChanged追踪进去可以看出,它只是将serviceInfo对象加入到一个阻塞队列changedServices中,从这里看不出队列中的数据是在哪里被消费的。其实Nacos服务发现最重要的类是NacosNamingService,这个类在初始化的时候用了策略模式将serverProxy、beatReactor、hostReactor等构造出来,分别处理注册、心跳、实例信息等逻辑,其中被实例化的也包括EventDispatcher。
com.alibaba.nacos.client.naming.NacosNamingService#init
/**
 * Initializes the naming service from the given properties.
 *
 * <p>Resolves namespace, server address, web root context, cache directory and log name, then
 * builds the collaborating components in dependency order: the event dispatcher, the server
 * proxy, the beat reactor and the host reactor.
 *
 * @param properties client configuration
 */
private void init(Properties properties) {
    // Environment and configuration.
    namespace = InitUtils.initNamespaceForNaming(properties);
    initServerAddr(properties);
    InitUtils.initWebRootContext();
    initCacheDir();
    initLogName(properties);

    // Components; later ones receive the earlier ones as constructor arguments.
    eventDispatcher = new EventDispatcher();
    serverProxy = new NamingProxy(namespace, endpoint, serverList, properties);
    beatReactor = new BeatReactor(serverProxy, initClientBeatThreadCount(properties));

    final boolean loadCacheAtStart = isLoadCacheAtStart(properties);
    final int pollingThreads = initPollingThreadCount(properties);
    hostReactor = new HostReactor(eventDispatcher, serverProxy, cacheDir, loadCacheAtStart,
            pollingThreads);
}
这里看下EventDispatcher的构造方法com.alibaba.nacos.client.naming.core.EventDispatcher#EventDispatcher
/**
 * Creates the dispatcher and immediately submits its {@code Notifier} to a dedicated
 * single-thread executor whose thread is a daemon named
 * {@code com.alibaba.nacos.naming.client.listener}.
 */
public EventDispatcher() {
    final ThreadFactory listenerThreads = new ThreadFactory() {
        @Override
        public Thread newThread(Runnable task) {
            final Thread worker = new Thread(task, "com.alibaba.nacos.naming.client.listener");
            worker.setDaemon(true);
            return worker;
        }
    };
    executor = Executors.newSingleThreadExecutor(listenerThreads);
    // The notifier runs for the lifetime of the executor thread.
    executor.execute(new Notifier());
}
这个构造方法创建了一个单线程的线程池(线程为守护线程),并向其中提交Notifier任务,具体的执行逻辑在Notifier类的run方法中。
com.alibaba.nacos.client.naming.core.EventDispatcher.Notifier#run
/**
 * Endless notification loop: takes changed services off {@code changedServices} and invokes
 * every listener registered for that service in {@code observerMap}.
 *
 * <p>Each poll waits up to five minutes. Exceptions raised while polling are ignored;
 * exceptions raised while notifying are logged, and the loop continues either way.
 */
public void run() {
    for (;;) {
        ServiceInfo changed = null;
        try {
            // Wait for the next changed service.
            changed = changedServices.poll(5, TimeUnit.MINUTES);
        } catch (Exception ignore) {
        }
        if (changed != null) {
            try {
                // Listeners registered under this service's key.
                List<EventListener> subscribers = observerMap.get(changed.getKey());
                if (CollectionUtils.isEmpty(subscribers)) {
                    continue;
                }
                for (EventListener subscriber : subscribers) {
                    List<Instance> readOnlyHosts = Collections.unmodifiableList(changed.getHosts());
                    NamingEvent event = new NamingEvent(changed.getName(), changed.getGroupName(), changed.getClusters(), readOnlyHosts);
                    // Deliver the event.
                    subscriber.onEvent(event);
                }
            } catch (Exception e) {
                NAMING_LOGGER.error("[NA] notify error for service: "
                        + changed.getName() + ", clusters: " + changed.getClusters(), e);
            }
        }
    }
}
这里接着看下DiskCache.write方法。从该方法追踪进去可以看出,具体的逻辑是将服务数据写入到文件中。为什么要写到文件中?答案在HostReactor的构造方法里:NacosNamingService类在初始化时会将HostReactor实例化,这里看下HostReactor的构造方法
/**
 * Builds the host reactor.
 *
 * <p>Creates the scheduled executor used for update tasks (daemon threads named
 * {@code com.alibaba.nacos.client.naming.updater}), initializes the service cache, seeded
 * from the disk cache when {@code loadCacheAtStart} is set, and creates the failover reactor
 * and the push receiver, both of which receive this instance.
 *
 * @param eventDispatcher    dispatcher stored for later use by this reactor
 * @param serverProxy        proxy stored for later use by this reactor
 * @param cacheDir           directory of the on-disk service cache
 * @param loadCacheAtStart   whether to seed the in-memory cache from {@code cacheDir}
 * @param pollingThreadCount core pool size of the update executor
 */
public HostReactor(EventDispatcher eventDispatcher, NamingProxy serverProxy, String cacheDir,
                   boolean loadCacheAtStart, int pollingThreadCount) {
    final ThreadFactory updaterThreads = new ThreadFactory() {
        @Override
        public Thread newThread(Runnable task) {
            final Thread updater = new Thread(task);
            updater.setDaemon(true);
            updater.setName("com.alibaba.nacos.client.naming.updater");
            return updater;
        }
    };
    executor = new ScheduledThreadPoolExecutor(pollingThreadCount, updaterThreads);

    this.eventDispatcher = eventDispatcher;
    this.serverProxy = serverProxy;
    this.cacheDir = cacheDir;

    // Either restore the cache from disk or start with an empty one.
    this.serviceInfoMap = loadCacheAtStart
            ? new ConcurrentHashMap<String, ServiceInfo>(DiskCache.read(this.cacheDir))
            : new ConcurrentHashMap<String, ServiceInfo>(16);

    this.updatingMap = new ConcurrentHashMap<String, Object>();
    this.failoverReactor = new FailoverReactor(this, cacheDir);
    this.pushReceiver = new PushReceiver(this);
}
从这里可以看出,在开启namingLoadCacheAtStart时,会直接读取本地文件中存储的服务注册表信息,否则就会新创建一个空的服务注册表。这样做的好处在哪?我的理解是:某个服务突然宕掉后重启时,如果直接读取了本地文件中存储的服务信息,就可以直接从serviceInfoMap中获取到实例列表,而不必先调用服务端api。
回到com.alibaba.nacos.client.naming.core.HostReactor#getServiceInfo这个方法中,updateServiceNow方法基本上已经介绍完,这里再介绍一下scheduleUpdateIfAbsent方法。
com.alibaba.nacos.client.naming.core.HostReactor#scheduleUpdateIfAbsent
/**
 * Registers an update task for the service unless one is already present in {@code futureMap}.
 *
 * <p>Uses double-checked locking (as in a lazily initialized singleton): a lock-free lookup
 * first, then a second lookup under the {@code futureMap} monitor before the task is added.
 *
 * @param serviceName service whose data should be refreshed periodically
 * @param clusters    comma-joined cluster names
 */
public void scheduleUpdateIfAbsent(String serviceName, String clusters) {
    final String taskKey = ServiceInfo.getKey(serviceName, clusters);
    // First check, without the lock.
    if (futureMap.get(taskKey) != null) {
        return;
    }
    synchronized (futureMap) {
        // Second check, under the lock.
        if (futureMap.get(taskKey) == null) {
            ScheduledFuture<?> scheduled = addTask(new UpdateTask(serviceName, clusters));
            futureMap.put(taskKey, scheduled);
        }
    }
}
com.alibaba.nacos.client.naming.core.HostReactor.UpdateTask#run
/**
 * Refreshes the cached data of one service and re-submits itself to the executor.
 *
 * <p>The task is one-shot: it only runs again when {@code finally} re-schedules it, which
 * happens when {@code delayTime} is positive. It is not re-scheduled when the service is
 * neither subscribed nor present in {@code futureMap}.
 *
 * <p>An unexpected failure is logged and the task is retried after {@code DEFAULT_DELAY}.
 * Without that retry a single failure would end the periodic refresh of the service.
 */
public void run() {
    // Non-positive means "do not reschedule".
    long delayTime = -1;
    try {
        final String cacheKey = ServiceInfo.getKey(serviceName, clusters);
        // Current cached data of the service.
        ServiceInfo serviceObj = serviceInfoMap.get(cacheKey);
        if (serviceObj == null) {
            // Nothing cached yet: fetch from the server and retry after DEFAULT_DELAY.
            updateServiceNow(serviceName, clusters);
            delayTime = DEFAULT_DELAY;
            return;
        }
        if (serviceObj.getLastRefTime() <= lastRefTime) {
            // Cache has not been refreshed since this task last ran: pull from the server
            // and re-read the (possibly replaced) cache entry.
            updateServiceNow(serviceName, clusters);
            serviceObj = serviceInfoMap.get(cacheKey);
        } else {
            // if serviceName already updated by push, we should not override it
            // since the push data may be different from pull through force push
            refreshOnly(serviceName, clusters);
        }
        // Remember the refresh time seen by this run.
        lastRefTime = serviceObj.getLastRefTime();
        // Stop when the service is not subscribed AND no longer registered in futureMap.
        if (!eventDispatcher.isSubscribed(serviceName, clusters)
                && !futureMap.containsKey(cacheKey)) {
            // abort the update task:
            NAMING_LOGGER.info("update task is stopped, service:" + serviceName + ", clusters:" + clusters);
            return;
        }
        delayTime = serviceObj.getCacheMillis();
    } catch (Throwable e) {
        NAMING_LOGGER.warn("[NA] failed to update serviceName: " + serviceName, e);
        // Keep the periodic refresh alive: futureMap still holds this task's entry, so
        // scheduleUpdateIfAbsent would never register a replacement.
        delayTime = DEFAULT_DELAY;
    } finally {
        if (delayTime > 0) {
            executor.schedule(this, delayTime, TimeUnit.MILLISECONDS);
        }
    }
}
服务端服务发现实现逻辑
服务端这边的接口在com.alibaba.nacos.naming.controllers.InstanceController#list,这里暂不细讲。其中比较重要的一点是:客户端请求服务端时,服务端不止返回响应结果,还可能把该客户端登记为推送对象,后续由服务端主动推送。这也是为什么客户端的定时任务在判断服务已被更新、执行refreshOnly时,里面仍然要调用服务端api的原因。这里具体看下该代码块
// Excerpt: when the client reported a UDP port and push is allowed for its agent, register it
// as a push client and use the push-specific cacheMillis for this service.
try {
if (udpPort > 0 && pushService.canEnablePush(agent)) {
// Original note: wraps the arguments in a PushClient object and stores it in clientMap.
pushService.addClient(namespaceId, serviceName,
clusters,
agent,
new InetSocketAddress(clientIP, udpPort),
pushDataSource,
tid,
app);
cacheMillis = switchDomain.getPushCacheMillis(serviceName);
}
} catch (Exception e) {
// Registration failed: log it and fall back to the default cacheMillis.
Loggers.SRV_LOG.error("[NACOS-API] failed to added push client {}, {}:{}", clientInfo, clientIP, udpPort, e);
cacheMillis = switchDomain.getDefaultCacheMillis();
}
在推送的时候,这里的推送的具体执行方法和服务注册的serviceChanged变更事件一样都是调用com.alibaba.nacos.naming.push.PushService#onApplicationEvent这个方法,具体详解可以到服务注册章节查看。
客户端接收推送的源码在com.alibaba.nacos.client.naming.core.PushReceiver#run
里面的具体逻辑是处理服务端发送过来的服务数据信息:当类型为dom或service时,会更新本地缓存的服务注册表数据;当类型为dump时,会将本地缓存的服务注册表通过回调发送给服务端。
总结
服务发现不仅仅是客户端主动拉取,还有服务端的推送。