客户端流程
客户端注册服务实例
Nacos Client会通过发送REST请求的方式向Nacos Server注册自己的服务,提供自身的元数据,比如ip地址、端口等信息。Nacos Server接收到注册请求后,就会把这些元数据信息存储在一个双层的内存Map中。
spring-cloud-starter-alibaba-nacos-discovery客户端starter的spring.factories中的NacosServiceRegistryAutoConfiguration配置类里,定义了服务自动注册的NacosAutoServiceRegistration
NacosAutoServiceRegistration实现了事件监听,在spring容器启动完成后会调用onApplicationEvent方法,在该方法中实现了将客户端服务注册到注册中心的功能
// AbstractAutoServiceRegistration class
// Called once the Spring container has finished starting (web server initialized);
// it simply delegates to bind(event).
public void onApplicationEvent(WebServerInitializedEvent event) {
this.bind(event);
}
/**
 * Records the web server port and starts registration, unless the event comes from a
 * web server application context whose server namespace is "management".
 *
 * @param event the web-server-initialized event carrying the context and the web server
 */
@Deprecated
public void bind(WebServerInitializedEvent event) {
    ApplicationContext context = event.getApplicationContext();
    boolean managementContext = context instanceof ConfigurableWebServerApplicationContext
            && "management".equals(((ConfigurableWebServerApplicationContext) context).getServerNamespace());
    if (managementContext) {
        return;
    }
    // The port is only stored while it still holds 0.
    port.compareAndSet(0, event.getWebServer().getPort());
    // Run the start routine, which performs the registration.
    start();
}
/**
 * Publishes the pre-registration event, registers the service (plus the management
 * registration when required), publishes the registered event and finally flips the
 * running flag from false to true.
 */
public void start() {
    context.publishEvent(new InstancePreRegisteredEvent(this, getRegistration()));

    // Begin the actual service registration.
    register();
    if (shouldRegisterManagement()) {
        registerManagement();
    }

    context.publishEvent(new InstanceRegisteredEvent(this, getConfiguration()));
    running.compareAndSet(false, true);
}
// Service registration: hands the current registration to the ServiceRegistry.
protected void register() {
// this.getRegistration() supplies the client service's metadata, e.g. IP address and port
this.serviceRegistry.register(this.getRegistration());
}
ServiceRegistry<R extends Registration> 是spring cloud提供的服务注册接口规范,nacos、eureka等都是按此接口实现的,其中nacos的实现类是NacosServiceRegistry
// NacosServiceRegistry class
// Registers the given registration with Nacos through the NamingService.
@Override
public void register(Registration registration) {
// Obtain the NamingService
NamingService namingService = namingService();
// Obtain the serviceId
String serviceId = registration.getServiceId();
String group = nacosDiscoveryProperties.getGroup();
// Build the service instance from the client info (IP, host, weight, metadata, ephemeral flag, etc.)
Instance instance = getNacosInstanceFromRegistration(registration);
try {
// Register the instance using the serviceId, its group and the instance itself
namingService.registerInstance(serviceId, group, instance);
}
catch (Exception e) {
// NOTE(review): the catch body is empty in this excerpt, so a failed registration is
// silently ignored here - presumably the error handling was trimmed; confirm against the full source.
}
}
将实例信息instance映射到params注册参数中,getServerList()通过配置获取nacos服务端地址,注册url为/nacos/v1/ns/instance,然后使用restTemplate调用nacos服务端接口,进行服务注册
// NacosNamingService class
/**
 * Registers an instance under the given service and group.
 *
 * @param serviceName service name
 * @param groupName   group name
 * @param instance    instance to register
 * @throws NacosException declared by the signature; raised by the calls below
 */
@Override
public void registerInstance(String serviceName, String groupName, Instance instance) throws NacosException {
    // Validate the instance first (the author's original note marks the exact checks as "to be confirmed").
    NamingUtils.checkInstanceIsLegal(instance);
    // Group name and service name are joined with "@@".
    final String fullServiceName = NamingUtils.getGroupedName(serviceName, groupName);
    if (instance.isEphemeral()) {
        // Ephemeral instances need heartbeats: build the beat info and hand it to the beat reactor.
        beatReactor.addBeatInfo(fullServiceName, beatReactor.buildBeatInfo(fullServiceName, instance));
    }
    // Perform the registration itself.
    serverProxy.registerService(fullServiceName, groupName, instance);
}
/**
 * Packs the instance data into request parameters and POSTs them to the instance
 * registration API.
 *
 * @param serviceName service name
 * @param groupName   group name
 * @param instance    instance whose fields are sent
 * @throws NacosException declared by the signature; raised by the request call
 */
public void registerService(String serviceName, String groupName, Instance instance) throws NacosException {
    final Map<String, String> query = new HashMap<>(16);

    // Where the instance belongs: namespace, service, group and cluster.
    query.put(CommonParams.NAMESPACE_ID, namespaceId);
    query.put(CommonParams.SERVICE_NAME, serviceName);
    query.put(CommonParams.GROUP_NAME, groupName);
    query.put(CommonParams.CLUSTER_NAME, instance.getClusterName());

    // Network address of the instance.
    query.put("ip", instance.getIp());
    query.put("port", String.valueOf(instance.getPort()));

    // Weight, enabled flag, health flag and ephemeral flag.
    query.put("weight", String.valueOf(instance.getWeight()));
    query.put("enable", String.valueOf(instance.isEnabled()));
    query.put("healthy", String.valueOf(instance.isHealthy()));
    query.put("ephemeral", String.valueOf(instance.isEphemeral()));

    // Instance metadata, serialized as JSON.
    query.put("metadata", JacksonUtils.toJson(instance.getMetadata()));

    // UtilAndComs.nacosUrlInstance is the registration URL /nacos/v1/ns/instance.
    reqApi(UtilAndComs.nacosUrlInstance, query, HttpMethod.POST);
}
// Overload without an explicit server list: delegates to the five-argument reqApi.
public String reqApi(String api, Map<String, String> params, Map<String, String> body, String method)
throws NacosException {
// getServerList() supplies the Nacos server list used for the request
return reqApi(api, params, body, getServerList(), method);
}
/**
 * Calls the given API on the Nacos servers, starting from a randomly chosen server and
 * moving on to the next one whenever a call fails.
 *
 * @param api     API path to call
 * @param params  request parameters
 * @param body    request body
 * @param servers candidate server addresses
 * @param method  HTTP method
 * @return the response of the first server that answers successfully
 * @throws NacosException if the server list is empty, or - when every server fails -
 *                        the failure reported by the last server tried
 */
public String reqApi(String api, Map<String, String> params, Map<String, String> body, List<String> servers,
        String method) throws NacosException {
    // Without this guard an empty list would reach Random.nextInt(0) and fail with an unrelated exception.
    if (servers == null || servers.isEmpty()) {
        throw new NacosException(NacosException.INVALID_PARAM, "no server available");
    }
    Random random = new Random(System.currentTimeMillis());
    int index = random.nextInt(servers.size());
    NacosException lastFailure = null;
    for (int i = 0; i < servers.size(); i++) {
        String server = servers.get(index);
        try {
            // Call the Nacos server over HTTP (this is where the registration request is sent).
            return callServer(api, params, body, server, method);
        } catch (NacosException e) {
            // Remember the failure and try the next server in the list.
            lastFailure = e;
        }
        index = (index + 1) % servers.size();
    }
    // Every server was tried (the list is non-empty, so lastFailure is set): surface the last error
    // instead of falling off the end of the method.
    throw lastFailure;
}
客户端服务心跳
在注册服务实例同时,会发送心跳报文以达到服务保活,构建心跳信息beatInfo,通过延迟定时任务每隔5s(默认)向nacos服务端发送心跳报文;心跳响应如果有clientBeatInterval(服务端默认配置5s),则下次心跳延迟clientBeatInterval后执行,否则下次心跳时间还是5s之后。
// BeatReactor class
// Builds the heartbeat payload for the given instance (part of the body is elided in this excerpt).
public BeatInfo buildBeatInfo(String groupedServiceName, Instance instance) {
// Build the heartbeat payload
BeatInfo beatInfo = new BeatInfo();
.....
// Set the heartbeat period (5s by default)
beatInfo.setPeriod(instance.getInstanceHeartBeatInterval());
return beatInfo;
}
// Heartbeat task: sends one heartbeat, then schedules a new BeatTask for the next one.
// NOTE(review): the beatInfo field/constructor is not shown in this excerpt.
class BeatTask implements Runnable {
@Override
public void run() {
// Delay until the next heartbeat; starts from the period stored in beatInfo
long nextTime = beatInfo.getPeriod();
try {
// Send the heartbeat request to the Nacos server: /instance/beat
JsonNode result = serverProxy.sendBeat(beatInfo, BeatReactor.this.lightBeatEnabled);
// Read the next heartbeat interval returned by the Nacos server
long interval = result.get("clientBeatInterval").asLong();
boolean lightBeatEnabled = false;
if (result.has(CommonParams.LIGHT_BEAT_ENABLED)) {
lightBeatEnabled = result.get(CommonParams.LIGHT_BEAT_ENABLED).asBoolean();
}
BeatReactor.this.lightBeatEnabled = lightBeatEnabled;
if (interval > 0) {
// Use the delay returned by the server instead of the local period
nextTime = interval;
}
int code = NamingResponseCode.OK;
if (result.has(CommonParams.CODE)) {
code = result.get(CommonParams.CODE).asInt();
}
if (code == NamingResponseCode.RESOURCE_NOT_FOUND) {
Instance instance = new Instance();
instance.setPort(beatInfo.getPort());
......
try {
// The server answered RESOURCE_NOT_FOUND (e.g. heartbeats were lost due to network problems),
// so the instance has to be registered again
serverProxy.registerService(beatInfo.getServiceName(),
NamingUtils.getGroupName(beatInfo.getServiceName()), instance);
} catch (Exception ignore) {
}
}
} catch (NacosException ex) {
// NOTE(review): the catch body is empty in this excerpt - presumably logging was trimmed; confirm.
}
// Schedule the next heartbeat task after nextTime milliseconds
executorService.schedule(new BeatTask(beatInfo), nextTime, TimeUnit.MILLISECONDS);
}
}
服务端流程
服务端注册实例
通过客户端调用url:/nacos/v1/ns/instance,可以找到服务端入口InstanceController类
1、将请求参数转换为服务端注册实例Instance
2、将Service添加到serviceMap集合中,此时Service下的Cluster里还没有实例信息
3、调用ConsistencyService#put方法插入数据
3.1、临时节点,调用DistroConsistencyServiceImpl#put方法
- 将实例信息存储在dataStore集合,并发布数据变更任务到阻塞队列
- 同步实例信息到nacos集群其他节点,此时服务注册同步逻辑执行完毕,给客户端响应成功
- 异步线程死循环从阻塞队列获取实例数据,将实例数据更新到Cluster下的临时实例集合中
3.2、持久化节点,调用RaftConsistencyServiceImpl#put方法
- 若不是leader节点,则将数据发送给leader节点
- leader将写数据请求发送其他follower节点,只有半数以上节点写入成功,才算成功;此时服务注册同步逻辑执行完毕,给客户端响应成功。
- 异步发送节点变更事件,将实例数据更新到Cluster下的持久化实例集合中
// InstanceController class
/**
 * Server-side entry point for instance registration.
 *
 * @param request HTTP request carrying the registration parameters
 * @return the literal string "ok" once the registration call returns
 * @throws Exception declared by the signature; raised by the parsing or registration calls
 */
@CanDistro
@PostMapping
@Secured(parser = NamingResourceParser.class, action = ActionTypes.WRITE)
public String register(HttpServletRequest request) throws Exception {
    // The namespace id is optional and falls back to the default namespace id.
    String namespace = WebUtils.optional(request, CommonParams.NAMESPACE_ID, Constants.DEFAULT_NAMESPACE_ID);
    // The service name is required.
    String service = WebUtils.required(request, CommonParams.SERVICE_NAME);
    // Check that the service name is the group name and service name joined with "@@".
    NamingUtils.checkServiceNameFormat(service);

    // Turn the request parameters into a server-side Instance and register it.
    serviceManager.registerInstance(namespace, service, parseInstance(request));
    return "ok";
}
// ServiceManager class
// Consistency service injected by bean name "consistencyDelegate".
@Resource(name = "consistencyDelegate")
private ConsistencyService consistencyService;
/**
 * Registers an instance: makes sure the service exists, then adds the instance to it.
 *
 * @param namespaceId namespace id
 * @param serviceName service name
 * @param instance    instance to register
 * @throws NacosException with INVALID_PARAM when the service cannot be found after creation
 */
public void registerInstance(String namespaceId, String serviceName, Instance instance) throws NacosException {
    // Create the service entry first.
    createEmptyService(namespaceId, serviceName, instance.isEphemeral());

    // Look the service up in the serviceMap cache; it must be present by now.
    if (getService(namespaceId, serviceName) == null) {
        throw new NacosException(NacosException.INVALID_PARAM,
                "service not found, namespace: " + namespaceId + ", service: " + serviceName);
    }

    // Register the instance under the service.
    addInstance(namespaceId, serviceName, instance.isEphemeral(), instance);
}
// Creates and initializes the service when it is not in the cache yet
// (part of the initialization is elided in this excerpt).
public void createServiceIfAbsent(String namespaceId, String serviceName, boolean local, Cluster cluster)
throws NacosException {
// Look the service up in the serviceMap cache by namespace and service name; nothing to do if it exists.
// Otherwise create a new Service and add it to serviceMap
Service service = getService(namespaceId, serviceName);
if (service == null) {
// First registration: initialize the service
service = new Service();
service.setName(serviceName);
.........
putServiceAndInit(service);
if (!local) {
addOrReplaceService(service);
}
}
}
/**
 * Looks a service up in the serviceMap cache.
 *
 * @param namespaceId namespace id
 * @param serviceName service name
 * @return the cached service, or null when the namespace has no entry in serviceMap
 *         or the lookup by service name finds nothing
 */
public Service getService(String namespaceId, String serviceName) {
    return serviceMap.get(namespaceId) == null
            ? null
            : chooseServiceMap(namespaceId).get(serviceName);
}
/**
 * Inserts the service into serviceMap, creating the namespace entry first when needed.
 *
 * @param service service to insert; an existing entry with the same name is kept
 * @throws NacosException declared by the signature
 */
private void putService(Service service) throws NacosException {
    final String namespaceId = service.getNamespaceId();
    if (!serviceMap.containsKey(namespaceId)) {
        // Namespace not present yet: initialize it under the lock, re-checking inside the lock.
        synchronized (putServiceLock) {
            if (!serviceMap.containsKey(namespaceId)) {
                serviceMap.put(namespaceId, new ConcurrentSkipListMap<>());
            }
        }
    }
    // Add the service to its namespace.
    serviceMap.get(namespaceId).putIfAbsent(service.getName(), service);
}
/**
 * Adds the given instances to a service and writes the resulting instance list through
 * the consistency service.
 *
 * @param namespaceId namespace id
 * @param serviceName service name
 * @param ephemeral   whether the instances are ephemeral
 * @param ips         instances to add
 * @throws NacosException declared by the signature; raised by the calls below
 */
public void addInstance(String namespaceId, String serviceName, boolean ephemeral, Instance... ips)
        throws NacosException {
    // Key layout (per the original notes):
    //   ephemeral:  com.alibaba.nacos.naming.iplist.ephemeral.<namespace>##<service>
    //   persistent: com.alibaba.nacos.naming.iplist.<namespace>##<service>
    final String datumKey = KeyBuilder.buildInstanceListKey(namespaceId, serviceName, ephemeral);

    // Fetch the service from the serviceMap cache.
    final Service target = getService(namespaceId, serviceName);

    // Lock on the service so concurrent registrations for the same service do not interleave.
    synchronized (target) {
        // Merge the newly registered instances into the service's instance list.
        List<Instance> merged = addIpAddresses(target, ephemeral, ips);
        Instances payload = new Instances();
        payload.setInstanceList(merged);
        consistencyService.put(datumKey, payload);
    }
}
// Adds instances: delegates to updateIpAddresses with the ADD action.
private List<Instance> addIpAddresses(Service service, boolean ephemeral, Instance... ips) throws NacosException {
// Add the service instances
return updateIpAddresses(service, UtilsAndCommons.UPDATE_INSTANCE_ACTION_ADD, ephemeral, ips);
}
// Applies an add/remove action for the given instances on top of the stored instance list
// and returns the resulting list.
// NOTE(review): currentInstances and currentInstanceIds are used below but not defined in this
// excerpt - presumably their setup was trimmed; confirm against the full source.
public List<Instance> updateIpAddresses(Service service, String action, boolean ephemeral, Instance... ips)
throws NacosException {
// First registration of an instance: create the cluster, add it to the service, and generate an instanceId.
// Repeated registration: update health status and heartbeat time.
// Fetch the instances already stored for this key from the consistency service (in-memory dataStore)
Datum datum = consistencyService
.get(KeyBuilder.buildInstanceListKey(service.getNamespaceId(), service.getName(), ephemeral));
Map<String, Instance> instanceMap;
if (datum != null && null != datum.value) {
// Not the first registration: for the same instance (same IP + port), update health status and heartbeat time
instanceMap = setValid(((Instances) datum.value).getInstanceList(), currentInstances);
} else {
instanceMap = new HashMap<>(ips.length);
}
// Walk through the instances passed in
for (Instance instance : ips) {
if (!service.getClusterMap().containsKey(instance.getClusterName())) {
// Reached on first registration: build the cluster and add it to the service
Cluster cluster = new Cluster(instance.getClusterName(), service);
cluster.init();
service.getClusterMap().put(instance.getClusterName(), cluster);
}
if (UtilsAndCommons.UPDATE_INSTANCE_ACTION_REMOVE.equals(action)) {
instanceMap.remove(instance.getDatumKey());
} else {
// Look up the previous instance in the local instance map
Instance oldInstance = instanceMap.get(instance.getDatumKey());
if (oldInstance != null) {
// Found: carry the old instance id over to the current instance
instance.setInstanceId(oldInstance.getInstanceId());
} else {
// Not found (instanceMap is empty on first registration): generate an instanceId
// (e.g. 10.200.78.27#8021#DEFAULT#DEFAULT_GROUP@@nacos-server)
instance.setInstanceId(instance.generateInstanceId(currentInstanceIds));
}
// Store the current instance in the map
instanceMap.put(instance.getDatumKey(), instance);
}
}
return new ArrayList<>(instanceMap.values());
}
consistencyService.put(key, instances)分析,可以看到consistencyService注入的bean名字是consistencyDelegate,正是DelegateConsistencyServiceImpl类,该类则通过是否临时实例标识来调用不同的实现类进行处理。
AP模式临时实例使用EphemeralConsistencyService,具体逻辑在实现类DistroConsistencyServiceImpl中
CP模式持久化实例使用PersistentConsistencyServiceDelegateImpl,实际使用RaftConsistencyServiceImpl
// DelegateConsistencyServiceImpl class
// Forwards the write to whichever consistency service mapConsistencyService(key) selects.
@Override
public void put(String key, Record value) throws NacosException {
mapConsistencyService(key).put(key, value);
}
/**
 * Chooses the consistency service for a key.
 *
 * @param key datum key
 * @return the ephemeral consistency service when the key matches an ephemeral key,
 *         otherwise the persistent consistency service
 */
private ConsistencyService mapConsistencyService(String key) {
    if (KeyBuilder.matchEphemeralKey(key)) {
        // Ephemeral instance: use EphemeralConsistencyService.
        return ephemeralConsistencyService;
    }
    // Anything else: use PersistentConsistencyServiceDelegateImpl.
    return persistentConsistencyService;
}
临时实例DistroConsistencyServiceImpl
- 初始化时执行init方法,执行Notifier的run方法
- put写注册表信息,将服务实例更新到内存注册表dataStore中,并同步实例信息到nacos集群其他节点。
// DistroConsistencyServiceImpl class
// Notifier task that receives the change tasks added in onPut.
private volatile Notifier notifier = new Notifier();
// Runs after the bean is constructed.
@PostConstruct
public void init() {
// Executed at bean initialization: submits the notifier task to the executor
GlobalExecutor.submitDistroNotifyTask(notifier);
}
/**
 * Stores the record locally, then asks the Distro protocol to sync the change.
 *
 * @param key   datum key
 * @param value record to store
 * @throws NacosException declared by the signature
 */
@Override
public void put(String key, Record value) throws NacosException {
    // Update the in-memory registry with the registered instances.
    onPut(key, value);

    // Sync the instance data to the other nodes of the Nacos cluster.
    DistroKey distroKey = new DistroKey(key, KeyBuilder.INSTANCE_LIST_KEY_PREFIX);
    distroProtocol.sync(distroKey, DataOperation.CHANGE, globalConfig.getTaskDispatchPeriod() / 2);
    // Once this returns, the synchronous part of the registration flow is finished.
}
/**
 * Stores an ephemeral instance list in the data store and, when listeners are registered
 * for the key, queues a CHANGE task on the notifier.
 *
 * @param key   datum key
 * @param value record to store; cast to Instances for ephemeral instance-list keys
 */
public void onPut(String key, Record value) {
    // Only ephemeral instance-list keys are written to the data store.
    if (KeyBuilder.matchEphemeralInstanceListKey(key)) {
        Datum<Instances> entry = new Datum<>();
        entry.value = (Instances) value;
        entry.key = key;
        entry.timestamp.incrementAndGet();
        dataStore.put(key, entry);
    }

    // Queue the change for the notifier only when someone listens on this key.
    if (listeners.containsKey(key)) {
        notifier.addTask(key, DataOperation.CHANGE);
    }
}
// Asynchronously notifies listeners of datum changes, which is how the instance data held by
// a service's clusters gets updated (parts of the class are elided in this excerpt).
public class Notifier implements Runnable {
private ConcurrentHashMap<String, String> services = new ConcurrentHashMap<>(10 * 1024);
private BlockingQueue<Pair<String, DataOperation>> tasks = new ArrayBlockingQueue<>(1024 * 1024);
........
// An endless loop (elided above) takes tasks from the blocking queue "tasks" and passes each one to handle()
private void handle(Pair<String, DataOperation> pair) {
try {
String datumKey = pair.getValue0();
DataOperation action = pair.getValue1();
for (RecordListener listener : listeners.get(datumKey)) {
// Iterate over the listeners registered for this datum key
// NOTE(review): count is not declared in this excerpt - presumably trimmed; confirm.
count++;
try {
// Change event: call onChange
if (action == DataOperation.CHANGE) {
// Instance registration goes through this call
listener.onChange(datumKey, dataStore.get(datumKey).value);
continue;
}
// Delete event: call onDelete
if (action == DataOperation.DELETE) {
listener.onDelete(datumKey);
continue;
}
} catch (Throwable e) {
}
}
} catch (Throwable e) {
}
}
}
listener.onChange节点变更逻辑,执行Service的onChange方法,再调用updateIPs方法进行处理
// Service class
// Clusters that belong to this service, keyed by cluster name
private Map<String, Cluster> clusterMap = new HashMap<>();
/**
 * Regroups the given instances by cluster name, pushes each group into its cluster via
 * Cluster#updateIps, then stamps the modification time and notifies the push service.
 *
 * @param instances instances that should make up this service
 * @param ephemeral whether the instances are ephemeral
 */
public void updateIPs(Collection<Instance> instances, boolean ephemeral) {
    // Cluster name -> instances of that cluster; every known cluster starts with an empty list.
    Map<String, List<Instance>> instancesByCluster = new HashMap<>(clusterMap.size());
    for (String knownCluster : clusterMap.keySet()) {
        instancesByCluster.put(knownCluster, new ArrayList<>());
    }

    for (Instance instance : instances) {
        try {
            // Fall back to the default cluster name when none is set.
            if (StringUtils.isEmpty(instance.getClusterName())) {
                instance.setClusterName(UtilsAndCommons.DEFAULT_CLUSTER_NAME);
            }
            String clusterName = instance.getClusterName();

            // Unknown cluster for this service: create it and add it to the cluster map.
            if (!clusterMap.containsKey(clusterName)) {
                Cluster created = new Cluster(clusterName, this);
                created.init();
                getClusterMap().put(clusterName, created);
            }

            // Append the instance to its cluster's list, creating the list when it is missing.
            instancesByCluster.computeIfAbsent(clusterName, name -> new LinkedList<>()).add(instance);
        } catch (Exception e) {
            Loggers.SRV_LOG.error("[NACOS-DOM] failed to process ip: " + instance, e);
        }
    }

    // Hand every cluster its instance list.
    for (Map.Entry<String, List<Instance>> group : instancesByCluster.entrySet()) {
        clusterMap.get(group.getKey()).updateIps(group.getValue(), ephemeral);
    }

    // Record when the service was last modified and notify the push service.
    setLastModifiedMillis(System.currentTimeMillis());
    getPushService().serviceChanged(this);
}
执行Cluster的updateIps方法,更新具体的实例信息,此处更新利用了写时复制技术,从老的集合中复制一份数据用来更新数据,而查询时继续从原来的实例中获取。
// Cluster class
// Persistent instances of this cluster
private Set<Instance> persistentInstances = new HashSet<>();
// Ephemeral instances of this cluster
private Set<Instance> ephemeralInstances = new HashSet<>();
/**
 * Replaces this cluster's ephemeral or persistent instance set with the given instances.
 * The field is pointed at a freshly built set rather than mutating the existing one.
 *
 * @param ips       instances that should make up the set
 * @param ephemeral true to replace the ephemeral set, false to replace the persistent set
 */
public void updateIps(List<Instance> ips, boolean ephemeral) {
    // Pick the set that is about to be replaced, based on the ephemeral flag.
    Set<Instance> current = ephemeral ? ephemeralInstances : persistentInstances;

    // Index the current instances by datum key.
    HashMap<String, Instance> previousByKey = new HashMap<>(current.size());
    for (Instance existing : current) {
        previousByKey.put(existing.getDatumKey(), existing);
    }
    // The result of updatedIps is not used in this excerpt; the call itself is kept.
    updatedIps(ips, previousByKey.values());

    // Swap in the new set.
    Set<Instance> replacement = new HashSet<>(ips);
    if (ephemeral) {
        ephemeralInstances = replacement;
    } else {
        persistentInstances = replacement;
    }
}
持久化实例RaftConsistencyServiceImpl
- 若不是leader节点,则将数据发送给leader节点
- leader将写数据请求发送其他follower节点,只有半数以上节点写入成功,才算成功;此时服务注册同步逻辑执行完毕,给客户端响应成功。
// RaftConsistencyServiceImpl (persistent instances)
// Writes the record through RaftCore after checking that the service has not stopped working.
@Override
public void put(String key, Record value) throws NacosException {
checkIsStopWork();
try {
// Write the data
raftCore.signalPublish(key, value);
} catch (Exception e) {
// NOTE(review): the catch body is empty in this excerpt, so a failed publish is silently
// ignored here - presumably the error handling was trimmed; confirm against the full source.
}
}
// Publishes a datum: a non-leader forwards the request to the leader; the leader writes locally,
// sends the datum to the other servers and waits until a majority has acknowledged it.
public void signalPublish(String key, Record value) throws Exception {
if (!isLeader()) {
// Not the leader: forward the write to the leader
ObjectNode params = JacksonUtils.createEmptyJsonNode();
params.put("key", key);
params.replace("value", JacksonUtils.transferToJsonNode(value));
Map<String, String> parameters = new HashMap<>(1);
parameters.put("key", key);
final RaftPeer leader = getLeader();
// Forward the message to the leader: /raft/datum
raftProxy.proxyPostLarge(leader.ip, API_PUB, params.toString(), parameters);
return;
}
OPERATE_LOCK.lock();
try {
final long start = System.currentTimeMillis();
final Datum datum = new Datum();
datum.key = key;
datum.value = value;
// New key starts at timestamp 1; an existing key continues from its incremented timestamp
if (getDatum(key) == null) {
datum.timestamp.set(1L);
} else {
datum.timestamp.set(getDatum(key).timestamp.incrementAndGet());
}
ObjectNode json = JacksonUtils.createEmptyJsonNode();
json.replace("datum", JacksonUtils.transferToJsonNode(datum));
json.replace("source", JacksonUtils.transferToJsonNode(peers.local()));
// Leader publishes the data: writes it to the in-memory datums map, persists it to disk, and updates the term
onPublish(datum, peers.local());
final String content = json.toString();
// CountDownLatch sized to peers.majorityCount() = peers.size() / 2 + 1, i.e. more than half
// Only after a majority reports a successful write does the latch reach 0 and release the waiting thread
final CountDownLatch latch = new CountDownLatch(peers.majorityCount());
for (final String server : peers.allServersIncludeMyself()) {
if (isLeader(server)) {
// The leader has already written the data: count down directly
latch.countDown();
continue;
}
// Send the write to the follower: /raft/datum/commit
final String url = buildUrl(server, API_ON_PUB);
HttpClient.asyncHttpPostLarge(url, Arrays.asList("key", key), content, new Callback<String>() {
@Override
public void onReceive(RestResult<String> result) {
if (!result.ok()) {
return;
}
// Follower wrote successfully: count down
latch.countDown();
}
});
}
// The calling thread waits on the latch (5s by default, per the original note)
if (!latch.await(UtilsAndCommons.RAFT_PUBLISH_TIMEOUT, TimeUnit.MILLISECONDS)) {
Loggers.RAFT.error("data publish failed, caused failed to notify majority, key={}", key);
throw new IllegalStateException("data publish failed, caused failed to notify majority, key=" + key);
}
long end = System.currentTimeMillis();
} finally {
OPERATE_LOCK.unlock();
}
}
follower通过RaftController(/datum/commit)接收到同步的数据
// RaftController class
/**
 * Receives a datum sent by the leader and hands it to the Raft consistency service.
 *
 * @param request  HTTP request whose body is the URL-encoded JSON with "source" and "datum"
 * @param response HTTP response (unused here)
 * @return the literal string "ok" once the datum has been passed on
 * @throws Exception declared by the signature; raised by decoding, parsing or the write
 */
@PostMapping("/datum/commit")
public String onPublish(HttpServletRequest request, HttpServletResponse response) throws Exception {
    // Decode the request body and extract the synced datum.
    String entity = IoUtils.toString(request.getInputStream(), "UTF-8");
    String value = URLDecoder.decode(entity, "UTF-8");
    JsonNode jsonObject = JacksonUtils.toObj(value);
    RaftPeer source = JacksonUtils.toObj(jsonObject.get("source").toString(), RaftPeer.class);
    JsonNode datumJson = jsonObject.get("datum");

    Datum datum = null;
    // The datum key is read from the "key" field of the datum JSON (the excerpt referenced an
    // undefined variable named key here).
    if (KeyBuilder.matchInstanceListKey(datumJson.get("key").asText())) {
        // Instance-list data: deserialize as Datum<Instances>.
        datum = JacksonUtils.toObj(datumJson.toString(), new TypeReference<Datum<Instances>>() {
        });
    }
    // NOTE(review): for any other key type datum stays null in this excerpt - presumably the
    // remaining branches were trimmed; confirm against the full source.

    // Start writing the data.
    raftConsistencyService.onPut(datum, source);
    return "ok";
}
raftConsistencyService.onPut会调用RaftCore#onPublish写入数据,将数据持久化到磁盘,更新数据到本地内存datums集合中,并更新follower节点选举周期
// RaftCore class
// Applies a published datum on this node: validates the source, persists and stores the datum,
// updates the term, and publishes a change event.
public void onPublish(Datum datum, RaftPeer source) throws Exception {
RaftPeer local = peers.local();
// Reject data whose source is not the leader
if (!peers.isLeader(source.ip)) {
throw new IllegalStateException("peer(" + source.ip + ") tried to publish " + "data but wasn't leader");
}
// Reject data whose source term is lower than this node's term
if (source.term.get() < local.term.get()) {
throw new IllegalStateException(
"out of date publish, pub-term:" + source.term.get() + ", cur-term: " + local.term.get());
}
local.resetLeaderDue();
// Persist the data to disk (only for persistent keys)
if (KeyBuilder.matchPersistentKey(datum.key)) {
raftStore.write(datum);
}
// Store the data in the local in-memory datums map
datums.put(datum.key, datum);
if (isLeader()) {
local.term.addAndGet(PUBLISH_TERM_INCREASE_COUNT);
} else {
if (local.term.get() + PUBLISH_TERM_INCREASE_COUNT > source.term.get()) {
// Update the term this node holds for the leader, then align the local term with it
getLeader().term.set(source.term.get());
local.term.set(getLeader().term.get());
} else {
local.term.addAndGet(PUBLISH_TERM_INCREASE_COUNT);
}
}
raftStore.updateTerm(local.term.get());
// Publish a ValueChangeEvent so the instance data gets applied to the cluster's persistent instance set
NotifyCenter.publishEvent(ValueChangeEvent.builder().key(datum.key).action(DataOperation.CHANGE).build());
}
DefaultPublisher调用subscriber.onEvent事件方法,调用PersistentNotifier#onEvent方法,最终会调用Service#onChange方法更新实例信息(更新逻辑和临时实例一样)
// PersistentNotifier class
// Forwards the event's key and action to notify(), together with the value that find returns for the key.
@Override
public void onEvent(ValueChangeEvent event) {
notify(event.getKey(), event.getAction(), find.apply(event.getKey()));
}
/**
 * Notifies every listener registered for the key about a change or a deletion.
 * A failure in one listener is logged and does not stop the remaining listeners.
 *
 * @param key    datum key
 * @param action operation that happened to the key
 * @param value  current value, passed to onChange
 * @param <T>    record type
 */
public <T extends Record> void notify(final String key, final DataOperation action, final T value) {
    for (RecordListener listener : listenerMap.get(key)) {
        try {
            if (action == DataOperation.CHANGE) {
                // Ends up in Service#onChange, which updates the instance data
                // (same logic as for ephemeral instances).
                listener.onChange(key, value);
            } else if (action == DataOperation.DELETE) {
                listener.onDelete(key);
            }
        } catch (Throwable e) {
            Loggers.RAFT.error("[NACOS-RAFT] error while notifying listener of key: {}", key, e);
        }
    }
}
服务端心跳处理
InstanceController接收到客户端心跳请求,若发送心跳的服务不存在实例数据,则重新注册服务;否则异步调用ClientBeatProcessor任务进行心跳处理,更新服务实例最后心跳时间。
// InstanceController class
// Handles a client heartbeat: re-registers the instance when it is unknown, processes the beat,
// and returns the result code together with the heartbeat settings for the client
// (parts of the body are elided in this excerpt).
@CanDistro
@PutMapping("/beat")
@Secured(parser = NamingResourceParser.class, action = ActionTypes.WRITE)
public ObjectNode beat(HttpServletRequest request) throws Exception {
// Read the parameters from the request
ObjectNode result = JacksonUtils.createEmptyJsonNode();
// Heartbeat interval the server suggests to the client (also 5s by default)
result.put(SwitchEntry.CLIENT_BEAT_INTERVAL, switchDomain.getClientBeatInterval());
String beat = WebUtils.optional(request, "beat", StringUtils.EMPTY);
........
// Look up the instance data
Instance instance = serviceManager.getInstance(namespaceId, serviceName, clusterName, ip, port);
if (instance == null) {
// No instance data: register the service instance again
instance = new Instance();
...................
serviceManager.registerInstance(namespaceId, serviceName, instance);
}
// Fetch the service
Service service = serviceManager.getService(namespaceId, serviceName);
// Process the heartbeat
service.processClientBeat(clientBeat);
result.put(CommonParams.CODE, NamingResponseCode.OK);
if (instance.containsMetadata(PreservedMetadataKeys.HEART_BEAT_INTERVAL)) {
// The instance's own heartbeat interval overrides the suggested one (5s by default)
result.put(SwitchEntry.CLIENT_BEAT_INTERVAL, instance.getInstanceHeartBeatInterval());
}
// true by default
result.put(SwitchEntry.LIGHT_BEAT_ENABLED, switchDomain.isLightBeatEnabled());
return result;
}
// ClientBeatProcessor class
/**
 * Processes one client heartbeat: refreshes the last-beat time of every instance matching
 * the heartbeat's ip and port, and turns an unhealthy, unmarked instance healthy again.
 */
@Override
public void run() {
    Service service = this.service;
    // Fetch the instances of the cluster.
    List<Instance> instances = cluster.allIPs(true);
    for (Instance instance : instances) {
        // Only instances whose ip and port match the heartbeat are handled.
        boolean sameEndpoint = instance.getIp().equals(ip) && instance.getPort() == port;
        if (!sameEndpoint) {
            continue;
        }
        // Refresh the time of the last heartbeat.
        instance.setLastBeat(System.currentTimeMillis());
        if (!instance.isMarked() && !instance.isHealthy()) {
            // The instance was unhealthy: mark it healthy again.
            instance.setHealthy(true);
            // Publish the service change, mainly so the changed instance is pushed to clients.
            getPushService().serviceChanged(service);
        }
    }
}
服务端健康检查(探活)
在服务注册执行putServiceAndInit方法时,调用Service#init方法,一个服务Service对应一个任务,开启健康检查定时任务ClientBeatCheckTask,默认延迟5s后执行,若某个实例超过15s没有收到心跳,则认为实例不健康;若某个实例超过30s没有收到心跳,则删除该实例。
// ClientBeatCheckTask类
@Override
public void run() {
try {
// 拿到该服务的所有实例
List<Instance> instances = service.allIPs(true);
// first set health status of instances:
for (Instance instance : instances) {
// 当前时间 - 最后一次心跳时间 大于 心跳超时时间(默认15s)则认为服务不健康
if (System.currentTimeMillis() - instance.getLastBeat() > instance.getInstanceHeartBeatTimeOut()) {
if (!instance.isMarked()) {
if (instance.isHealthy()) {
// 设置为非健康
instance.setHealthy(false);
// 发布service变更事件,主要用来将变更的实例推送到客户端
getPushService().serviceChanged(service);
ApplicationUtils.publishEvent(new InstanceHeartbeatTimeoutEvent(this, instance));
}
}
}
}
for (Instance instance : instances)
// 当前时间 - 最后一次心跳时间 大于 节点删除超时时间(默认30s)则删掉该服务实例
if (System.currentTimeMillis() - instance.getLastBeat() > instance.getIpDeleteTimeout()) {
deleteIp(instance);
}
}
} catch (Exception e) {
}
}