EurekaClient
在Eureka Client启动流程一节的最后说到,instanceInfoReplicator.start(clientConfig.getInitialInstanceInfoReplicationIntervalSeconds());里有服务注册的逻辑,所以就从这里开始看
/**
 * Starts the replicator. Only the first invocation has any effect; later
 * calls find {@code started} already set and return immediately.
 *
 * @param initialDelayMs delay before the first run of this task. Note that
 *                       despite the "Ms" suffix the value is handed to the
 *                       scheduler with {@link TimeUnit#SECONDS}.
 */
public void start(int initialDelayMs) {
    // Only the caller that flips started from false to true proceeds.
    if (!started.compareAndSet(false, true)) {
        return;
    }

    // Flag the instance info as dirty so that it gets sent to the server.
    instanceInfo.setIsDirty(); // for initial register

    // Schedule this object as the delayed task and remember the returned handle.
    Future pendingRun = scheduler.schedule(this, initialDelayMs, TimeUnit.SECONDS);
    scheduledPeriodicRef.set(pendingRun);
}
这里的任务就是InstanceInfoReplicator类本身。先不看注册逻辑是怎样的,这里可以看出这个注册任务是延迟40s后才执行的,网上的一些博客资料也是如此分析的,但是通过debug和日志分析,发现eureka并没有延迟注册,而是立即注册的,这是怎么回事呢?
通过debug分析,一路往上找,在EurekaClientAutoConfiguration类中找到了立即注册的入口
这里创建了一个EurekaAutoServiceRegistration,并且条件默认是生效的
该类实现了SmartLifecycle接口,所以在Spring容器启动(刷新完成)时会自动调用其start方法,通过debug,调用链如下
最终调到了DiscoveryClient的register方法
注册任务执行完后,会重新创建一个延迟任务。该任务每次执行时都会先刷新实例信息,主要就是将 EurekaClientConfig 的续约配置与本地的续约配置做对比,如果有变更就重新创建续约信息,并设置dirty标志,这样执行到后面就会重新注册。这种情况一般出现在运行期间动态更新了实例的配置,此时需要重新注册实例信息。
其实就是启动的时候会立即去注册,然后有一个周期性任务,每次都会检查实例信息租约配置,如果有变更,就会重新发起注册。
EurekaServer接收注册请求
EurekaServer接收注册请求的方法是ApplicationResource#addInstance()方法
/**
 * Handles a POST request that registers an instance.
 *
 * <p>The request is rejected with HTTP 400 when a required field is missing
 * or when the instance's app name differs from {@code appName}. Otherwise
 * the instance is passed to the registry and HTTP 204 is returned.
 *
 * @param info          the instance information sent by the caller
 * @param isReplication value of the replication header; the literal string
 *                      {@code "true"} marks the request as a replication
 * @return a 400 response describing the first failed check, or 204 on success
 */
@POST
@Consumes({"application/json", "application/xml"})
public Response addInstance(InstanceInfo info,
                            @HeaderParam(PeerEurekaNode.HEADER_REPLICATION) String isReplication) {
    logger.debug("Registering instance {} (replication={})", info.getId(), isReplication);

    // validate that the instanceinfo contains all the necessary required fields;
    // the first failing check decides the response
    if (isBlank(info.getId())) {
        return Response.status(400).entity("Missing instanceId").build();
    }
    if (isBlank(info.getHostName())) {
        return Response.status(400).entity("Missing hostname").build();
    }
    if (isBlank(info.getIPAddr())) {
        return Response.status(400).entity("Missing ip address").build();
    }
    if (isBlank(info.getAppName())) {
        return Response.status(400).entity("Missing appName").build();
    }
    if (!appName.equals(info.getAppName())) {
        return Response.status(400).entity("Mismatched appName, expecting " + appName + " but was " + info.getAppName()).build();
    }
    if (info.getDataCenterInfo() == null) {
        return Response.status(400).entity("Missing dataCenterInfo").build();
    }
    if (info.getDataCenterInfo().getName() == null) {
        return Response.status(400).entity("Missing dataCenterInfo Name").build();
    }

    // handle cases where clients may be registering with bad DataCenterInfo with missing data
    DataCenterInfo dataCenterInfo = info.getDataCenterInfo();
    boolean uniqueIdMissing = dataCenterInfo instanceof UniqueIdentifier
            && isBlank(((UniqueIdentifier) dataCenterInfo).getId());
    if (uniqueIdMissing) {
        String strictFlag = serverConfig.getExperimental("registration.validation.dataCenterInfoId");
        if ("true".equalsIgnoreCase(strictFlag)) {
            // Experimental strict validation: a blank id is an error.
            return Response.status(400)
                    .entity("DataCenterInfo of type " + dataCenterInfo.getClass() + " must contain a valid id")
                    .build();
        }
        if (dataCenterInfo instanceof AmazonInfo) {
            // Fall back to the instance's own id when the Amazon metadata has none.
            AmazonInfo amazonInfo = (AmazonInfo) dataCenterInfo;
            if (amazonInfo.get(AmazonInfo.MetaDataKey.instanceId) == null) {
                amazonInfo.getMetadata().put(AmazonInfo.MetaDataKey.instanceId.getName(), info.getId());
            }
        } else {
            logger.warn("Registering DataCenterInfo of type {} without an appropriate id", dataCenterInfo.getClass());
        }
    }

    // Hand the instance over to the registry.
    boolean replicated = "true".equals(isReplication);
    registry.register(info, replicated);
    return Response.status(204).build(); // 204 to be backwards compatible
}
这里分为两步操作:第一步是在本节点注册当前实例,第二步是将此实例信息同步到集群中的其它节点。如果该请求本身就是来自其它节点的同步(复制)事件,则不会再向其它节点同步。
/**
 * Registers an instance in the local registry, creating or replacing its lease.
 *
 * <p>The read lock is acquired <em>before</em> entering the {@code try} block so
 * that the {@code finally} clause never calls {@code unlock()} on a lock that
 * was not successfully acquired.
 *
 * @param registrant    the instance being registered; if an existing lease holds
 *                      a copy with a strictly newer dirty timestamp, that copy is
 *                      used instead
 * @param leaseDuration duration passed to the newly created {@link Lease}
 * @param isReplication whether this registration is a replication; it is passed
 *                      to the register counter, to the overridden-status
 *                      calculation and to the log output
 */
public void register(InstanceInfo registrant, int leaseDuration, boolean isReplication) {
    read.lock();
    try {
        // Look up all leases registered under this instance's application name.
        Map<String, Lease<InstanceInfo>> gMap = registry.get(registrant.getAppName());
        REGISTER.increment(isReplication);
        // No map for this application yet: create one, tolerating a concurrent creator.
        if (gMap == null) {
            final ConcurrentHashMap<String, Lease<InstanceInfo>> gNewMap = new ConcurrentHashMap<String, Lease<InstanceInfo>>();
            gMap = registry.putIfAbsent(registrant.getAppName(), gNewMap);
            if (gMap == null) {
                gMap = gNewMap;
            }
        }
        // Lease currently stored for this instance id, if any.
        Lease<InstanceInfo> existingLease = gMap.get(registrant.getId());
        // Retain the last dirty timestamp without overwriting it, if there is already a lease
        if (existingLease != null && (existingLease.getHolder() != null)) {
            // The instance has been registered before.
            Long existingLastDirtyTimestamp = existingLease.getHolder().getLastDirtyTimestamp();
            Long registrationLastDirtyTimestamp = registrant.getLastDirtyTimestamp();
            logger.debug("Existing lease found (existing={}, provided={}", existingLastDirtyTimestamp, registrationLastDirtyTimestamp);
            // this is a > instead of a >= because if the timestamps are equal, we still take the remote transmitted
            // InstanceInfo instead of the server local copy.
            if (existingLastDirtyTimestamp > registrationLastDirtyTimestamp) {
                logger.warn("There is an existing lease and the existing lease's dirty timestamp {} is greater" +
                        " than the one that is being registered {}", existingLastDirtyTimestamp, registrationLastDirtyTimestamp);
                logger.warn("Using the existing instanceInfo instead of the new instanceInfo as the registrant");
                registrant = existingLease.getHolder();
            }
        } else {
            // The lease does not exist and hence it is a new registration
            synchronized (lock) {
                if (this.expectedNumberOfRenewsPerMin > 0) {
                    // A new instance is joining, so raise the expected renew count
                    // (1 for 30 seconds, 2 for a minute)
                    this.expectedNumberOfRenewsPerMin = this.expectedNumberOfRenewsPerMin + 2;
                    // Recompute the per-minute renewal threshold from the new expectation.
                    this.numberOfRenewsPerMinThreshold =
                            (int) (this.expectedNumberOfRenewsPerMin * serverConfig.getRenewalPercentThreshold());
                }
            }
            logger.debug("No previous lease information found; it is new registration");
        }
        // Always build a fresh lease for the (possibly substituted) registrant.
        Lease<InstanceInfo> lease = new Lease<InstanceInfo>(registrant, leaseDuration);
        // Carry the service-up timestamp over from the previous lease, if there was one.
        if (existingLease != null) {
            lease.setServiceUpTimestamp(existingLease.getServiceUpTimestamp());
        }
        // Store the new lease under the instance id.
        gMap.put(registrant.getId(), lease);
        // Record the registration in the recently-registered queue.
        synchronized (recentRegisteredQueue) {
            recentRegisteredQueue.add(new Pair<Long, String>(
                    System.currentTimeMillis(),
                    registrant.getAppName() + "(" + registrant.getId() + ")"));
        }
        // This is where the initial state transfer of overridden status happens
        if (!InstanceStatus.UNKNOWN.equals(registrant.getOverriddenStatus())) {
            logger.debug("Found overridden status {} for instance {}. Checking to see if needs to be add to the "
                    + "overrides", registrant.getOverriddenStatus(), registrant.getId());
            if (!overriddenInstanceStatusMap.containsKey(registrant.getId())) {
                logger.info("Not found overridden id {} and hence adding it", registrant.getId());
                overriddenInstanceStatusMap.put(registrant.getId(), registrant.getOverriddenStatus());
            }
        }
        InstanceStatus overriddenStatusFromMap = overriddenInstanceStatusMap.get(registrant.getId());
        if (overriddenStatusFromMap != null) {
            logger.info("Storing overridden status {} from map", overriddenStatusFromMap);
            registrant.setOverriddenStatus(overriddenStatusFromMap);
        }
        // Set the status based on the overridden status rules
        InstanceStatus overriddenInstanceStatus = getOverriddenInstanceStatus(registrant, existingLease, isReplication);
        registrant.setStatusWithoutDirty(overriddenInstanceStatus);
        // If the instance is registered as UP, mark the lease as up.
        if (InstanceStatus.UP.equals(registrant.getStatus())) {
            lease.serviceUp();
        }
        registrant.setActionType(ActionType.ADDED);
        // Record the change in the recently-changed queue.
        recentlyChangedQueue.add(new RecentlyChangedItem(lease));
        // Stamp the instance with its last-updated time.
        registrant.setLastUpdatedTimestamp();
        // Invalidate cached entries for this application and its VIP addresses.
        invalidateCache(registrant.getAppName(), registrant.getVIPAddress(), registrant.getSecureVipAddress());
        logger.info("Registered instance {}/{} with status {} (replication={})",
                registrant.getAppName(), registrant.getId(), registrant.getStatus(), isReplication);
    } finally {
        read.unlock();
    }
}
最后一步,使缓存失效是指将跟该实例相关的缓存失效
可以看到会将读写缓存中该appName相关的Key,全量的Key,增量的Key都立即失效,无需等待过期时间到达,这样只读缓存下次定期更新时就会重新去全部注册表和最近变更记录表读取更新,这样可以较快的使客户端拉取到最新的注册信息。(为啥不将只读缓存中的相关Key也一起删除了,这样客户端下次拉取可以直接拉取到最新的,不然客户端拉取时如果只读缓存还没更新,那不是还是拉取到旧的信息?)
最近变更队列recentlyChangedQueue会被定期清理:在AbstractInstanceRegistry的构造方法中,会启动一个负责清理的定时任务
默认30秒执行一次,清理180秒之前的数据。