目录
EurekaBootStrap
Eureka-Server 启动入口:该类实现了ServletContextListener,在 Servlet 容器( 例如 Tomcat、Jetty )启动时,调用 #contextInitialized()
方法。
/**
 * Servlet-container startup hook: prepares the Eureka environment, builds the server
 * context, and publishes that context as a servlet-context attribute keyed by the
 * {@code EurekaServerContext} class name.
 *
 * @param event the container's context-initialized event
 * @throws RuntimeException wrapping any failure raised while bootstrapping
 */
@Override
public void contextInitialized(ServletContextEvent event) {
    try {
        // Environment first, then the server context.
        initEurekaEnvironment();
        initEurekaServerContext();

        // Publish the freshly built context on the servlet context.
        event.getServletContext().setAttribute(EurekaServerContext.class.getName(), serverContext);
    } catch (Throwable bootstrapFailure) {
        logger.error("Cannot bootstrap eureka server :", bootstrapFailure);
        throw new RuntimeException("Cannot bootstrap eureka server :", bootstrapFailure);
    }
}
下面依次看下这两步具体操作有哪些
initEurekaEnvironment
/**
 * Seeds the Archaius deployment properties from the Eureka configuration.
 * <p>
 * The data center property is copied from {@code EUREKA_DATACENTER}, or set to
 * {@code DEFAULT} when that key is absent. The environment property is set to
 * {@code TEST} only when {@code EUREKA_ENVIRONMENT} is absent; a configured
 * environment value is not copied anywhere by this method.
 *
 * @throws Exception declared by the signature
 */
protected void initEurekaEnvironment() throws Exception {
    logger.info("Setting the eureka configuration..");

    // Data center: propagate the configured value, otherwise fall back to DEFAULT.
    String dataCenter = ConfigurationManager.getConfigInstance().getString(EUREKA_DATACENTER);
    if (dataCenter != null) {
        ConfigurationManager.getConfigInstance().setProperty(ARCHAIUS_DEPLOYMENT_DATACENTER, dataCenter);
    } else {
        logger.info("Eureka data center value eureka.datacenter is not set, defaulting to default");
        ConfigurationManager.getConfigInstance().setProperty(ARCHAIUS_DEPLOYMENT_DATACENTER, DEFAULT);
    }

    // Environment: only the missing case is handled.
    String environment = ConfigurationManager.getConfigInstance().getString(EUREKA_ENVIRONMENT);
    if (environment != null) {
        return;
    }
    ConfigurationManager.getConfigInstance().setProperty(ARCHAIUS_DEPLOYMENT_ENVIRONMENT, TEST);
    logger.info("Eureka environment value eureka.environment is not set, defaulting to test");
}
主要是初始化环境信息,没有很多内容,主要看下初始化上下文的步骤
initEurekaServerContext
/**
 * Builds and initializes the Eureka server context.
 * <p>
 * Steps, in order: create the server config and codecs; obtain the ApplicationInfoManager
 * (creating the Eureka client when none exists yet); create the instance registry
 * (AWS-specific or generic); create the peer node collection; assemble the server context,
 * register it with EurekaServerContextHolder and initialize it; sync the registry via
 * {@code registry.syncUp()}; open the registry for traffic; register monitoring statistics.
 *
 * @throws Exception if any initialization step fails
 */
protected void initEurekaServerContext() throws Exception {
EurekaServerConfig eurekaServerConfig = new DefaultEurekaServerConfig();
// For backward compatibility
JsonXStream.getInstance().registerConverter(new V1AwareInstanceInfoConverter(), XStream.PRIORITY_VERY_HIGH);
XmlXStream.getInstance().registerConverter(new V1AwareInstanceInfoConverter(), XStream.PRIORITY_VERY_HIGH);
// Create the server codecs from the server configuration
logger.info("Initializing the eureka client...");
logger.info(eurekaServerConfig.getJsonCodecName());
ServerCodecs serverCodecs = new DefaultServerCodecs(eurekaServerConfig);
ApplicationInfoManager applicationInfoManager = null;
// Create the Eureka client if none exists yet; otherwise reuse its ApplicationInfoManager.
// NOTE(review): per the article, the server embeds this client to talk to the other
// Eureka-Server nodes in the cluster — confirm.
if (eurekaClient == null) {
EurekaInstanceConfig instanceConfig = isCloud(ConfigurationManager.getDeploymentContext())
? new CloudInstanceConfig()
: new MyDataCenterInstanceConfig();
applicationInfoManager = new ApplicationInfoManager(
instanceConfig, new EurekaConfigBasedInstanceInfoProvider(instanceConfig).get());
EurekaClientConfig eurekaClientConfig = new DefaultEurekaClientConfig();
/**
 * The server's eurekaClient is itself a DiscoveryClient
 */
eurekaClient = new DiscoveryClient(applicationInfoManager, eurekaClientConfig);
} else {
applicationInfoManager = eurekaClient.getApplicationInfoManager();
}
// Create the registry of application instance information
PeerAwareInstanceRegistry registry;
if (isAws(applicationInfoManager.getInfo())) {
registry = new AwsInstanceRegistry(
eurekaServerConfig,
eurekaClient.getEurekaClientConfig(),
serverCodecs,
eurekaClient
);
awsBinder = new AwsBinderDelegate(eurekaServerConfig, eurekaClient.getEurekaClientConfig(), registry, applicationInfoManager);
awsBinder.start();
} else {
registry = new PeerAwareInstanceRegistryImpl(
eurekaServerConfig,
eurekaClient.getEurekaClientConfig(),
serverCodecs,
eurekaClient
);
}
// Create the collection of Eureka-Server peer nodes
PeerEurekaNodes peerEurekaNodes = getPeerEurekaNodes(
registry,
eurekaServerConfig,
eurekaClient.getEurekaClientConfig(),
serverCodecs,
applicationInfoManager
);
// Create the Eureka-Server context from the pieces built above
serverContext = new DefaultEurekaServerContext(
eurekaServerConfig,
serverCodecs,
registry,
peerEurekaNodes,
applicationInfoManager
);
// Hand the context to EurekaServerContextHolder (presumably so it can be looked up later)
EurekaServerContextHolder.initialize(serverContext);
// Initialize the context.
// NOTE(review): the article's author believes instance data registered with other servers
// is obtained over HTTP by threads created along with the remote region registries, and
// suspects other mechanisms are involved as well — confirm.
serverContext.initialize();
logger.info("Initialized server context");
// Copy registry from neighboring eureka node
int registryCount = registry.syncUp();
registry.openForTraffic(applicationInfoManager, registryCount);
// Register all monitoring statistics.
EurekaMonitors.registerAllStats();
}
整个初始化EurekaServer的流程和细节点较多,一点点分析:
ApplicationInfoManager
注释1⃣️:
首先是根据EurekaInstanceConfig、InstanceInfo来创建应用管理类,这两个类作为属性构成ApplicationInfoManager,因此主要看下InstanceInfo的创建代码,通过EurekaConfigBasedInstanceInfoProvider(instanceConfig).get()来得到实例信息:
首先是根据配置文件的心跳间隔时间(默认30秒)、续约过期时间(默认90秒)来创建租约类的builder,租约类是服务发起续约,server过期应用的依据。后续是通过配置文件的设置数据来赋值实例属性,实例ID若有配置则读取配置,否则以主机名作为实例ID。
/**
 * Lazily builds the {@code InstanceInfo} described by the instance config and caches it;
 * later calls return the cached instance. The method is synchronized.
 *
 * @return the instance info built from {@code config}
 */
@Override
public synchronized InstanceInfo get() {
if (instanceInfo == null) {
// Build the lease information to be passed to the server based on config
LeaseInfo.Builder leaseInfoBuilder = LeaseInfo.Builder.newBuilder()
.setRenewalIntervalInSecs(config.getLeaseRenewalIntervalInSeconds())
.setDurationInSecs(config.getLeaseExpirationDurationInSeconds());
if (vipAddressResolver == null) {
vipAddressResolver = new Archaius1VipAddressResolver();
}
// Builder the instance information to be registered with eureka server
InstanceInfo.Builder builder = InstanceInfo.Builder.newBuilder(vipAddressResolver);
// set the appropriate id for the InstanceInfo, falling back to datacenter Id if applicable, else hostname
String instanceId = config.getInstanceId();
if (instanceId == null || instanceId.isEmpty()) {
DataCenterInfo dataCenterInfo = config.getDataCenterInfo();
if (dataCenterInfo instanceof UniqueIdentifier) {
instanceId = ((UniqueIdentifier) dataCenterInfo).getId();
} else {
instanceId = config.getHostName(false);
}
}
// Default address: resolved by the config when it is refreshable, otherwise the host name
String defaultAddress;
if (config instanceof RefreshableInstanceConfig) {
// Refresh AWS data center info, and return up to date address
defaultAddress = ((RefreshableInstanceConfig) config).resolveDefaultAddress(false);
} else {
defaultAddress = config.getHostName(false);
}
// fail safe
if (defaultAddress == null || defaultAddress.isEmpty()) {
defaultAddress = config.getIpAddress();
}
builder.setNamespace(config.getNamespace())
.setInstanceId(instanceId)
.setAppName(config.getAppname())
.setAppGroupName(config.getAppGroupName())
.setDataCenterInfo(config.getDataCenterInfo())
.setIPAddr(config.getIpAddress())
.setHostName(defaultAddress)
.setPort(config.getNonSecurePort())
.enablePort(PortType.UNSECURE, config.isNonSecurePortEnabled())
.setSecurePort(config.getSecurePort())
.enablePort(PortType.SECURE, config.getSecurePortEnabled())
.setVIPAddress(config.getVirtualHostName())
.setSecureVIPAddress(config.getSecureVirtualHostName())
.setHomePageUrl(config.getHomePageUrlPath(), config.getHomePageUrl())
.setStatusPageUrl(config.getStatusPageUrlPath(), config.getStatusPageUrl())
.setASGName(config.getASGName())
.setHealthCheckUrls(config.getHealthCheckUrlPath(),
config.getHealthCheckUrl(), config.getSecureHealthCheckUrl());
// Start off with the STARTING state to avoid traffic
// (the else branch only logs; it does not set a status on the builder)
if (!config.isInstanceEnabledOnit()) {
InstanceStatus initialStatus = InstanceStatus.STARTING;
LOG.info("Setting initial instance status as: {}", initialStatus);
builder.setStatus(initialStatus);
} else {
LOG.info("Setting initial instance status as: {}. This may be too early for the instance to advertise "
+ "itself as available. You would instead want to control this via a healthcheck handler.",
InstanceStatus.UP);
}
// Add any user-specific metadata information
for (Map.Entry<String, String> mapEntry : config.getMetadataMap().entrySet()) {
String key = mapEntry.getKey();
String value = mapEntry.getValue();
// only add the metadata if the value is present
if (value != null && !value.isEmpty()) {
builder.add(key, value);
}
}
instanceInfo = builder.build();
instanceInfo.setLeaseInfo(leaseInfoBuilder.build());
}
return instanceInfo;
}
创建EurekaClient
紧接着通过eurekaClient = new DiscoveryClient(applicationInfoManager, eurekaClientConfig);应用管理类和eurekaClient的配置来创建eurekaClient,可以看到和EurekaClient一样,也是通过创建DiscoveryClient类的方式,来将当前server实例注册、续约注册中心的,从这里也可以看出来,Server实例没有主次之分,每个Server也是将自己作为一个client来注册到注册中心上的。
创建应用实例信息的注册表
后续通过PeerAwareInstanceRegistryImpl进行注册表的创建
/**
 * Creates the peer-aware registry: delegates the shared setup to the superclass, keeps the
 * Eureka client, creates the replication rate meter (one-minute sample interval) and
 * installs the instance-status override rule chain.
 *
 * @param serverConfig the server configuration, passed to the superclass
 * @param clientConfig the client configuration, passed to the superclass
 * @param serverCodecs the codecs, passed to the superclass
 * @param eurekaClient the Eureka client kept by this registry
 */
public PeerAwareInstanceRegistryImpl(
        EurekaServerConfig serverConfig,
        EurekaClientConfig clientConfig,
        ServerCodecs serverCodecs,
        EurekaClient eurekaClient
) {
    super(serverConfig, clientConfig, serverCodecs);

    this.numberOfReplicationsLastMin = new MeasuredRate(60 * 1000);
    this.eurekaClient = eurekaClient;

    // Rule order: STARTING/DOWN check first, then explicit overrides,
    // then the status of a potentially existing lease.
    this.instanceStatusOverrideRule = new FirstMatchWinsCompositeRule(
            new DownOrStartingRule(),
            new OverrideExistsRule(overriddenInstanceStatusMap),
            new LeaseExistsRule());
}
通过调用父类的实现来初始化属性,然后创建实例状态覆盖规则,初始化时,赋予了三个覆盖规则,当都不满足时,返回默认覆盖规则执行结果。
覆盖状态的作用:
调用 Eureka-Server HTTP Restful 接口
apps/${APP_NAME}/${INSTANCE_ID}/status
对应用实例覆盖状态的变更,从而达到主动的、强制的变更应用实例状态。注意,实际不会真的修改 Eureka-Client 应用实例的状态,而是修改在 Eureka-Server 注册的应用实例的状态。通过这样的方式,Eureka-Client 在获取到注册信息时,并且配置
eureka.shouldFilterOnlyUpInstances = true
,过滤掉非InstanceStatus.UP
的应用实例,从而避免调动该实例,以达到应用实例的暂停服务(InstanceStatus.OUT_OF_SERVICE
),而无需关闭应用实例。因此,大多数情况下,调用该接口的目的,将应用实例状态在 (
InstanceStatus.UP
) 和 (InstanceStatus.OUT_OF_SERVICE
) 之间切换。
一、覆盖状态规则:
下线或启动规则(DownOrStartingRule):当当前实例状态不处于UP、OUT_OF_SERVICE时,进行当前规则的执行,直接返回当前实例的状态作为覆盖状态,
public class DownOrStartingRule implements InstanceStatusOverrideRule {
@Override
public StatusOverrideResult apply(InstanceInfo instanceInfo,
Lease<InstanceInfo> existingLease,
boolean isReplication) {
/**
* 若实例状态不处于运行中、暂停服务,(启动中、下线),则不适合提供服务,不匹配
*/
if ((!InstanceInfo.InstanceStatus.UP.equals(instanceInfo.getStatus()))
&& (!InstanceInfo.InstanceStatus.OUT_OF_SERVICE.equals(instanceInfo.getStatus()))) {
logger.debug("Trusting the instance status {} from replica or instance for instance {}",
instanceInfo.getStatus(), instanceInfo.getId());
return StatusOverrideResult.matchingStatus(instanceInfo.getStatus());
}
return StatusOverrideResult.NO_MATCH;
}
二、覆盖状态存在规则(OverrideExistsRule):若当前实例存在状态覆盖的数据,则使用已存在的覆盖状态作为当前覆盖状态
三、续约存在规则(LeaseExistsRule):非Server请求时,匹配已存在租约的应用实例的 InstanceInfo.InstanceStatus.OUT_OF_SERVICE
或者 InstanceInfo.InstanceStatus.UP
状态
四、默认规则(AlwaysMatchInstanceStatusRule):总是返回当前实例的状态,来作为覆盖状态。
super(serverConfig, clientConfig, serverCodecs)
接着看下super里做了些什么,首先是赋值属性,创建最近取消、注册队列,定时任务设置getDeltaRetentionTask()的线程以配置的时间间隔(默认30秒)来定时执行。而getDeltaRetentionTask的run()方法,是遍历最近改变的队列信息,若队列内实例更新时间超过当前时间一定的时间段(默认三分钟),则从最近改变队列中移除,当client端发起对注册信息增量获取时,recentlyChangedQueue被用来计算最近时间的增量,返回给client端。
/**
 * Stores the configuration and codecs, creates the recent-cancel and recent-register
 * circular queues (constructed with 1000) and the renewals meter (one-minute sample
 * interval), and schedules the delta retention task.
 *
 * @param serverConfig the server configuration; also supplies the retention timer interval
 * @param clientConfig the client configuration
 * @param serverCodecs the server codecs
 */
protected AbstractInstanceRegistry(EurekaServerConfig serverConfig, EurekaClientConfig clientConfig, ServerCodecs serverCodecs) {
this.serverConfig = serverConfig;
this.clientConfig = clientConfig;
this.serverCodecs = serverCodecs;
this.recentCanceledQueue = new CircularQueue<Pair<Long, String>>(1000);
this.recentRegisteredQueue = new CircularQueue<Pair<Long, String>>(1000);
this.renewsLastMin = new MeasuredRate(1000 * 60 * 1);
/**
 * Run the delta-retention cleanup periodically. Both the initial delay and the period come
 * from serverConfig.getDeltaRetentionTimerIntervalInMs() (30 seconds by default according
 * to the article — confirm).
 */
this.deltaRetentionTimer.schedule(getDeltaRetentionTask(),
serverConfig.getDeltaRetentionTimerIntervalInMs(),
serverConfig.getDeltaRetentionTimerIntervalInMs());
}
/**
 * Creates the task that trims {@code recentlyChangedQueue}: starting at the head, entries
 * whose last update time is older than the configured retention time are removed, and the
 * scan stops at the first entry that is still within the retention window.
 *
 * @return the cleanup task
 */
private TimerTask getDeltaRetentionTask() {
    return new TimerTask() {
        @Override
        public void run() {
            Iterator<RecentlyChangedItem> items = recentlyChangedQueue.iterator();
            while (items.hasNext()) {
                long lastUpdate = items.next().getLastUpdateTime();
                long cutoff = System.currentTimeMillis() - serverConfig.getRetentionTimeInMSInDeltaQueue();
                if (lastUpdate >= cutoff) {
                    // First entry still inside the retention window: stop scanning.
                    break;
                }
                items.remove();
            }
        }
    };
}
后续是创建PeerEurekaNodes(Eureka-Server 集群节点集合)、创建EurekaServerContext(Eureka-Server上下文),将上下文放入holder方便获取,紧接着到了初始化上下文的节点,重点看下这个
初始化EurekaServerContext
初始化的代码,主要是做了两件事,启动刚刚创建好的server集群节点集合、初始化应用实例信息注册表,一个个来看。
/**
 * Starts the peer node collection and then initializes the registry with it.
 *
 * @throws RuntimeException wrapping any exception raised by the registry initialization
 */
public void initialize() {
    logger.info("Initializing ...");

    // Peer nodes are started first; the registry is then initialized with them.
    peerEurekaNodes.start();
    try {
        registry.init(peerEurekaNodes);
    } catch (Exception initFailure) {
        throw new RuntimeException(initFailure);
    }

    logger.info("Initialized");
}
启动Server集群节点集合
start():首先是创建一个定时任务,紧接着更新集群节点信息,创建一个实现run方法的类,run()的代码主要起到更新集群节点信息的作用,接着将该类交由定时任务每隔一定时间执行一次。这样就保证了初始化时,获取到server的集群信息,并每间隔一段时间发起请求去更新本地注册表server实例的用途。
/**
 * Starts peer node management: creates a single-thread scheduled executor backed by a
 * daemon thread named "Eureka-PeerNodesUpdater", performs an initial peer update, and
 * schedules the same update with a fixed delay taken from the server configuration.
 * Finally logs the service URL of every peer node.
 *
 * @throws IllegalStateException wrapping any exception raised by the initial update or
 *         while scheduling the periodic update
 */
public void start() {
    ThreadFactory updaterThreadFactory = new ThreadFactory() {
        @Override
        public Thread newThread(Runnable runnable) {
            Thread updaterThread = new Thread(runnable, "Eureka-PeerNodesUpdater");
            updaterThread.setDaemon(true);
            return updaterThread;
        }
    };
    taskExecutor = Executors.newSingleThreadScheduledExecutor(updaterThreadFactory);

    try {
        // Initial resolution of the peer set.
        updatePeerEurekaNodes(resolvePeerUrls());

        // Periodic refresh; failures are logged so later runs are not suppressed.
        Runnable peersUpdateTask = new Runnable() {
            @Override
            public void run() {
                try {
                    updatePeerEurekaNodes(resolvePeerUrls());
                } catch (Throwable updateFailure) {
                    logger.error("Cannot update the replica Nodes", updateFailure);
                }
            }
        };
        taskExecutor.scheduleWithFixedDelay(
                peersUpdateTask,
                serverConfig.getPeerEurekaNodesUpdateIntervalMs(),
                serverConfig.getPeerEurekaNodesUpdateIntervalMs(),
                TimeUnit.MILLISECONDS);
    } catch (Exception startFailure) {
        throw new IllegalStateException(startFailure);
    }

    for (PeerEurekaNode peer : peerEurekaNodes) {
        logger.info("Replica node URL: {}", peer.getServiceUrl());
    }
}
下面主要看下,server是如何初始化、更新集群节点信息的
resolvePeerUrls():得到当前实例信息、可见区,然后通过getDiscoveryServiceUrls()根据DNS或者配置信息解析出所有的server服务URLS,除去当前server实例信息,就是其他所有的server服务。
/**
 * Resolves the service URLs of the peers: obtains the discovery service URLs for this
 * instance's zone and removes every URL that {@code isThisMyUrl} recognizes as this node.
 *
 * @return the discovery service URLs with this node's own URL(s) removed
 */
protected List<String> resolvePeerUrls() {
    // This instance and its zone.
    InstanceInfo myInfo = applicationInfoManager.getInfo();
    String zone = InstanceInfo.getZone(clientConfig.getAvailabilityZones(clientConfig.getRegion()), myInfo);

    // All discovery service URLs for that zone.
    EndpointUtils.InstanceInfoBasedUrlRandomizer randomizer =
            new EndpointUtils.InstanceInfoBasedUrlRandomizer(myInfo);
    List<String> replicaUrls = EndpointUtils.getDiscoveryServiceUrls(clientConfig, zone, randomizer);

    // Drop our own URL; the index only advances when nothing was removed at that position.
    for (int position = 0; position < replicaUrls.size(); ) {
        if (isThisMyUrl(replicaUrls.get(position))) {
            replicaUrls.remove(position);
        } else {
            position++;
        }
    }
    return replicaUrls;
}
/**
 * Returns the Eureka service URLs, resolved from DNS when the client config asks for it
 * and from the configuration otherwise.
 *
 * @param clientConfig the client configuration consulted for the DNS and same-zone settings
 * @param zone         the zone passed through to the resolver
 * @param randomizer   passed to the DNS-based resolver only
 * @return the list of service URLs
 */
public static List<String> getDiscoveryServiceUrls(EurekaClientConfig clientConfig, String zone, ServiceUrlRandomizer randomizer) {
    if (!clientConfig.shouldUseDnsForFetchingServiceUrls()) {
        return getServiceUrlsFromConfig(clientConfig, zone, clientConfig.shouldPreferSameZoneEureka());
    }
    return getServiceUrlsFromDNS(clientConfig, zone, clientConfig.shouldPreferSameZoneEureka(), randomizer);
}
更新集群节点集合
上面已经得到最新的server节点集合,之前的server节点除去所有新server,剩余的就是要下线的server,最新server除去所有之前server节点集合,就是本次新增的server节点集合。
若本次没有新增、下线server节点,不进行操作。若下线节点集合存在,shutdown下线的eurekaNode,关闭node的线程池(集群同步,下文讲解);若新增节点集合存在,创建node节点,并添加到临时变量newNodeList中,后续赋值为当前server集群节点集合。首次初始化时,就是通过解析出的server全为新增节点,来完成peerEurekaNodes集合的属性填充。
/**
 * Reconciles the current peer nodes with a freshly resolved URL list: peers whose URL is no
 * longer listed are shut down and dropped, a node is created for every new URL, and the
 * node list and URL set are then replaced. Nothing happens when the new list is empty
 * (a warning is logged) or when there is no difference.
 *
 * @param newPeerUrls the latest peer service URLs
 */
protected void updatePeerEurekaNodes(List<String> newPeerUrls) {
    if (newPeerUrls.isEmpty()) {
        logger.warn("The replica size seems to be empty. Check the route 53 DNS Registry");
        return;
    }

    // URLs known before but missing now, and URLs that are new.
    Set<String> toShutdown = new HashSet<>(peerEurekaNodeUrls);
    toShutdown.removeAll(newPeerUrls);
    Set<String> toAdd = new HashSet<>(newPeerUrls);
    toAdd.removeAll(peerEurekaNodeUrls);

    boolean unchanged = toShutdown.isEmpty() && toAdd.isEmpty();
    if (unchanged) {
        return;
    }

    // Work on a copy; the field is swapped at the end.
    List<PeerEurekaNode> updatedNodes = new ArrayList<>(peerEurekaNodes);

    if (!toShutdown.isEmpty()) {
        logger.info("Removing no longer available peer nodes {}", toShutdown);
        for (int position = 0; position < updatedNodes.size(); ) {
            PeerEurekaNode candidate = updatedNodes.get(position);
            if (!toShutdown.contains(candidate.getServiceUrl())) {
                position++;
                continue;
            }
            updatedNodes.remove(position);
            candidate.shutDown();
        }
    }

    if (!toAdd.isEmpty()) {
        logger.info("Adding new peer nodes {}", toAdd);
        for (String addedUrl : toAdd) {
            updatedNodes.add(createPeerEurekaNode(addedUrl));
        }
    }

    this.peerEurekaNodes = updatedNodes;
    this.peerEurekaNodeUrls = new HashSet<>(newPeerUrls);
}
初始化 应用实例信息的注册表
根据刚刚创建好的集群节点集合去初始化注册信息:每一步的作用下面已有注释,下面分析下主要的代码
/**
 * Initializes the registry with the given peer node collection: starts the replication
 * rate meter, stores the peer nodes, initializes the response cache, schedules the renewal
 * threshold update, initializes the remote region registries and registers this object
 * for monitoring.
 *
 * @param peerEurekaNodes the peer node collection this registry keeps a reference to
 * @throws Exception if one of the initialization steps fails
 */
public void init(PeerEurekaNodes peerEurekaNodes) throws Exception {
// Start the replication rate meter
this.numberOfReplicationsLastMin.start();
// Keep a reference to the peer nodes
this.peerEurekaNodes = peerEurekaNodes;
// Initialize the response cache
initializedResponseCache();
// Schedule the periodic renewal-threshold update
scheduleRenewalThresholdUpdateTask();
// Initialize the registries for the remote regions
initRemoteRegionRegistry();
try {
// Register for monitoring; a failure here is only logged
Monitors.registerObject(this);
} catch (Throwable e) {
logger.warn("Cannot register the JMX monitor for the InstanceRegistry :", e);
}
}
初始化响应缓存
1、通过CacheBuilder创建默认1000大小,180秒过期的缓存数据
2、若当前实例允许使用readonly的缓存,那么开启定时任务,每隔30秒更新readOnlyCacheMap的数据,实现是遍历readOnlyCacheMap若value值和readWriteCacheMap的value值不同,则将readWriteCacheMap中最新的value值赋值到readOnly缓存中去,这样就保证了readOnly的数据不会存在太久的脏数据。
/**
 * Builds the response cache: a loading read-write cache whose entries are produced by
 * {@code generatePayload}, plus, when the read-only cache is enabled, a timer task
 * scheduled at the configured update interval.
 *
 * @param serverConfig supplies the cache capacity, expiration and update interval
 * @param serverCodecs the server codecs
 * @param registry     the registry kept by this cache
 */
ResponseCacheImpl(EurekaServerConfig serverConfig, ServerCodecs serverCodecs, AbstractInstanceRegistry registry) {
this.serverConfig = serverConfig;
this.serverCodecs = serverCodecs;
this.shouldUseReadOnlyResponseCache = serverConfig.shouldUseReadOnlyResponseCache();
this.registry = registry;
long responseCacheUpdateIntervalMs = serverConfig.getResponseCacheUpdateIntervalMs();
// Read-write cache: initial capacity and expire-after-write come from the server config
// (the article cites defaults of 1000 entries and 180 seconds — confirm)
this.readWriteCacheMap =
CacheBuilder.newBuilder().initialCapacity(serverConfig.getInitialCapacityOfResponseCache())
.expireAfterWrite(serverConfig.getResponseCacheAutoExpirationInSeconds(), TimeUnit.SECONDS)
.removalListener(new RemovalListener<Key, Value>() {
@Override
public void onRemoval(RemovalNotification<Key, Value> notification) {
// When a region-specific key is removed, also drop its mapping in regionSpecificKeys
Key removedKey = notification.getKey();
if (removedKey.hasRegions()) {
Key cloneWithNoRegions = removedKey.cloneWithoutRegions();
regionSpecificKeys.remove(cloneWithNoRegions, removedKey);
}
}
})
.build(new CacheLoader<Key, Value>() {
@Override
public Value load(Key key) throws Exception {
// Record region-specific keys under their region-less clone, then build the payload
if (key.hasRegions()) {
Key cloneWithNoRegions = key.cloneWithoutRegions();
regionSpecificKeys.put(cloneWithNoRegions, key);
}
Value value = generatePayload(key);
return value;
}
});
if (shouldUseReadOnlyResponseCache) {
// Schedule the cache update task with the period read from the server config
// (eureka.responseCacheUpdateIntervalMs, 30 * 1000 ms by default per the original note).
// The first run is aligned to the next multiple of the interval.
// NOTE(review): the original comment said the task updates the read-write map, while the
// article says it refreshes readOnlyCacheMap from readWriteCacheMap; getCacheUpdateTask()
// is not shown here — confirm.
timer.schedule(getCacheUpdateTask(),
new Date(((System.currentTimeMillis() / responseCacheUpdateIntervalMs) * responseCacheUpdateIntervalMs)
+ responseCacheUpdateIntervalMs),
responseCacheUpdateIntervalMs);
}
try {
Monitors.registerObject(this);
} catch (Throwable e) {
logger.warn("Cannot register the JMX monitor for the InstanceRegistry", e);
}
}
readOnly数据的填充,是当其他服务发起全量、增量、应用信息获取时,通过响应缓存读取数据,若readOnlyMap存在缓存则,直接返回,否则读取readWriterCacheMap,并将返回值赋值到readOnlyMap中,避免对readWriterCacheMap的频繁调用。
/**
 * Looks up the cached payload for a key.
 * <p>
 * With the read-only cache enabled, the read-only map is consulted first; on a miss the
 * value is taken from the read-write cache and copied into the read-only map. Without it,
 * the read-write cache is read directly. Any failure is logged and whatever payload was
 * obtained so far (possibly {@code null}) is returned.
 *
 * @param key              the cache key
 * @param useReadOnlyCache whether to go through the read-only map
 * @return the payload, or {@code null} if the lookup failed before a value was obtained
 */
Value getValue(final Key key, boolean useReadOnlyCache) {
    Value payload = null;
    try {
        if (!useReadOnlyCache) {
            payload = readWriteCacheMap.get(key);
        } else {
            final Value cached = readOnlyCacheMap.get(key);
            if (cached == null) {
                // Miss in the read-only map: load from the read-write cache and remember it.
                payload = readWriteCacheMap.get(key);
                readOnlyCacheMap.put(key, payload);
            } else {
                payload = cached;
            }
        }
    } catch (Throwable t) {
        logger.error("Cannot get value for key : {}", key, t);
    }
    return payload;
}
那么为什么要这样操作,避免对readWriterCacheMap频繁调用呢,readWriterCacheMap已经是记录三分钟、1000容量的缓存,当未命中到缓存时,需要通过generatePayload()方法,来获取到具体数据,具体代码见下面代码:
.build(new CacheLoader<Key, Value>() {
@Override
public Value load(Key key) throws Exception {
if (key.hasRegions()) {
Key cloneWithNoRegions = key.cloneWithoutRegions();
regionSpecificKeys.put(cloneWithNoRegions, key);
}
Value value = generatePayload(key);
return value;
}
});
// 服务有效负载
/**
 * Generates the cache value for a key by dispatching on the key's entity type and name:
 * all applications, the applications delta, a single application, or a VIP / secure VIP
 * lookup. The timer started for the chosen branch is stopped in the finally block.
 *
 * @param key the cache key describing what to serialize
 * @return a new {@code Value} wrapping the payload (an empty string for an unknown entity type)
 */
private Value generatePayload(Key key) {
Stopwatch tracer = null;
try {
String payload;
switch (key.getEntityType()) {
case Application:
boolean isRemoteRegionRequested = key.hasRegions();
if (ALL_APPS.equals(key.getName())) {
if (isRemoteRegionRequested) {
tracer = serializeAllAppsWithRemoteRegionTimer.start();
payload = getPayLoad(key, registry.getApplicationsFromMultipleRegions(key.getRegions()));
} else {
// Full fetch
tracer = serializeAllAppsTimer.start();
// Build the payload from the key and the registry's applications
payload = getPayLoad(key, registry.getApplications());
}
} else if (ALL_APPS_DELTA.equals(key.getName())) {
// Delta fetch (not yet covered in this walkthrough)
if (isRemoteRegionRequested) {
tracer = serializeDeltaAppsWithRemoteRegionTimer.start();
versionDeltaWithRegions.incrementAndGet();
versionDeltaWithRegionsLegacy.incrementAndGet();
payload = getPayLoad(key,
registry.getApplicationDeltasFromMultipleRegions(key.getRegions()));
} else {
tracer = serializeDeltaAppsTimer.start();
versionDelta.incrementAndGet();
versionDeltaLegacy.incrementAndGet();
payload = getPayLoad(key, registry.getApplicationDeltas());
}
} else {
// Any other name is treated as a single application name
tracer = serializeOneApptimer.start();
payload = getPayLoad(key, registry.getApplication(key.getName()));
}
break;
case VIP:
case SVIP:
tracer = serializeViptimer.start();
payload = getPayLoad(key, getApplicationsForVip(key, registry));
break;
default:
logger.error("Unidentified entity type: {} found in the cache key.", key.getEntityType());
payload = "";
break;
}
return new Value(payload);
} finally {
if (tracer != null) {
tracer.stop();
}
}
}
根据不同的请求类型,来完成相应操作,类型较多,只以全量为例进行解析,其余实现类似:getPayLoad():通过server解码器,将数据解码为String,返回给client,忽略。getApplicationsFromMultipleRegions():增加监控数据,可忽略,后续读取当前server的注册表信息,将租约实例信息添加到临时变量apps中,此时当前server的所有实例信息已经得到,但是当前server可能因为网络原因,本地的注册表并不一定会是最全最新的注册信息,因此将本地缓存的其他server的注册表信息(server集群同步时缓存,下文讲解)也添加到apps中,循环远程注册表缓存数据,apps获取该app名称,若不存在,创建app信息,并添加所有实例,因是map、set数据结构来接收实例信息,所以不会存在多个实例重复注册后,数据重复的情况。得到所有apps后,所有实例状态及状态对应数据放入map中,通过hashmap的自然排序,生成应用集合的hashcode,hashcode例子:DOWN_2_UP_8_。
/**
 * Collects the applications held in the local registry and, when remote regions are
 * given, merges in the applications of those regions that pass
 * {@code shouldFetchFromRemoteRegistry}. The apps hash code is set on the result before
 * it is returned.
 *
 * @param remoteRegions the remote regions to include; {@code null} or empty means local only
 * @return the combined applications
 */
public Applications getApplicationsFromMultipleRegions(String[] remoteRegions) {
boolean includeRemoteRegion = null != remoteRegions && remoteRegions.length != 0;
logger.debug("Fetching applications registry with remote regions: {}, Regions argument {}",
includeRemoteRegion, remoteRegions);
// Increment the counter that matches the kind of request
if (includeRemoteRegion) {
GET_ALL_WITH_REMOTE_REGIONS_CACHE_MISS.increment();
} else {
GET_ALL_CACHE_MISS.increment();
}
// Collect every application instance held in the local registry
Applications apps = new Applications();
apps.setVersion(1L);
for (Entry<String, Map<String, Lease<InstanceInfo>>> entry : registry.entrySet()) {
Application app = null;
if (entry.getValue() != null) {
for (Entry<String, Lease<InstanceInfo>> stringLeaseEntry : entry.getValue().entrySet()) {
Lease<InstanceInfo> lease = stringLeaseEntry.getValue();
// The Application is created lazily from the first lease's app name
if (app == null) {
app = new Application(lease.getHolder().getAppName());
}
app.addInstance(decorateInstanceInfo(lease));
}
}
if (app != null) {
apps.addApplication(app);
}
}
// Merge in the applications of the requested remote regions
if (includeRemoteRegion) {
for (String remoteRegion : remoteRegions) {
// NOTE(review): the article's author guesses these registries hold instance data
// cached from other clusters during cluster synchronization — confirm.
RemoteRegionRegistry remoteRegistry = regionNameVSRemoteRegistry.get(remoteRegion);
if (null != remoteRegistry) {
Applications remoteApps = remoteRegistry.getApplications();
for (Application application : remoteApps.getRegisteredApplications()) {
if (shouldFetchFromRemoteRegistry(application.getName(), remoteRegion)) {
logger.info("Application {} fetched from the remote region {}",
application.getName(), remoteRegion);
Application appInstanceTillNow = apps.getRegisteredApplications(application.getName());
if (appInstanceTillNow == null) {
appInstanceTillNow = new Application(application.getName());
apps.addApplication(appInstanceTillNow);
}
for (InstanceInfo instanceInfo : application.getInstances()) {
appInstanceTillNow.addInstance(instanceInfo);
}
} else {
logger.debug("Application {} not fetched from the remote region {} as there exists a "
+ "whitelist and this app is not in the whitelist.",
application.getName(), remoteRegion);
}
}
} else {
logger.warn("No remote registry available for the remote region {}", remoteRegion);
}
}
}
// Set the apps hash code from the reconcile hash code. Per the article it is later used to
// check whether a delta-fetched registry is consistent with the server's full registry.
apps.setAppsHashCode(apps.getReconcileHashCode());
return apps;
}
更新续约阈值:
跳出来,接着说初始化注册表,接下来会通过定时任务,更新续约阈值。
通过定时任务,每隔十五分钟执行一次,重置当前server的自我保护阈值。首先是获取当前server的所有实例数,当该数值大于 预期客户端数*0.85,或者未开启自我保护机制时,更新预期客户端数值,变更每分钟续约最小阈值。当开启保护机制,若活跃实例数小于 预期客户端数*0.85 时,不进行操作,此处也是server自我保护机制的实现:server认为该时间段内过多服务下线,自动进入自我保护。不修改预期客户端数值、每分钟续约阈值,那么当驱逐实例定时任务运行时,存在其他服务续约过期,server判断实例数过小时,保护当前注册表信息,不会进行驱逐操作(下面分析)。
/**
 * Schedules {@code updateRenewalThreshold()} on the timer; both the initial delay and the
 * period are read from {@code serverConfig.getRenewalThresholdUpdateIntervalMs()}.
 */
private void scheduleRenewalThresholdUpdateTask() {
    TimerTask thresholdUpdater = new TimerTask() {
        @Override
        public void run() {
            updateRenewalThreshold();
        }
    };
    timer.schedule(
            thresholdUpdater,
            serverConfig.getRenewalThresholdUpdateIntervalMs(),
            serverConfig.getRenewalThresholdUpdateIntervalMs());
}
/**
 * Recomputes the expected number of renewing clients from the applications known to the
 * Eureka client, counting only instances accepted by {@code isRegisterable}.
 * <p>
 * The expected count and the per-minute renewal threshold are updated only when the new
 * count exceeds the configured percentage of the current expectation, or when
 * self-preservation mode is disabled. Any failure is logged and swallowed.
 */
private void updateRenewalThreshold() {
    try {
        // Count the registerable instances.
        Applications apps = eurekaClient.getApplications();
        int registerableCount = 0;
        for (Application app : apps.getRegisteredApplications()) {
            for (InstanceInfo instance : app.getInstances()) {
                if (!this.isRegisterable(instance)) {
                    continue;
                }
                registerableCount++;
            }
        }

        synchronized (lock) {
            // Update threshold only if the count is above the current expected threshold
            // or if self preservation is disabled.
            boolean aboveThreshold = registerableCount
                    > (serverConfig.getRenewalPercentThreshold() * expectedNumberOfClientsSendingRenews);
            if (aboveThreshold || !this.isSelfPreservationModeEnabled()) {
                this.expectedNumberOfClientsSendingRenews = registerableCount;
                updateRenewsPerMinThreshold();
            }
        }
        logger.info("Current renewal threshold is : {}", numberOfRenewsPerMinThreshold);
    } catch (Throwable e) {
        logger.error("Cannot update renewal threshold", e);
    }
}
初始化远程的server注册信息
通过server配置信息根据名称获取到其他server服务路径,遍历map,创建RemoteRegionRegistry远程server注册类并添加到regionNameVSRemoteRegistry中。
/**
 * Creates one {@code RemoteRegionRegistry} per configured remote region, stores it in
 * {@code regionNameVSRemoteRegistry} under the region name, and records the names in
 * {@code allKnownRemoteRegions}. When no remote region is configured, only the final log
 * line is emitted.
 *
 * @throws MalformedURLException if a configured remote region URL cannot be parsed
 */
protected void initRemoteRegionRegistry() throws MalformedURLException {
    Map<String, String> remoteRegionUrlsWithName = serverConfig.getRemoteRegionUrlsWithName();
    if (!remoteRegionUrlsWithName.isEmpty()) {
        allKnownRemoteRegions = new String[remoteRegionUrlsWithName.size()];
        int nextSlot = 0;
        for (Map.Entry<String, String> regionEntry : remoteRegionUrlsWithName.entrySet()) {
            String regionName = regionEntry.getKey();
            URL regionUrl = new URL(regionEntry.getValue());

            // The registry's constructor performs the initial fetch from the remote region.
            RemoteRegionRegistry regionRegistry = new RemoteRegionRegistry(
                    serverConfig, clientConfig, serverCodecs, regionName, regionUrl);

            regionNameVSRemoteRegistry.put(regionName, regionRegistry);
            allKnownRemoteRegions[nextSlot] = regionName;
            nextSlot++;
        }
    }
    logger.info("Finished initializing remote region registries. All known remote regions: {}",
            (Object) allKnownRemoteRegions);
}
下面看下具体每个RemoteRegionRegistry的初始化过程
1、根据server解码器、远程server的URL、regionName来构建出server间发送请求的eurekaHttpClient
2、通过fetchRegistry(),对远程server发起全量获取的请求,便于当前刚启动的server可以获取到注册信息,初始化时实例的注册表为空,所以进行全量获取,当全量获取成功时,才会将readyForServingData设置为true,若此属性不为true,则当前server无法被client端获取到注册表信息。
注:当出现网络异常时,请求未被成功处理,若是全量获取,则通过后续的定时任务再次获取server注册信息,全量请求成功、属性设置为true后,当前server注册信息获取接口才可以被访问;若是增量获取,则说明当前server已经是服务状态,在进行增量获取时,若返回值为null,则进行全量获取server信息。
private boolean fetchRegistry() { try { if (serverConfig.shouldDisableDeltaForRemoteRegions() || (getApplications() == null) || (getApplications().getRegisteredApplications().size() == 0)) { // 全量获取 success = storeFullRegistry(); } else { // 增量获取 success = fetchAndStoreDelta(); } logTotalInstances(); } catch (Throwable e) { logger.error("Unable to fetch registry information from the remote registry {}", this.remoteRegionURL, e); return false; } finally { if (tracer != null) { tracer.stop(); } } if (success) { timeOfLastSuccessfulRemoteFetch = System.currentTimeMillis(); } return success; }
3、通过定时任务以一定的时间间隔(默认30秒)后,执行TimedSupervisorTask的run方法,TimedSupervisorTask的run()又会调用fetchRegistry()来完成对远端server实例变更信息进行增量获取。
/**
 * Creates the registry for one remote region: builds the Jersey client and the newer HTTP
 * client for the region URL, performs an initial {@code fetchRegistry()}, and schedules a
 * supervised task that repeats the fetch at the configured interval.
 * {@code readyForServingData} is set to true once a fetch succeeds.
 *
 * @param serverConfig    supplies timeouts, connection limits, trust store and fetch interval
 * @param clientConfig    supplies the transport configuration for the new HTTP client
 * @param serverCodecs    supplies the JSON codec used for encoding and decoding
 * @param regionName      the remote region name, used in client and thread names
 * @param remoteRegionURL the URL of the remote region
 */
public RemoteRegionRegistry(EurekaServerConfig serverConfig,
EurekaClientConfig clientConfig,
ServerCodecs serverCodecs,
String regionName,
URL remoteRegionURL) {
this.serverConfig = serverConfig;
this.remoteRegionURL = remoteRegionURL;
this.fetchRegistryTimer = Monitors.newTimer(this.remoteRegionURL.toString() + "_FetchRegistry");
EurekaJerseyClientBuilder clientBuilder = new EurekaJerseyClientBuilder()
.withUserAgent("Java-EurekaClient-RemoteRegion")
.withEncoderWrapper(serverCodecs.getFullJsonCodec())
.withDecoderWrapper(serverCodecs.getFullJsonCodec())
.withConnectionTimeout(serverConfig.getRemoteRegionConnectTimeoutMs())
.withReadTimeout(serverConfig.getRemoteRegionReadTimeoutMs())
.withMaxConnectionsPerHost(serverConfig.getRemoteRegionTotalConnectionsPerHost())
.withMaxTotalConnections(serverConfig.getRemoteRegionTotalConnections())
.withConnectionIdleTimeout(serverConfig.getRemoteRegionConnectionIdleTimeoutSeconds());
// Client flavour: plain for "http" URLs; otherwise the system SSL configuration when the
// system property asks for it, else the configured trust store
if (remoteRegionURL.getProtocol().equals("http")) {
clientBuilder.withClientName("Discovery-RemoteRegionClient-" + regionName);
} else if ("true".equals(System.getProperty("com.netflix.eureka.shouldSSLConnectionsUseSystemSocketFactory"))) {
clientBuilder.withClientName("Discovery-RemoteRegionSystemSecureClient-" + regionName)
.withSystemSSLConfiguration();
} else {
clientBuilder.withClientName("Discovery-RemoteRegionSecureClient-" + regionName)
.withTrustStoreFile(
serverConfig.getRemoteRegionTrustStore(),
serverConfig.getRemoteRegionTrustStorePassword()
);
}
discoveryJerseyClient = clientBuilder.build();
discoveryApacheClient = discoveryJerseyClient.getClient();
// should we enable GZip decoding of responses based on Response Headers?
if (serverConfig.shouldGZipContentFromRemoteRegion()) {
// compressed only if there exists a 'Content-Encoding' header whose value is "gzip"
discoveryApacheClient.addFilter(new GZIPContentEncodingFilter(false));
}
// Local IP for the identity filter; stays null when the local host cannot be resolved
String ip = null;
try {
ip = InetAddress.getLocalHost().getHostAddress();
} catch (UnknownHostException e) {
logger.warn("Cannot find localhost ip", e);
}
EurekaServerIdentity identity = new EurekaServerIdentity(ip);
discoveryApacheClient.addFilter(new EurekaIdentityHeaderFilter(identity));
// Configure new transport layer (candidate for injecting in the future)
EurekaHttpClient newEurekaHttpClient = null;
try {
ClusterResolver clusterResolver = StaticClusterResolver.fromURL(regionName, remoteRegionURL);
newEurekaHttpClient = EurekaServerHttpClients.createRemoteRegionClient(
serverConfig, clientConfig.getTransportConfig(), serverCodecs, clusterResolver);
} catch (Exception e) {
logger.warn("Transport initialization failure", e);
}
// May be null if the transport initialization above failed
this.eurekaHttpClient = newEurekaHttpClient;
// Initial fetch; readiness is only flagged on success
try {
if (fetchRegistry()) {
this.readyForServingData = true;
} else {
logger.warn("Failed to fetch remote registry. This means this eureka server is not ready for serving "
+ "traffic.");
}
} catch (Throwable e) {
logger.error("Problem fetching registry information :", e);
}
// remote region fetch
Runnable remoteRegionFetchTask = new Runnable() {
@Override
public void run() {
try {
if (fetchRegistry()) {
readyForServingData = true;
} else {
logger.warn("Failed to fetch remote registry. This means this eureka server is not "
+ "ready for serving traffic.");
}
} catch (Throwable e) {
logger.error(
"Error getting from remote registry :", e);
}
}
};
ThreadPoolExecutor remoteRegionFetchExecutor = new ThreadPoolExecutor(
1, serverConfig.getRemoteRegionFetchThreadPoolSize(), 0, TimeUnit.SECONDS, new SynchronousQueue<Runnable>()); // use direct handoff
scheduler = Executors.newScheduledThreadPool(1,
new ThreadFactoryBuilder()
.setNameFormat("Eureka-RemoteRegionCacheRefresher_" + regionName + "-%d")
.setDaemon(true)
.build());
// First supervised run after one fetch interval; the same interval is also passed to the
// TimedSupervisorTask
scheduler.schedule(
new TimedSupervisorTask(
"RemoteRegionFetch_" + regionName,
scheduler,
remoteRegionFetchExecutor,
serverConfig.getRemoteRegionRegistryFetchInterval(),
TimeUnit.SECONDS,
5, // exponential backoff bound
remoteRegionFetchTask
),
serverConfig.getRemoteRegionRegistryFetchInterval(), TimeUnit.SECONDS);
try {
Monitors.registerObject(this);
} catch (Throwable e) {
logger.warn("Cannot register the JMX monitor for the RemoteRegionRegistry :", e);
}
}
TimedSupervisorTask:通过线程池执行task的run(),具体业务就是上面代码块的fetchRegistry()方法。TimedSupervisorTask的run()首先是执行远程server注册信息的增量获取,阻塞直到线程执行完成,下次执行时间设置为timeoutMillis(30秒),若future执行超时仍未获取到返回结果,那么catch中设置长一倍的时间间隔(60秒)再发起对server的注册信息的获取。finally中,当定时任务未关闭时,设置下一次的延迟时间,去对远程server注册信息进行增量获取。
/**
 * Runs one supervised execution of the wrapped task and then re-schedules itself.
 *
 * The task is handed to the executor and awaited for at most {@code timeoutMillis}.
 * On success the next delay is reset to {@code timeoutMillis}; on timeout the delay
 * is doubled, capped at {@code maxDelay}. Whatever the outcome, the pending future is
 * cancelled and, unless the scheduler has been shut down, this supervisor is scheduled
 * again after the current delay.
 */
public void run() {
    Future<?> pending = null;
    try {
        pending = executor.submit(task);
        threadPoolLevelGauge.set((long) executor.getActiveCount());
        // Block until the task finishes or the timeout elapses.
        pending.get(timeoutMillis, TimeUnit.MILLISECONDS);
        // Success: the next run uses the base delay again.
        delay.set(timeoutMillis);
        threadPoolLevelGauge.set((long) executor.getActiveCount());
        successCounter.increment();
    } catch (TimeoutException e) {
        logger.warn("task supervisor timed out", e);
        timeoutCounter.increment();
        // Back off: double the delay, but never beyond maxDelay.
        final long observedDelay = delay.get();
        final long backedOffDelay = Math.min(maxDelay, observedDelay * 2);
        delay.compareAndSet(observedDelay, backedOffDelay);
    } catch (RejectedExecutionException e) {
        if (!executor.isShutdown() && !scheduler.isShutdown()) {
            logger.warn("task supervisor rejected the task", e);
        } else {
            logger.warn("task supervisor shutting down, reject the task", e);
        }
        rejectedCounter.increment();
    } catch (Throwable e) {
        if (!executor.isShutdown() && !scheduler.isShutdown()) {
            logger.warn("task supervisor threw an exception", e);
        } else {
            logger.warn("task supervisor shutting down, can't accept the task");
        }
        throwableCounter.increment();
    } finally {
        // Cancel the future if it was ever submitted.
        if (pending != null) {
            pending.cancel(true);
        }
        // Schedule the next run as long as the scheduler is still alive.
        if (!scheduler.isShutdown()) {
            scheduler.schedule(this, delay.get(), TimeUnit.MILLISECONDS);
        }
    }
}
远程Eureka-Server拉取注册信息
registry.syncUp()方法将请求得到的注册信息缓存到当前server中,便于后续client发起获取实例信息请求时,可以做出及时响应,同时计算得到总实例数值。
遍历从远程server拉取到的初始注册信息,依次register(注册)到当前server中
当i>0并且count==0,说明第一次遍历,未成功拉取注册信息,通过休眠一定时间后(默认30秒),再次重试设置注册信息,默认最多重试5次。
/**
 * Copies the registry obtained through {@code eurekaClient} into this server by
 * registering every instance that {@code isRegisterable} accepts.
 *
 * The copy is attempted up to {@code serverConfig.getRegistrySyncRetries()} times and
 * stops as soon as one attempt registers at least one instance. Before each retry the
 * thread sleeps for {@code serverConfig.getRegistrySyncRetryWaitMs()} milliseconds.
 * If that sleep is interrupted, the interrupt status is restored and the loop ends.
 *
 * @return the number of instances registered during the sync
 */
public int syncUp() {
    // Copy entire entry from neighboring DS node
    int count = 0;

    // Retry while nothing has been registered and the retry budget is not exhausted.
    for (int i = 0; ((i < serverConfig.getRegistrySyncRetries()) && (count == 0)); i++) {
        // i > 0 means the previous attempt registered nothing: wait before retrying.
        if (i > 0) {
            try {
                Thread.sleep(serverConfig.getRegistrySyncRetryWaitMs());
            } catch (InterruptedException e) {
                logger.warn("Interrupted during registry transfer..");
                // Restore the interrupt status so callers further up can still observe it.
                Thread.currentThread().interrupt();
                break;
            }
        }
        // Fetch the applications currently known to the client.
        Applications apps = eurekaClient.getApplications();
        for (Application app : apps.getRegisteredApplications()) {
            for (InstanceInfo instance : app.getInstances()) {
                try {
                    // Only instances accepted by isRegisterable are copied.
                    if (isRegisterable(instance)) {
                        register(instance, instance.getLeaseInfo().getDurationInSecs(), true);
                        count++;
                    }
                } catch (Throwable t) {
                    // A failure on one instance must not abort the whole copy.
                    logger.error("During DS init copy", t);
                }
            }
        }
    }
    return count;
}
下面看下单个实例的register:
1、根据服务名称获取本地注册表的服务信息,获取续约信息,新应用则初始化map,续约信息不存在。
2、若续约信息存在,判断本地、远程续约信息的上次实例数据不一致时间戳的大小,以时间戳最大的实例信息作为最新实例信息记录到本地;若不存在续约,则证明对于当前server而言是一个新应用的注册,则预期客户端数加一,并更新续约阈值,续约阈值之前介绍过,和server端的自我保护机制有关。
3、根据实例信息、驱逐时间间隔创建租约并记录到本地注册缓存中,根据覆盖实例状态规则 获得应用实例最终状态,并设置应用实例的状态,设置操作类型添加到最近变更队列中,设置readWriteCacheMap响应缓存失效。当readOnly开启使用时,通过定时任务失效后,后续就可以生成当前最新的实例信息(上面的getValue()方法);当readOnly未开启使用时 ,直接通过查询数据得到最新实例信息(上面的generatePayload()方法)。
/**
 * Registers an instance in the local registry while holding the read lock.
 *
 * If a lease with a holder already exists for the instance id and its last-dirty
 * timestamp is strictly greater than the incoming one, the existing InstanceInfo is
 * kept as the registrant. Otherwise the incoming instance is used. When no such lease
 * exists, the expected number of renewing clients is incremented (only if it is
 * already positive) and the renewal threshold is recomputed. A new lease is then
 * stored, the overridden status is reconciled, the change is queued and the response
 * cache for the app / VIP addresses is invalidated.
 *
 * @param registrant    the instance being registered
 * @param leaseDuration duration handed to the new Lease
 * @param isReplication passed to the REGISTER counter and to the overridden-status
 *                      computation, and included in the final log line
 */
public void register(InstanceInfo registrant, int leaseDuration, boolean isReplication) {
read.lock();
try {
// Look up the per-application lease map by app name (null when the app is not known yet).
Map<String, Lease<InstanceInfo>> gMap = registry.get(registrant.getAppName());
// Count this registration in the REGISTER counter.
REGISTER.increment(isReplication);
if (gMap == null) {
// No map for this app yet: create one and install it with putIfAbsent; if a map was already present, use that one.
final ConcurrentHashMap<String, Lease<InstanceInfo>> gNewMap = new ConcurrentHashMap<String, Lease<InstanceInfo>>();
gMap = registry.putIfAbsent(registrant.getAppName(), gNewMap);
if (gMap == null) {
gMap = gNewMap;
}
}
Lease<InstanceInfo> existingLease = gMap.get(registrant.getId());
// When a lease already exists, the InstanceInfo with the larger last-dirty timestamp wins.
// Retain the last dirty timestamp without overwriting it, if there is already a lease
if (existingLease != null && (existingLease.getHolder() != null)) {
Long existingLastDirtyTimestamp = existingLease.getHolder().getLastDirtyTimestamp();
Long registrationLastDirtyTimestamp = registrant.getLastDirtyTimestamp();
logger.debug("Existing lease found (existing={}, provided={}", existingLastDirtyTimestamp, registrationLastDirtyTimestamp);
// this is a > instead of a >= because if the timestamps are equal, we still take the remote transmitted
// InstanceInfo instead of the server local copy.
// Compare the stored and the incoming timestamps; keep the stored instance only if it is strictly newer.
if (existingLastDirtyTimestamp > registrationLastDirtyTimestamp) {
logger.warn("There is an existing lease and the existing lease's dirty timestamp {} is greater" +
" than the one that is being registered {}", existingLastDirtyTimestamp, registrationLastDirtyTimestamp);
logger.warn("Using the existing instanceInfo instead of the new instanceInfo as the registrant");
registrant = existingLease.getHolder();
}
} else {
// No existing lease with a holder: treat this as a new registration and adjust the expected-clients count.
synchronized (lock) {
if (this.expectedNumberOfClientsSendingRenews > 0) {
// Since the client wants to register it, increase the number of clients sending renews
// (the count is only adjusted when it is already positive)
this.expectedNumberOfClientsSendingRenews = this.expectedNumberOfClientsSendingRenews + 1;
// Recompute the renewal threshold from the new expected-clients count.
updateRenewsPerMinThreshold();
}
}
logger.debug("No previous lease information found; it is new registration");
}
// Create the new lease for the chosen registrant.
Lease<InstanceInfo> lease = new Lease<>(registrant, leaseDuration);
if (existingLease != null) {
lease.setServiceUpTimestamp(existingLease.getServiceUpTimestamp());
}
// Store the lease in the per-app lease map under the instance id.
gMap.put(registrant.getId(), lease);
// Append to the recent-registrations queue (presumably used for diagnostics only; confirm against its readers).
recentRegisteredQueue.add(new Pair<Long, String>(
System.currentTimeMillis(),
registrant.getAppName() + "(" + registrant.getId() + ")"));
// This is where the initial state transfer of overridden status happens
// If the registrant carries a non-UNKNOWN overridden status and none is recorded for its id, record it.
if (!InstanceStatus.UNKNOWN.equals(registrant.getOverriddenStatus())) {
logger.debug("Found overridden status {} for instance {}. Checking to see if needs to be add to the "
+ "overrides", registrant.getOverriddenStatus(), registrant.getId());
if (!overriddenInstanceStatusMap.containsKey(registrant.getId())) {
logger.info("Not found overridden id {} and hence adding it", registrant.getId());
overriddenInstanceStatusMap.put(registrant.getId(), registrant.getOverriddenStatus());
}
}
// Copy the overridden status recorded in the map back onto the registrant so it is not lost on registration.
InstanceStatus overriddenStatusFromMap = overriddenInstanceStatusMap.get(registrant.getId());
if (overriddenStatusFromMap != null) {
logger.info("Storing overridden status {} from map", overriddenStatusFromMap);
registrant.setOverriddenStatus(overriddenStatusFromMap);
}
// Compute the effective instance status and apply it to the registrant.
// Set the status based on the overridden status rules
InstanceStatus overriddenInstanceStatus = getOverriddenInstanceStatus(registrant, existingLease, isReplication);
registrant.setStatusWithoutDirty(overriddenInstanceStatus);
// When the resulting status is UP, mark the lease as in service.
// If the lease is registered with UP status, set lease service up timestamp
if (InstanceStatus.UP.equals(registrant.getStatus())) {
lease.serviceUp();
}
// Mark the instance's action type as ADDED.
registrant.setActionType(ActionType.ADDED);
// Append the lease to the recently-changed queue (presumably consumed by delta fetches; confirm).
recentlyChangedQueue.add(new RecentlyChangedItem(lease));
// Update the instance's last-updated timestamp.
registrant.setLastUpdatedTimestamp();
// Invalidate the response cache for this app name, VIP address and secure VIP address.
invalidateCache(registrant.getAppName(), registrant.getVIPAddress(), registrant.getSecureVipAddress());
logger.info("Registered instance {}/{} with status {} (replication={})",
registrant.getAppName(), registrant.getId(), registrant.getStatus(), isReplication);
} finally {
read.unlock();
}
}
注册开启
看一下开启注册的流程
1、首先是设置预期客户端数量、更新续约阈值(自我保护机制)
2、设置开启时间;当同步到的实例数大于0时,把peerInstancesTransferEmptyOnStartup改为false(peerInstancesTransferEmptyOnStartup为true时,server不会对注册表获取请求做出正确响应)。最后设置应用信息管理器为UP状态。
3、调用super.postInit(),详细分析下这个内容
/**
 * Opens this server for traffic after the initial registry sync.
 *
 * Seeds the expected number of renewing clients with {@code count}, recomputes the
 * renewal threshold, records the startup time, optionally primes AWS replica
 * connections, switches the local instance status to UP and finally runs
 * {@code super.postInit()}.
 *
 * @param applicationInfoManager manager of the local instance; provides the data center
 *                               name and receives the UP status
 * @param count                  number of instances obtained from the neighboring node
 */
public void openForTraffic(ApplicationInfoManager applicationInfoManager, int count) {
    // Renewals happen every 30 seconds and for a minute it should be a factor of 2.
    this.expectedNumberOfClientsSendingRenews = count;
    updateRenewsPerMinThreshold();
    logger.info("Got {} instances from neighboring DS node", count);
    logger.info("Renew threshold is: {}", numberOfRenewsPerMinThreshold);
    this.startupTime = System.currentTimeMillis();

    // Clear the "peer transfer was empty on startup" flag only when instances were actually synced.
    if (count > 0) {
        this.peerInstancesTransferEmptyOnStartup = false;
    }

    final boolean runningOnAws =
            applicationInfoManager.getInfo().getDataCenterInfo().getName() == Name.Amazon;
    if (runningOnAws && serverConfig.shouldPrimeAwsReplicaConnections()) {
        logger.info("Priming AWS connections for all replicas..");
        primeAwsReplicas(applicationInfoManager);
    }

    logger.info("Changing status to UP");
    applicationInfoManager.setInstanceStatus(InstanceStatus.UP);
    super.postInit();
}
1、renewsLastMin.start();以定时任务的方式重置MeasuredRate的值
2、驱逐定时器,周期性的去执行驱逐实例任务,时间间隔默认是60秒,执行EvictionTask的run()方法
/**
 * Starts the renewals-per-minute measurement and (re)schedules the eviction task.
 *
 * Any eviction task currently held in {@code evictionTaskRef} is cancelled, a fresh
 * {@code EvictionTask} is stored in its place and scheduled on {@code evictionTimer},
 * using {@code serverConfig.getEvictionIntervalTimerInMs()} both as the initial delay
 * and as the period.
 */
protected void postInit() {
    renewsLastMin.start();

    // Cancel a previously scheduled eviction task, if there is one.
    if (evictionTaskRef.get() != null) {
        evictionTaskRef.get().cancel();
    }

    // Install a new eviction task and schedule it at the configured interval.
    final EvictionTask freshTask = new EvictionTask();
    evictionTaskRef.set(freshTask);
    final long initialDelayMs = serverConfig.getEvictionIntervalTimerInMs();
    final long periodMs = serverConfig.getEvictionIntervalTimerInMs();
    evictionTimer.schedule(freshTask, initialDelayMs, periodMs);
}
主要是进行两个步骤:先是获取补偿时间毫秒数,即当前时间减去上次执行时间,再减去时间间隔的结果,和0取最大值来作为补偿时间;然后带着这个补偿时间调用evict()清理过期租约。
/**
 * Runs one eviction pass: computes the compensation time and evicts expired leases.
 * Any Throwable raised along the way is logged and not propagated.
 */
public void run() {
    try {
        // Time by which this run is later than the configured interval.
        final long lateByMs = getCompensationTimeMs();
        logger.info("Running the evict task with compensationTime {}ms", lateByMs);
        // Evict expired leases, taking the compensation time into account.
        evict(lateByMs);
    } catch (Throwable e) {
        logger.error("Could not run the evict task", e);
    }
}
/**
 * Computes how much later than the configured eviction interval this run started.
 *
 * compensation = (current time - previous execution time) - eviction interval,
 * clamped so that it is never negative. The first call, when no previous execution
 * time has been recorded, returns 0. The current time is stored as the new
 * "previous execution time" on every call.
 *
 * @return the compensation time in milliseconds, never negative
 */
long getCompensationTimeMs() {
    final long nowNanos = getCurrentTimeNano();
    final long previousNanos = lastExecutionNanosRef.getAndSet(nowNanos);
    if (previousNanos == 0L) {
        return 0L;
    }

    final long elapsedMs = TimeUnit.NANOSECONDS.toMillis(nowNanos - previousNanos);
    return Math.max(0L, elapsedMs - serverConfig.getEvictionIntervalTimerInMs());
}
下面看下驱逐实例的代码evict();
1、当发现每分钟续约数值小于最小阈值时,直接return(eurekaServer自我保护机制),若关闭自我保护机制,则忽略。
2、获取当前所有实例的租约信息,并加上补偿时间后,得到所有过期的实例租约。
3、计算最大允许过期的实例数值(预期实例数值*0.15)
4、获取过期租约总数和最大允许过期实例总数的最小值。这样单次驱逐,即使在关闭了自我保护机制的情况下,也是只过期15%的实例
5、因为过期数值要小于等于所有过期实例租约,所以通过随机数随机过期实例租约(internalCancel())
/**
 * Evicts expired leases, bounded by a per-run limit derived from the registry size.
 *
 * Nothing is evicted while {@code isLeaseExpirationEnabled()} returns false. Otherwise
 * all expired leases that still have a holder are collected, the eviction limit is
 * computed as {@code registrySize - (int) (registrySize * renewalPercentThreshold)},
 * and at most that many leases, picked in random order, are cancelled through
 * {@code internalCancel}.
 *
 * @param additionalLeaseMs extra time passed to {@code Lease.isExpired} when deciding
 *                          whether a lease has expired
 */
public void evict(long additionalLeaseMs) {
    logger.debug("Running the evict task");

    // Lease expiration is switched off: leave the registry untouched.
    if (!isLeaseExpirationEnabled()) {
        logger.debug("DS: lease expiration is currently disabled.");
        return;
    }

    // We collect first all expired items, to evict them in random order. For large eviction sets,
    // if we do not that, we might wipe out whole apps before self preservation kicks in. By randomizing it,
    // the impact should be evenly distributed across all applications.
    List<Lease<InstanceInfo>> expiredLeases = new ArrayList<>();
    for (Map<String, Lease<InstanceInfo>> leasesOfApp : registry.values()) {
        if (leasesOfApp == null) {
            continue;
        }
        for (Lease<InstanceInfo> candidate : leasesOfApp.values()) {
            if (candidate.isExpired(additionalLeaseMs) && candidate.getHolder() != null) {
                expiredLeases.add(candidate);
            }
        }
    }

    // To compensate for GC pauses or drifting local time, we need to use current registry size as a base for
    // triggering self-preservation. Without that we would wipe out full registry.
    int registrySize = (int) getLocalRegistrySize();
    int registrySizeThreshold = (int) (registrySize * serverConfig.getRenewalPercentThreshold());
    int evictionLimit = registrySize - registrySizeThreshold;

    // Never evict more than the limit in a single run, however many leases have expired.
    int toEvict = Math.min(expiredLeases.size(), evictionLimit);
    if (toEvict <= 0) {
        return;
    }

    logger.info("Evicting {} items (expired={}, evictionLimit={})", toEvict, expiredLeases.size(), evictionLimit);

    // Leases were collected app by app; picking them at random spreads evictions across apps.
    Random random = new Random(System.currentTimeMillis());
    for (int i = 0; i < toEvict; i++) {
        // Pick a random item (Knuth shuffle algorithm)
        int next = i + random.nextInt(expiredLeases.size() - i);
        Collections.swap(expiredLeases, i, next);

        InstanceInfo holder = expiredLeases.get(i).getHolder();
        String appName = holder.getAppName();
        String id = holder.getId();
        EXPIRED.increment();
        logger.warn("DS: Registry: expired lease for {}/{}", appName, id);
        internalCancel(appName, id, false);
    }
}
下面看下internalCancel()是如何移除单个过期实例的
1、移除当前server注册信息的需过期的实例租约数据,该实例添加到最近取消的队列中,移除该实例的覆盖状态map中的数据。
2、若该实例租约存在,则添加到最近改变的队列中(操作状态为DELETED,便于client增量获取时,从队列中即可得到最近变更的实例),过期server响应缓存;因为有实例删除了,所以更新server的预期客户端数值,并更新续约最小阈值。
/**
 * Cancels the lease of one instance and removes it from the local registry.
 *
 * Under the read lock the lease is removed from the app's lease map, the cancellation
 * is recorded in the recent-cancel queue and any overridden status for the id is
 * dropped. If no lease was found, the miss is counted and {@code false} is returned.
 * Otherwise the lease is cancelled, its holder (if any) is marked DELETED and queued as
 * a recent change, and the response cache is invalidated. After the lock is released,
 * the expected number of renewing clients is decremented (only if positive) and the
 * renewal threshold is recomputed.
 *
 * @param appName       application name of the instance
 * @param id            instance id
 * @param isReplication passed to the CANCEL / CANCEL_NOT_FOUND counters and logged
 * @return {@code true} if a lease was found and cancelled, {@code false} otherwise
 */
protected boolean internalCancel(String appName, String id, boolean isReplication) {
    read.lock();
    try {
        CANCEL.increment(isReplication);

        // Remove the lease from the application's lease map, if the application is known.
        final Map<String, Lease<InstanceInfo>> leasesOfApp = registry.get(appName);
        final Lease<InstanceInfo> removedLease = (leasesOfApp == null) ? null : leasesOfApp.remove(id);

        // Record the cancellation attempt in the recent-cancel queue.
        recentCanceledQueue.add(new Pair<Long, String>(System.currentTimeMillis(), appName + "(" + id + ")"));

        // Drop any overridden status kept for this instance id.
        final InstanceStatus droppedOverride = overriddenInstanceStatusMap.remove(id);
        if (droppedOverride != null) {
            logger.debug("Removed instance id {} from the overridden map which has value {}", id, droppedOverride.name());
        }

        // Nothing was registered under this id: count the miss and report failure.
        if (removedLease == null) {
            CANCEL_NOT_FOUND.increment(isReplication);
            logger.warn("DS: Registry: cancel failed because Lease is not registered for: {}/{}", appName, id);
            return false;
        }

        // Cancel the lease, then publish the deletion through the recently-changed queue.
        removedLease.cancel();
        final InstanceInfo holder = removedLease.getHolder();
        String vip = null;
        String svip = null;
        if (holder != null) {
            holder.setActionType(ActionType.DELETED);
            recentlyChangedQueue.add(new RecentlyChangedItem(removedLease));
            holder.setLastUpdatedTimestamp();
            vip = holder.getVIPAddress();
            svip = holder.getSecureVipAddress();
        }

        // Invalidate the response cache for the app and its VIP addresses.
        invalidateCache(appName, vip, svip);
        logger.info("Cancelled instance {}/{} (replication={})", appName, id, isReplication);
    } finally {
        read.unlock();
    }

    synchronized (lock) {
        // One client fewer is expected to renew; recompute the renewal threshold.
        if (this.expectedNumberOfClientsSendingRenews > 0) {
            // Since the client wants to cancel it, reduce the number of clients to send renews.
            this.expectedNumberOfClientsSendingRenews = this.expectedNumberOfClientsSendingRenews - 1;
            updateRenewsPerMinThreshold();
        }
    }

    return true;
}