Spring Cloud Eureka源码解析之服务剔除(七)

上一篇讲的是服务正常下线的情况。如果遇到网络不稳定或机器宕机,导致 Server 一直接收不到某个服务实例的心跳,就可以认为该实例已经故障,需要将其剔除。
在《EurekaServer启动流程》一节中,初始化 Eureka Server 上下文时有这样一行代码:

this.registry.openForTraffic(this.applicationInfoManager, registryCount);

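这行代码最终会创建一个定时剔除过期服务的定时任务。调用链依次经过下面三个方法:子类重写的 openForTraffic(处理注册数为 0 的情况)、父类的 openForTraffic(计算续约阈值并将状态设为 UP),以及真正创建定时任务的 postInit: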

/**
 * Opens the registry for traffic, substituting a default instance count when
 * the supplied count is zero.
 *
 * @param applicationInfoManager manager describing this server instance
 * @param count number of instances obtained at startup; when it is 0 the value of
 *              {@code defaultOpenForTrafficCount} is passed to the parent instead
 *              (the original article states that default is 1 -- confirm against
 *              the field's declaration)
 */
@Override
public void openForTraffic(ApplicationInfoManager applicationInfoManager, int count) {
    // Never hand a zero count to the parent: fall back to the configured default.
    int effectiveCount = count;
    if (count == 0) {
        effectiveCount = this.defaultOpenForTrafficCount;
    }
    super.openForTraffic(applicationInfoManager, effectiveCount);
}
/**
 * Derives the renewal thresholds from the given instance count, marks this
 * server instance as UP and then runs the parent's {@code postInit()}.
 *
 * @param applicationInfoManager manager describing this server instance; used to
 *                               read the data-center name and to set the status
 * @param count number of instances this node starts with
 */
@Override
public void openForTraffic(ApplicationInfoManager applicationInfoManager, int count) {
    // Expected renewals per minute: two per instance.
    final int expectedRenewsPerMin = count * 2;
    this.expectedNumberOfRenewsPerMin = expectedRenewsPerMin;
    // Renewal threshold per minute: the expected number scaled by the configured percentage.
    final double renewalPercentThreshold = serverConfig.getRenewalPercentThreshold();
    this.numberOfRenewsPerMinThreshold = (int) (expectedRenewsPerMin * renewalPercentThreshold);
    logger.info("Got {} instances from neighboring DS node", count);
    logger.info("Renew threshold is: {}", numberOfRenewsPerMinThreshold);
    this.startupTime = System.currentTimeMillis();
    if (count > 0) {
        this.peerInstancesTransferEmptyOnStartup = false;
    }

    // Prime replica connections only on Amazon data centers, and only when configured to.
    final DataCenterInfo.Name dataCenterName =
        applicationInfoManager.getInfo().getDataCenterInfo().getName();
    if (dataCenterName == Name.Amazon) {
        if (serverConfig.shouldPrimeAwsReplicaConnections()) {
            logger.info("Priming AWS connections for all replicas..");
            primeAwsReplicas(applicationInfoManager);
        }
    }

    logger.info("Changing status to UP");
    // Mark this instance as UP.
    applicationInfoManager.setInstanceStatus(InstanceStatus.UP);
    // Parent initialization; the periodic eviction task is scheduled in there.
    super.postInit();
}
/**
 * Starts the renewals-per-interval counter and (re)schedules the periodic
 * eviction task on {@code evictionTimer}.
 */
protected void postInit() {
    // Start sampling the renewal count (current count is rolled into the
    // "last interval" value on each tick).
    renewsLastMin.start();

    // Cancel any eviction task left over from an earlier call.
    final TimerTask previousTask = evictionTaskRef.get();
    if (previousTask != null) {
        previousTask.cancel();
    }

    // Create and schedule a fresh eviction task. Delay and period both come from
    // the configured eviction interval (the original article gives 60s as the default).
    final EvictionTask newTask = new EvictionTask();
    evictionTaskRef.set(newTask);
    final long initialDelayMs = serverConfig.getEvictionIntervalTimerInMs();
    final long periodMs = serverConfig.getEvictionIntervalTimerInMs();
    evictionTimer.schedule(evictionTaskRef.get(), initialDelayMs, periodMs);
}

先看下如何计算最近一分钟的续约数

/**
 * Counts events per sample interval using two buckets: one accumulating the
 * current interval and one holding the total of the last completed interval.
 * A daemon timer rolls the current bucket into the last bucket every
 * {@code sampleInterval} milliseconds.
 */
public class MeasuredRate {
    private static final Logger logger = LoggerFactory.getLogger(MeasuredRate.class);

    // Total recorded during the most recently completed sample interval.
    private final AtomicLong lastBucket = new AtomicLong(0);
    // Running total for the interval currently in progress.
    private final AtomicLong currentBucket = new AtomicLong(0);

    private final long sampleInterval;
    private final Timer timer;

    private volatile boolean isActive;

    /**
     * Creates an inactive rate meter; call {@link #start()} to begin sampling.
     *
     * @param sampleInterval in milliseconds
     */
    public MeasuredRate(long sampleInterval) {
        this.sampleInterval = sampleInterval;
        this.timer = new Timer("Eureka-MeasureRateTimer", true);
        this.isActive = false;
    }

    /**
     * Schedules the periodic bucket rollover. Does nothing if already active.
     */
    public synchronized void start() {
        if (isActive) {
            return;
        }
        timer.schedule(new BucketRolloverTask(), sampleInterval, sampleInterval);
        isActive = true;
    }

    /**
     * Cancels the underlying timer. Does nothing if not active.
     * NOTE(review): a cancelled {@code java.util.Timer} rejects further scheduling,
     * so calling {@link #start()} after this looks like it would fail -- confirm
     * whether any caller restarts a stopped instance.
     */
    public synchronized void stop() {
        if (!isActive) {
            return;
        }
        timer.cancel();
        isActive = false;
    }

    /**
     * Returns the count in the last sample interval.
     */
    public long getCount() {
        return lastBucket.get();
    }

    /**
     * Adds one to the current interval's count. Per the original article this is
     * invoked on every lease renewal.
     */
    public void increment() {
        currentBucket.incrementAndGet();
    }

    /**
     * Timer task that publishes the current bucket as the last-interval count
     * and resets the current bucket to zero.
     */
    private final class BucketRolloverTask extends TimerTask {
        @Override
        public void run() {
            try {
                final long completedIntervalCount = currentBucket.getAndSet(0);
                lastBucket.set(completedIntervalCount);
            } catch (Throwable e) {
                logger.error("Cannot reset the Measured Rate", e);
            }
        }
    }
}

服务过期剔除任务,默认每隔60s执行一次。下面依次是任务类 EvictionTask,以及真正执行剔除逻辑的 evict 方法:

/**
 * Timer task that runs {@code evict} with a compensation time covering any
 * delay beyond the configured eviction interval since the previous run.
 */
class EvictionTask extends TimerTask {

    // System.nanoTime() reading taken at the previous run; 0 means "has not run yet".
    private final AtomicLong lastExecutionNanosRef = new AtomicLong(0L);

    @Override
    public void run() {
        try {
            // How much later than scheduled this run started.
            final long compensationTimeMs = getCompensationTimeMs();
            logger.info("Running the evict task with compensationTime {}ms", compensationTimeMs);
            evict(compensationTimeMs);
        } catch (Throwable e) {
            logger.error("Could not run the evict task", e);
        }
    }

    /**
     * Computes how many milliseconds the time since the previous run exceeds the
     * configured eviction interval.
     *
     * @return the overrun in milliseconds; 0 on the first run or when there is no overrun
     */
    long getCompensationTimeMs() {
        final long nowNanos = getCurrentTimeNano();
        final long previousNanos = lastExecutionNanosRef.getAndSet(nowNanos);
        if (previousNanos == 0L) {
            return 0L;
        }

        final long elapsedMs = TimeUnit.NANOSECONDS.toMillis(nowNanos - previousNanos);
        final long overrunMs = elapsedMs - serverConfig.getEvictionIntervalTimerInMs();
        return Math.max(0L, overrunMs);
    }

    long getCurrentTimeNano() {  // for testing
        return System.nanoTime();
    }

}
/**
 * Cancels expired leases, capped per run so that a single pass cannot remove
 * more than a fixed share of the registry.
 *
 * @param additionalLeaseMs extra time passed to {@code Lease.isExpired} when
 *                          checking each lease
 */
public void evict(long additionalLeaseMs) {
    logger.debug("Running the evict task");

    // Skip eviction entirely while lease expiration is disabled (per the original
    // article, this is effectively the self-preservation check).
    if (!isLeaseExpirationEnabled()) {
        logger.debug("DS: lease expiration is currently disabled.");
        return;
    }

    // Collect every expired lease first so they can be evicted in random order.
    // For large eviction sets, evicting in registry order could wipe out whole apps
    // before self preservation kicks in; randomizing spreads the impact across apps.
    final List<Lease<InstanceInfo>> expiredLeases = new ArrayList<>();
    for (Map<String, Lease<InstanceInfo>> leasesOfApp : registry.values()) {
        if (leasesOfApp == null) {
            continue;
        }
        for (Lease<InstanceInfo> candidate : leasesOfApp.values()) {
            // NOTE(review): the original article says that, because renewal stores
            // "now + duration" as the update time, a lease effectively expires after
            // 180s rather than the intended 90s, and that upstream left this unfixed
            // as low impact -- confirm against Lease.isExpired.
            if (candidate.isExpired(additionalLeaseMs) && candidate.getHolder() != null) {
                expiredLeases.add(candidate);
            }
        }
    }

    // To compensate for GC pauses or drifting local time, the current registry size is
    // the base for the eviction cap. Without it the full registry could be wiped out.
    final int registrySize = (int) getLocalRegistrySize();
    // Number of registrations that must remain (original article: 0.85 * size by default).
    final int registrySizeThreshold = (int) (registrySize * serverConfig.getRenewalPercentThreshold());
    // Maximum evictions allowed in this run.
    final int evictionLimit = registrySize - registrySizeThreshold;

    // Evict the smaller of "actually expired" and the cap (about 15% of the registry
    // with the default cited above), so one run cannot take down a whole service.
    final int toEvict = Math.min(expiredLeases.size(), evictionLimit);
    if (toEvict <= 0) {
        return;
    }

    logger.info("Evicting {} items (expired={}, evictionLimit={})", toEvict, expiredLeases.size(), evictionLimit);

    // Partially shuffle the list and cancel the first toEvict leases.
    final Random random = new Random(System.currentTimeMillis());
    for (int i = 0; i < toEvict; i++) {
        // Pick a random item (Knuth shuffle algorithm)
        final int pick = i + random.nextInt(expiredLeases.size() - i);
        Collections.swap(expiredLeases, i, pick);
        final InstanceInfo holder = expiredLeases.get(i).getHolder();

        final String appName = holder.getAppName();
        final String id = holder.getId();
        EXPIRED.increment();
        logger.warn("DS: Registry: expired lease for {}/{}", appName, id);
        // Same path as a regular cancellation.
        internalCancel(appName, id, false);
    }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值