SpringCloud服务治理框架中,针对 Ribbon & Feign 两者涉及的熔断 & 降级策略大同小异。
相同之处:降级 & 熔断具体策略都是响应式编程 + 滑动窗口算法 + 三态转换图。
不同之处:Ribbon 是通过 AOP 切面 HystrixCommandAspect 实现的;Feign 是通过 HystrixFeign + FactoryBean 动态代理实现的。
1、熔断降级涉及的配置信息
/**
 * Resolves every per-command Hystrix setting for {@code key}.
 * <p>
 * Each field is filled by
 * {@code getProperty(propertyPrefix, key, <suffix>, <builder value>, <default>)}.
 * The inline comments list the configuration names that map to each suffix: first for one
 * specific command (presumably a command whose key is named "timeout"), then for the
 * global "default" entry.
 *
 * @param key            command key whose settings are loaded
 * @param builder        setter holding the values supplied in code
 * @param propertyPrefix prefix of the property names; "hystrix" according to the note in this excerpt
 */
protected HystrixCommandProperties(HystrixCommandKey key, HystrixCommandProperties.Setter builder, String propertyPrefix) {
    this.key = key;
    // propertyPrefix is "hystrix" here; it is the constructor argument, so it must not be redeclared as a local.

    // hystrix.command.timeout.circuitBreaker.enabled
    // hystrix.command.default.circuitBreaker.enabled
    this.circuitBreakerEnabled = getProperty(propertyPrefix, key, "circuitBreaker.enabled",
            builder.getCircuitBreakerEnabled(), true);
    // hystrix.command.timeout.circuitBreaker.requestVolumeThreshold
    // hystrix.command.default.circuitBreaker.requestVolumeThreshold
    this.circuitBreakerRequestVolumeThreshold = getProperty(propertyPrefix, key,
            "circuitBreaker.requestVolumeThreshold", builder.getCircuitBreakerRequestVolumeThreshold(), 20);
    // hystrix.command.timeout.circuitBreaker.sleepWindowInMilliseconds
    // hystrix.command.default.circuitBreaker.sleepWindowInMilliseconds
    this.circuitBreakerSleepWindowInMilliseconds = getProperty(propertyPrefix, key,
            "circuitBreaker.sleepWindowInMilliseconds", builder.getCircuitBreakerSleepWindowInMilliseconds(), 5000);
    // hystrix.command.timeout.circuitBreaker.errorThresholdPercentage
    // hystrix.command.default.circuitBreaker.errorThresholdPercentage
    this.circuitBreakerErrorThresholdPercentage = getProperty(propertyPrefix, key,
            "circuitBreaker.errorThresholdPercentage", builder.getCircuitBreakerErrorThresholdPercentage(), 50);
    // hystrix.command.timeout.circuitBreaker.forceOpen
    // hystrix.command.default.circuitBreaker.forceOpen
    this.circuitBreakerForceOpen = getProperty(propertyPrefix, key, "circuitBreaker.forceOpen",
            builder.getCircuitBreakerForceOpen(), false);
    // hystrix.command.timeout.circuitBreaker.forceClosed
    // hystrix.command.default.circuitBreaker.forceClosed
    this.circuitBreakerForceClosed = getProperty(propertyPrefix, key, "circuitBreaker.forceClosed",
            builder.getCircuitBreakerForceClosed(), false);
    // hystrix.command.timeout.execution.isolation.strategy
    // hystrix.command.default.execution.isolation.strategy
    this.executionIsolationStrategy = getProperty(propertyPrefix, key, "execution.isolation.strategy",
            builder.getExecutionIsolationStrategy(), default_executionIsolationStrategy);
    // hystrix.command.timeout.execution.isolation.thread.timeoutInMilliseconds
    // hystrix.command.default.execution.isolation.thread.timeoutInMilliseconds -- the execution timeout
    this.executionTimeoutInMilliseconds = getProperty(propertyPrefix, key,
            "execution.isolation.thread.timeoutInMilliseconds", builder.getExecutionIsolationThreadTimeoutInMilliseconds(), 1000);
    // hystrix.command.timeout.execution.timeout.enabled
    // hystrix.command.default.execution.timeout.enabled -- on/off switch for the timeout
    this.executionTimeoutEnabled = getProperty(propertyPrefix, key, "execution.timeout.enabled",
            builder.getExecutionTimeoutEnabled(), true);
    // hystrix.command.timeout.execution.isolation.thread.interruptOnTimeout
    // hystrix.command.default.execution.isolation.thread.interruptOnTimeout -- with THREAD isolation, whether the
    // executing thread is interrupted when it times out (the article states the default is true)
    this.executionIsolationThreadInterruptOnTimeout = getProperty(propertyPrefix, key, "execution.isolation.thread.interruptOnTimeout", builder.getExecutionIsolationThreadInterruptOnTimeout(), default_executionIsolationThreadInterruptOnTimeout);
    // hystrix.command.timeout.execution.isolation.thread.interruptOnFutureCancel
    // hystrix.command.default.execution.isolation.thread.interruptOnFutureCancel
    this.executionIsolationThreadInterruptOnFutureCancel = getProperty(propertyPrefix, key, "execution.isolation.thread.interruptOnFutureCancel", builder.getExecutionIsolationThreadInterruptOnFutureCancel(), default_executionIsolationThreadInterruptOnFutureCancel);
    this.executionIsolationSemaphoreMaxConcurrentRequests = getProperty(propertyPrefix, key, "execution.isolation.semaphore.maxConcurrentRequests", builder.getExecutionIsolationSemaphoreMaxConcurrentRequests(), default_executionIsolationSemaphoreMaxConcurrentRequests);
    this.fallbackIsolationSemaphoreMaxConcurrentRequests = getProperty(propertyPrefix, key, "fallback.isolation.semaphore.maxConcurrentRequests", builder.getFallbackIsolationSemaphoreMaxConcurrentRequests(), default_fallbackIsolationSemaphoreMaxConcurrentRequests);
    this.fallbackEnabled = getProperty(propertyPrefix, key, "fallback.enabled", builder.getFallbackEnabled(), default_fallbackEnabled);
    this.metricsRollingStatisticalWindowInMilliseconds = getProperty(propertyPrefix, key, "metrics.rollingStats.timeInMilliseconds", builder.getMetricsRollingStatisticalWindowInMilliseconds(), default_metricsRollingStatisticalWindow);
    this.metricsRollingStatisticalWindowBuckets = getProperty(propertyPrefix, key, "metrics.rollingStats.numBuckets", builder.getMetricsRollingStatisticalWindowBuckets(), default_metricsRollingStatisticalWindowBuckets);
    this.metricsRollingPercentileEnabled = getProperty(propertyPrefix, key, "metrics.rollingPercentile.enabled", builder.getMetricsRollingPercentileEnabled(), default_metricsRollingPercentileEnabled);
    this.metricsRollingPercentileWindowInMilliseconds = getProperty(propertyPrefix, key, "metrics.rollingPercentile.timeInMilliseconds", builder.getMetricsRollingPercentileWindowInMilliseconds(), default_metricsRollingPercentileWindow);
    this.metricsRollingPercentileWindowBuckets = getProperty(propertyPrefix, key, "metrics.rollingPercentile.numBuckets", builder.getMetricsRollingPercentileWindowBuckets(), default_metricsRollingPercentileWindowBuckets);
    this.metricsRollingPercentileBucketSize = getProperty(propertyPrefix, key, "metrics.rollingPercentile.bucketSize", builder.getMetricsRollingPercentileBucketSize(), default_metricsRollingPercentileBucketSize);
    this.metricsHealthSnapshotIntervalInMilliseconds = getProperty(propertyPrefix, key, "metrics.healthSnapshot.intervalInMilliseconds", builder.getMetricsHealthSnapshotIntervalInMilliseconds(), default_metricsHealthSnapshotIntervalInMilliseconds);
    this.requestCacheEnabled = getProperty(propertyPrefix, key, "requestCache.enabled", builder.getRequestCacheEnabled(), default_requestCacheEnabled);
    this.requestLogEnabled = getProperty(propertyPrefix, key, "requestLog.enabled", builder.getRequestLogEnabled(), default_requestLogEnabled);
    // threadpool doesn't have a global override, only instance level makes sense
    this.executionIsolationThreadPoolKeyOverride = forString().add(propertyPrefix + ".command." + key.name() + ".threadPoolKeyOverride", null).build();
}
这些配置信息都可以在配置文件设置。
2、HystrixCommand
/**
 * Excerpt of HystrixCommand showing where the target method and its fallback are invoked,
 * and how {@code queue()} subscribes to the command. {@code ...} marks code elided by the article.
 */
public abstract class HystrixCommand<R> extends AbstractCommand<R>{
    /**
     * Wraps the call to {@code run()} in a deferred Observable.
     */
    protected Observable<R> getExecutionObservable() {
        return Observable.defer(new Func0<Observable<R>>() {
            @Override
            public Observable<R> call() {
                // For both Ribbon and Feign, Hystrix invokes the target method here.
                // With Feign, this is the HystrixCommand created in HystrixInvocationHandler#invoke.
                return Observable.just(run());
            }
        });
        ...
    }

    /**
     * Wraps the call to {@code getFallback()} in a deferred Observable; used on timeout or exception.
     */
    @Override
    protected Observable<R> getFallbackObservable() {
        return Observable.defer(new Func0<Observable<R>>() {
            @Override
            public Observable<R> call() {
                // For both Ribbon and Feign, Hystrix invokes the fallback of the target method here.
                // With Feign, this is the HystrixCommand created in HystrixInvocationHandler#invoke.
                return Observable.just(getFallback());
            }
        });
    }

    /**
     * Starts the command and exposes its result as a Future.
     */
    public Future<R> queue() {
        // toObservable(): wraps the request flow as an Observable (observer pattern of reactive programming).
        // toBlocking(): switches the reactive chain to blocking mode.
        // toFuture(): subscribes to the core flow. The article notes that a "single" operator guarantees
        // exactly one element (otherwise an exception is thrown), and that the blocking behaviour relies on
        // atomic operations, a CountDownLatch and a Future.
        final Future<R> delegate = toObservable().toBlocking().toFuture();// AbstractCommand#toObservable
        ...
    }
}
3、AbstractCommand
通过三态转换关系选择降级 or 熔断。
/**
 * Excerpt of AbstractCommand: builds the command's Observable and decides, through the circuit
 * breaker and the execution semaphore, whether the target method runs or a fallback is used.
 * {@code ...} marks code elided by the article.
 */
abstract class AbstractCommand<R> implements HystrixInvokableInfo<R>, HystrixObservable<R> {
/**
 * Returns a deferred Observable that, once subscribed, applies the Hystrix semantics
 * (see {@code applyHystrixSemantics}) and attaches terminate / unsubscribe / completed callbacks.
 */
public Observable<R> toObservable() {
// Function object that delegates to applyHystrixSemantics(_cmd) when the Observable is subscribed.
final Func0<Observable<R>> applyHystrixSemantics = new Func0<Observable<R>>() {
@Override
public Observable<R> call() {
return applyHystrixSemantics(_cmd);
}
};
return Observable.defer(new Func0<Observable<R>>() {
@Override
public Observable<R> call() {
...
Observable<R> hystrixObservable =
Observable.defer(applyHystrixSemantics)
.map(wrapWithAllOnNextHooks);
...
return hystrixObservable
.doOnTerminate(terminateCommandCleanup)
.doOnUnsubscribe(unsubscribeCommandCleanup)
.doOnCompleted(fireOnCompletedHook);
}
});
}
/**
 * Runs the target method when the circuit breaker allows the request and the execution
 * semaphore can be acquired; otherwise returns the matching fallback Observable.
 */
private Observable<R> applyHystrixSemantics(final AbstractCommand<R> _cmd) {
// circuitBreaker is HystrixCircuitBreakerImpl, a static nested class of HystrixCircuitBreaker
if (circuitBreaker.allowRequest()) {// decides whether the request proceeds or is short-circuited
// Unless configured otherwise, the default is TryableSemaphoreNoOp
TryableSemaphore executionSemaphore = getExecutionSemaphore();
AtomicBoolean semaphoreHasBeenReleased = new AtomicBoolean(false);
...
if (executionSemaphore.tryAcquire()) {// for TryableSemaphoreNoOp this always returns true
executionResult = executionResult.setInvocationStartTime(System.currentTimeMillis());
return executeCommandAndObserve(_cmd)// run the target method
.doOnError(markExceptionThrown)
.doOnTerminate(singleSemaphoreRelease)
.doOnUnsubscribe(singleSemaphoreRelease);
} else {
return handleSemaphoreRejectionViaFallback();// skip the target method and run the fallback logic
}
} else {
return handleShortCircuitViaFallback();// skip the target method and run the fallback logic
}
}
}
3.1、三态转换关系
static class HystrixCircuitBreakerImpl implements HystrixCircuitBreaker {
private AtomicBoolean circuitOpen = new AtomicBoolean(false);
// false:直接走降级,忽略下游服务的调用。true:根据下游服务响应情况【超时|异常】选择降级与否
public boolean allowRequest() {
//hystrix.command.default.circuitBreaker.forceOpen
if (properties.circuitBreakerForceOpen().get()) {
return false;
}
//hystrix.command.default.circuitBreaker.forceClosed
if (properties.circuitBreakerForceClosed().get()) {
isOpen();
return true;
}
return !isOpen() || allowSingleTest();//如果isOpen返回false则表示熔断器关闭状态,也即没必要执行allowSingleTest
}
public boolean allowSingleTest() {// 熔断器开启:返回值为false表明直接降级
// long timeCircuitOpenedOrWasLastTested = circuitOpenedOrLastTestedTime.get();
long timeOOLTT = circuitOpenedOrLastTestedTime.get();// isOpen方法设置该值
// circuitBreakerSleepWindowInMilliseconds:表示熔断器开启时长或者熔断时长
long circuitBreakerSleepWindowInMilliseconds = properties.circuitBreakerSleepWindowInMilliseconds().get();
if (circuitOpen.get() && System.currentTimeMillis(); > timeOOLTT + circuitBreakerSleepWindowInMilliseconds) {
if (circuitOpenedOrLastTestedTime.compareAndSet(timeOOLTT, System.currentTimeMillis())){
return true;
}
}
return false;
}
@Override
public boolean isOpen() {// 返回true则表示熔断器是打开的,否则熔断器是关闭的
if (circuitOpen.get()) {
return true;
}
HealthCounts health = metrics.getHealthCounts();
// 当前窗口【rolling window】中的请求量必须达到阈值requestVolumeThreshold
if (health.getTotalRequests() < properties.circuitBreakerRequestVolumeThreshold().get()) {
return false;
}
// 如果已经超过阈值,则判断当前窗口时期内请求异常数是否超过阈值【errorThresholdPercentage】,如果小于则放过请求否则降级处理
if (health.getErrorPercentage() < properties.circuitBreakerErrorThresholdPercentage().get()) {
return false;
} else {//否则异常率过高直接打开熔断器
if (circuitOpen.compareAndSet(false, true)) {
circuitOpenedOrLastTestedTime.set(System.currentTimeMillis());
return true;
} else {
return true;
}
}
}
}
hystrix.command.default.circuitBreaker.forceOpen:true表示熔断器始终处于开启状态,所有请求全部直接降级处理。
hystrix.command.default.circuitBreaker.forceClosed:true表示熔断器始终处于关闭状态,即服务降级与否无需三态转换关系参与。每个请求均直接打到下游,根据下游服务超时、异常等情况选择当前请求降级与否。【关闭熔断功能,降级不受熔断的影响】
在 forceOpen & forceClosed 均为 false 的前提下,进一步讨论如下熔断问题:
hystrix.command.default.circuitBreaker.requestVolumeThreshold:10秒内请求量需要达到的阈值,默认值为20。
hystrix.command.default.circuitBreaker.errorThresholdPercentage:10秒内错误比率需要达到的阈值,默认值为50(即50%)。
熔断器开启的条件:
- 一个滚动窗口内的请求量必须达到requestVolumeThreshold才有资格谈论熔断器开关的开闭情况。
- 步骤1满足的前提下,错误率达到errorThresholdPercentage则打开熔断器。
hystrix.command.default.circuitBreaker.sleepWindowInMilliseconds:默认值5000。熔断器开启的前提下,在其之后的sleepWindowInMilliseconds毫秒内的请求全部降级处理。
4.超时与否处理之定时任务
本章节的分析涉及一个核心配置项 hystrix.command.default.execution.timeout.enabled【默认true,是控制 HystrixObservableTimeoutOperator 功能的开关】。
hystrix.command.default.execution.isolation.thread.timeoutInMilliseconds:默认1000毫秒。
hystrix.command.default.execution.isolation.thread.interruptOnTimeout:默认为true。控制当前线程超时后是否需要中断该线程继续执行。
只有配置项 hystrix.command.default.execution.timeout.enabled 为 true 的前提下,上述两个配置项才有意义。
*提前总结*:
- 提起超时存在两种:连接超时 & 读超时,即 Ribbon 客户端或者 Http 客户端建立连接、返回响应【读】的最长等待时间。
- 通过上述源码分析得知 timeoutInMilliseconds 配置项是指 建立连接 ~ 返回响应 期间最长等待时间【连接超时 + 读超时】。
- 步骤2功能是通过核心类 HystrixObservableTimeoutOperator 实现的。
- 即使步骤1不会发生,但是如果 timeoutInMilliseconds 过小照样会发生降级,即 SpringCloud 微服务自设超时时间小于响应时间。
/**
 * Excerpt of AbstractCommand: runs the target method under the configured isolation strategy,
 * optionally guarded by the timeout operator, and routes errors to the fallback handlers.
 * {@code ...} marks code elided by the article.
 */
abstract class AbstractCommand<R> implements HystrixInvokableInfo<R>, HystrixObservable<R> {
/**
 * Builds the execution Observable for {@code _cmd} and attaches the error-to-fallback mapping.
 */
private Observable<R> executeCommandAndObserve(final AbstractCommand<R> _cmd) {
HystrixRequestContext currentRequestContext = HystrixRequestContext.getContextForCurrentThread();
// Fallback dispatch: invoked on timeout or when an exception is thrown
Func1<Throwable, Observable<R>> handleFallback = new Func1<Throwable, Observable<R>>() {
@Override
public Observable<R> call(Throwable t) {
Exception e = getExceptionFromThrowable(t);
executionResult = executionResult.setExecutionException(e);
// Choose the handler by failure type: thread-pool rejection, timeout, bad request, or generic failure.
if (e instanceof RejectedExecutionException) {
return handleThreadPoolRejectionViaFallback(e);
} else if (t instanceof HystrixTimeoutException) {
return handleTimeoutViaFallback();
} else if (t instanceof HystrixBadRequestException) {
return handleBadRequestByEmittingError(e);
} else {
// The original Throwable was not a bad request, but the extracted exception may be one:
// it is marked as BAD_REQUEST and re-emitted as an error instead of going to the fallback.
if (e instanceof HystrixBadRequestException) {
eventNotifier.markEvent(HystrixEventType.BAD_REQUEST, commandKey);
return Observable.error(e);
}
return handleFailureViaFallback(e);
}
}
};
...
Observable<R> execution;// a new Observable for the execution
// hystrix.command.default.execution.timeout.enabled
if (properties.executionTimeoutEnabled().get()) {
// The timeout itself is implemented by HystrixObservableTimeoutOperator
execution = executeCommandWithSpecifiedIsolation(_cmd).lift(new HystrixObservableTimeoutOperator<R>(_cmd));
} else {
execution = executeCommandWithSpecifiedIsolation(_cmd);
}
return execution.doOnNext(markEmits)
.doOnCompleted(markOnCompleted)
.onErrorResumeNext(handleFallback)// on error, switch to the fallback chosen above
.doOnEach(setRequestContext);
}
/**
 * Runs the command under the configured isolation strategy; the THREAD branch is shown,
 * the semaphore branch is elided.
 */
private Observable<R> executeCommandWithSpecifiedIsolation(final AbstractCommand<R> _cmd) {
// Service protection through thread isolation
if (properties.executionIsolationStrategy().get() == ExecutionIsolationStrategy.THREAD) {
return Observable.defer(new Func0<Observable<R>>() {
@Override
// Per the article, this call runs on a different thread from the HystrixObservableTimeoutOperator logic
public Observable<R> call() {
executionResult = executionResult.setExecutionOccurred();
...
metrics.markCommandStart(commandKey, threadPoolKey, ExecutionIsolationStrategy.THREAD);
// This RuntimeException is only possible when hystrix.command.default.execution.timeout.enabled = true
if (isCommandTimedOut.get() == TimedOutStatus.TIMED_OUT) {
// The command has already timed out before run() was reached
return Observable.error(new RuntimeException("timed out before executing run()"));
}
// Only proceed if this thread is the one that moves the state from NOT_USING_THREAD to STARTED.
if (threadState.compareAndSet(ThreadState.NOT_USING_THREAD, ThreadState.STARTED)) {
...
return getUserExecutionObservable(_cmd);
} else {
return Observable.error(new RuntimeException("unsubscribed before executing run()"));
}
}
}).subscribeOn(threadPool.getScheduler(new Func0<Boolean>() {// subscribe asynchronously on the thread pool's scheduler
@Override
public Boolean call() {
// hystrix.command.timeout.execution.isolation.thread.interruptOnTimeout
Boolean itiot = properties.executionIsolationThreadInterruptOnTimeout().get();
// True only when interrupt-on-timeout is enabled and the command has actually timed out.
return itiot && _cmd.isCommandTimedOut.get() == TimedOutStatus.TIMED_OUT;
}
}));
}else ... // otherwise: semaphore isolation
}
}
subscribeOn:使当前被观察者 Observable 的订阅过程在指定调度器(此处为线程池)上异步执行。即超时处理 HystrixObservableTimeoutOperator 所在的线程,与执行目标方法调用的线程不是同一个线程。
4.1.HystrixObservableTimeoutOperator
增加一个控制超时功能的定时任务。即定时任务HystrixTimer跟目标任务执行线程是异步执行的。在 timeoutInMilliseconds 时间内目标任务线程尚未获取到下游服务的响应,则定时任务线程就会直接中断目标任务线程等待下游响应,直接降级处理。
/**
 * Excerpt of AbstractCommand showing how the execution timeout is enforced by a timer listener.
 * {@code ...} marks code elided by the article (for example the definition of {@code timeoutRunnable}).
 */
public abstract class AbstractCommand<R> {
    // Timeout state of the command; starts as NOT_EXECUTED.
    AtomicReference<TimedOutStatus> isCommandTimedOut = new AtomicReference<TimedOutStatus>(TimedOutStatus.NOT_EXECUTED);

    /**
     * Operator that registers a timer listener for the command and only forwards results
     * to the child subscriber while the command has not been marked as timed out.
     */
    private static class HystrixObservableTimeoutOperator<R> implements Operator<R, R> {
        // The command being guarded ("originalCommand").
        AbstractCommand<R> oc;

        public HystrixObservableTimeoutOperator(final AbstractCommand<R> originalCommand) {
            this.oc = originalCommand;
        }

        @Override
        public Subscriber<? super R> call(final Subscriber<? super R> child) {
            final CompositeSubscription s = new CompositeSubscription();
            child.add(s);
            ...
            TimerListener listener = new TimerListener() {
                @Override
                public void tick() {
                    // The status starts as NOT_EXECUTED. If it is still NOT_EXECUTED when the timer fires,
                    // the request has timed out and the CAS switches it to TIMED_OUT.
                    if (oc.isCommandTimedOut.compareAndSet(TimedOutStatus.NOT_EXECUTED, TimedOutStatus.TIMED_OUT)) {
                        // The constructor parameter originalCommand is out of scope here; use the stored field.
                        oc.eventNotifier.markEvent(HystrixEventType.TIMEOUT, oc.commandKey);
                        s.unsubscribe();
                        timeoutRunnable.run();
                    }
                }

                // Value of hystrix.command.default.execution.isolation.thread.timeoutInMilliseconds
                @Override
                public int getIntervalTimeInMilliseconds() {
                    return oc.properties.executionTimeoutInMilliseconds().get();
                }
            };
            // #4.1 register the timer task
            final Reference<TimerListener> tl = HystrixTimer.getInstance().addTimerListener(listener);
            oc.timeoutTimer.set(tl);
            Subscriber<R> parent = new Subscriber<R>() {
                ...
                @Override
                public void onNext(R v) {
                    if (isNotTimedOut()) {
                        child.onNext(v);
                    }
                }

                // True when the command already completed, or when this call wins the
                // NOT_EXECUTED -> COMPLETED transition before the timer marks it TIMED_OUT.
                private boolean isNotTimedOut() {
                    return oc.isCommandTimedOut.get() == TimedOutStatus.COMPLETED ||
                            oc.isCommandTimedOut.compareAndSet(TimedOutStatus.NOT_EXECUTED, TimedOutStatus.COMPLETED);
                }
            };
            s.add(parent);
            return parent;
        }
    }
}
5.执行目标方法
/**
 * Excerpt of AbstractCommand: builds the Observable that runs the user's target method.
 * The type parameter {@code R} is declared here because the method below uses it.
 */
public abstract class AbstractCommand<R> {
    /**
     * Obtains the execution Observable and attaches the two hook operators to it.
     *
     * @param _cmd the command instance handed to both hook operators
     * @return the execution Observable with ExecutionHookApplication and
     *         DeprecatedOnRunHookApplication lifted onto it, in that order
     */
    private Observable<R> getUserExecutionObservable(final AbstractCommand<R> _cmd) {
        Observable<R> userObservable;
        // Resolved by HystrixCommand#getExecutionObservable
        userObservable = getExecutionObservable();
        return userObservable
                .lift(new ExecutionHookApplication(_cmd))
                .lift(new DeprecatedOnRunHookApplication(_cmd));
    }
}