一、概述
Client的轮询包括两部分:
- RemoteConfigRepository:定时轮询 Config Service 的 /configs/{appId}/{clusterName}/{namespace} 接口,读取配置
- RemoteConfigLongPollService:长轮询 Config Service 的配置变更通知接口 /notifications/v2
二.代码流程
1. AbstractConfigRepository#trySync
方法
- 客户端同步接口,具体实现是
sync()
方法,RemoteConfigRepository
实现了这个方法,实现从Config Service
拉取配置,并缓存在内存中,包含定时+实时
刷新缓存
/**
 * Runs one synchronization attempt by delegating to {@code sync()}.
 *
 * <p>A {@link Throwable} raised by {@code sync()} is not propagated; it is reported through
 * the tracer and logged as a warning instead.
 *
 * @return {@code true} when {@code sync()} returned normally, {@code false} when it threw
 */
protected boolean trySync() {
  boolean synced = false;
  try {
    sync();
    // Only reached when sync() did not throw.
    synced = true;
  } catch (Throwable ex) {
    Tracer.logEvent("ApolloConfigException", ExceptionUtil.getDetailMessage(ex));
    logger.warn(
        "Sync config failed, will retry. Repository {}, reason: {}",
        this.getClass(),
        ExceptionUtil.getDetailMessage(ex));
  }
  return synced;
}
RemoteConfigRepository
类属性
和构造方法
解析
下面重点看下构造方法中的初始化逻辑,首先会调用
trySync
尝试进行同步,然后
调用schedulePeriodicRefresh
初始化定时任务定时拉取更新,最后调用scheduleLongPollingRefresh
将自己注册到长轮询服务,实现配置更新的实时通知
/**
 * Config repository backed by a remote Config Service; extends {@code AbstractConfigRepository}.
 */
public class RemoteConfigRepository extends AbstractConfigRepository {
private static final Logger logger = DeferredLoggerFactory.getLogger(RemoteConfigRepository.class);
private static final Joiner STRING_JOINER = Joiner.on(ConfigConsts.CLUSTER_NAMESPACE_SEPARATOR);
private static final Joiner.MapJoiner MAP_JOINER = Joiner.on("&").withKeyValueSeparator("=");
private static final Escaper pathEscaper = UrlEscapers.urlPathSegmentEscaper();
private static final Escaper queryParamEscaper = UrlEscapers.urlFormParameterEscaper();
private final ConfigServiceLocator m_serviceLocator;
private final HttpClient m_httpClient;
private final ConfigUtil m_configUtil;
// Long polling service for remote config
private final RemoteConfigLongPollService remoteConfigLongPollService;
// Config cache: an AtomicReference pointing to the ApolloConfig
private volatile AtomicReference<ApolloConfig> m_configCache;
// Namespace name
private final String m_namespace;
// ScheduledExecutorService instance (shared by all instances, see static initializer below)
private final static ScheduledExecutorService m_executorService;
// AtomicReference pointing to a ServiceDTO (the Config Service to contact first on the next load)
private final AtomicReference<ServiceDTO> m_longPollServiceDto;
// AtomicReference pointing to the ApolloNotificationMessages
private final AtomicReference<ApolloNotificationMessages> m_remoteMessages;
// RateLimiter applied when loading config
private final RateLimiter m_loadConfigRateLimiter;
// Flag: whether a forced refresh is pending.
// When true, one extra round of loading from the Config Service is allowed.
// It is true when RemoteConfigRepository knows that the Config Service has refreshed config.
private final AtomicBoolean m_configNeedForceRefresh;
// Schedule policy used for retrying after failures
private final SchedulePolicy m_loadConfigFailSchedulePolicy;
private static final Gson GSON = new Gson();
static {
// Scheduled thread pool with a core size of one thread
m_executorService = Executors.newScheduledThreadPool(1,
ApolloThreadFactory.create("RemoteConfigRepository", true));
}
/**
 * Creates the repository for the given namespace.
 *
 * <p>After wiring its state, the constructor performs a first synchronization attempt,
 * schedules the periodic refresh task and registers itself with the long-poll service.
 *
 * @param namespace the namespace
 */
public RemoteConfigRepository(String namespace) {
  // Plain state holders: none of them depends on an injected collaborator.
  m_namespace = namespace;
  m_configCache = new AtomicReference<>();
  m_longPollServiceDto = new AtomicReference<>();
  m_remoteMessages = new AtomicReference<>();
  m_configNeedForceRefresh = new AtomicBoolean(true);

  // Collaborators resolved through the injector.
  m_configUtil = ApolloInjector.getInstance(ConfigUtil.class);
  m_httpClient = ApolloInjector.getInstance(HttpClient.class);
  m_serviceLocator = ApolloInjector.getInstance(ConfigServiceLocator.class);
  remoteConfigLongPollService = ApolloInjector.getInstance(RemoteConfigLongPollService.class);

  // Settings derived from the config util: load rate limit and the failure back-off policy
  // (upper bound is eight times the on-error retry interval).
  m_loadConfigRateLimiter = RateLimiter.create(m_configUtil.getLoadConfigQPS());
  m_loadConfigFailSchedulePolicy = new ExponentialSchedulePolicy(
      m_configUtil.getOnErrorRetryInterval(),
      m_configUtil.getOnErrorRetryInterval() * 8);

  // Initial load, then periodic refresh, then long-poll registration.
  trySync();
  schedulePeriodicRefresh();
  scheduleLongPollingRefresh();
}
trySync 方法本身不是抽象方法,它只是调用 sync 方法并处理异常,具体的同步逻辑由子类实现的 sync 方法完成,我们直接看 RemoteConfigRepository 的 sync 方法。
这里就是去 Config Service 中拉取最新的配置;如果拉取到的配置和之前的本地缓存不一致,说明有更新,此时把新配置设置到本地缓存中,然后触发对应的监听器们
/**
 * Loads the config from the Config Service and, when it differs from the cached instance,
 * stores it in the cache and fires a repository-change event.
 *
 * <p>The whole operation is wrapped in a tracer transaction; any {@link Throwable} is recorded
 * on the transaction and rethrown.
 */
@Override
protected synchronized void sync() {
  final Transaction tx = Tracer.newTransaction("Apollo.ConfigService", "syncRemoteConfig");
  try {
    final ApolloConfig cached = m_configCache.get();
    final ApolloConfig loaded = loadApolloConfig();

    // reference equals means HTTP 304, so an identity check is enough to detect "no change".
    final boolean unchanged = (cached == loaded);
    if (!unchanged) {
      logger.debug("Remote Config refreshed!");
      m_configCache.set(loaded);
      // Publish the change so that registered listeners are triggered.
      this.fireRepositoryChange(m_namespace, this.getConfig());
    }

    if (null != loaded) {
      Tracer.logEvent(
          String.format("Apollo.Client.Configs.%s", loaded.getNamespaceName()),
          loaded.getReleaseKey());
    }

    tx.setStatus(Transaction.SUCCESS);
  } catch (Throwable ex) {
    tx.setStatus(ex);
    throw ex;
  } finally {
    tx.complete();
  }
}
4.loadApolloConfig
拉取配置
这里就是去 Config Service 拉取配置,其中包含限流、失败重试等逻辑。
/**
 * Loads the ApolloConfig of this namespace from a Config Service.
 *
 * <p>Applies the load rate limiter, then tries the Config Service addresses in shuffled order
 * (the service stored in m_longPollServiceDto, if any, goes first) for up to maxRetries rounds,
 * sleeping before an attempt when the previous one failed. A 404 response ends all retries.
 *
 * @return the cached ApolloConfig when the server answers 304, otherwise the response body
 * @throws ApolloConfigException when no attempt succeeded; its cause is the last recorded failure
 */
private ApolloConfig loadApolloConfig() {
// Rate limiting
if (!m_loadConfigRateLimiter.tryAcquire(5, TimeUnit.SECONDS)) {
//wait at most 5 seconds
try {
TimeUnit.SECONDS.sleep(5);
} catch (InterruptedException e) {
// NOTE(review): the interruption is swallowed here without restoring the interrupt flag — confirm this is intended
}
}
// Read appId, cluster, dataCenter and the access key secret from the config util
String appId = m_configUtil.getAppId();
String cluster = m_configUtil.getCluster();
String dataCenter = m_configUtil.getDataCenter();
String secret = m_configUtil.getAccessKeySecret();
Tracer.logEvent("Apollo.Client.ConfigMeta", STRING_JOINER.join(appId, cluster, m_namespace));
// Number of retry rounds: 2 when a forced refresh is pending, otherwise 1
int maxRetries = m_configNeedForceRefresh.get() ? 2 : 1;
long onErrorSleepTime = 0; // 0 means no sleep
Throwable exception = null;
// Get all Config Service addresses
List<ServiceDTO> configServices = getConfigServices();
String url = null;
retryLoopLabel:
// Retry up to maxRetries rounds; every round walks through all ServiceDTO entries
for (int i = 0; i < maxRetries; i++) {
// Shuffle a copy of the Config Service addresses
List<ServiceDTO> randomConfigServices = Lists.newLinkedList(configServices);
Collections.shuffle(randomConfigServices);
//Access the server which notifies the client first
// Put the Config Service that announced the change at the front; getAndSet(null) clears it
// so that it is not prioritized again
if (m_longPollServiceDto.get() != null) {
randomConfigServices.add(0, m_longPollServiceDto.getAndSet(null));
}
// Iterate over all Config Service addresses
for (ServiceDTO configService : randomConfigServices) {
// Sleep before this attempt when the previous attempt failed
if (onErrorSleepTime > 0) {
logger.warn(
"Load config failed, will retry in {} {}. appId: {}, cluster: {}, namespaces: {}",
onErrorSleepTime, m_configUtil.getOnErrorRetryIntervalTimeUnit(), appId, cluster, m_namespace);
try {
m_configUtil.getOnErrorRetryIntervalTimeUnit().sleep(onErrorSleepTime);
} catch (InterruptedException e) {
//ignore
}
}
// Assemble the URL of the Config Service config-query endpoint
url = assembleQueryConfigUrl(configService.getHomepageUrl(), appId, cluster, m_namespace,
dataCenter, m_remoteMessages.get(), m_configCache.get());
logger.debug("Loading config from {}", url);
// Create the HttpRequest; signature headers are added only when a secret is configured
HttpRequest request = new HttpRequest(url);
if (!StringUtils.isBlank(secret)) {
Map<String, String> headers = Signature.buildHttpHeaders(url, appId, secret);
request.setHeaders(headers);
}
Transaction transaction = Tracer.newTransaction("Apollo.ConfigService", "queryConfig");
transaction.addData("Url", url);
try {
// Send the request and obtain the HttpResponse
HttpResponse<ApolloConfig> response = m_httpClient.doGet(request, ApolloConfig.class);
// Reset the m_configNeedForceRefresh flag to false
m_configNeedForceRefresh.set(false);
// Mark success on the failure schedule policy
m_loadConfigFailSchedulePolicy.success();
transaction.addData("StatusCode", response.getStatusCode());
transaction.setStatus(Transaction.SUCCESS);
// No new config: return the cached ApolloConfig as-is
if (response.getStatusCode() == 304) {
logger.debug("Config server responds with 304 HTTP status code.");
return m_configCache.get();
}
// New config available: return the new ApolloConfig
ApolloConfig result = response.getBody();
logger.debug("Loaded config for {}: {}", m_namespace, result);
return result;
} catch (ApolloConfigStatusCodeException ex) {
ApolloConfigStatusCodeException statusCodeException = ex;
//config not found
if (ex.getStatusCode() == 404) {
String message = String.format(
"Could not find config for namespace - appId: %s, cluster: %s, namespace: %s, " +
"please check whether the configs are released in Apollo!",
appId, cluster, m_namespace);
statusCodeException = new ApolloConfigStatusCodeException(ex.getStatusCode(),
message);
}
Tracer.logEvent("ApolloConfigException", ExceptionUtil.getDetailMessage(statusCodeException));
transaction.setStatus(statusCodeException);
exception = statusCodeException;
// A 404 stops every remaining attempt, including the outer retry rounds
if (ex.getStatusCode() == 404) {
break retryLoopLabel;
}
} catch (Throwable ex) {
Tracer.logEvent("ApolloConfigException", ExceptionUtil.getDetailMessage(ex));
transaction.setStatus(ex);
exception = ex;
} finally {
transaction.complete();
}
// if force refresh, do normal sleep, if normal config load, do exponential sleep
// Compute the delay to apply before the next attempt
onErrorSleepTime = m_configNeedForceRefresh.get() ? m_configUtil.getOnErrorRetryInterval() :
m_loadConfigFailSchedulePolicy.fail();
}
}
// Every attempt failed: throw an ApolloConfigException carrying the last failure as cause
String message = String.format(
"Load Apollo Config failed - appId: %s, cluster: %s, namespace: %s, url: %s",
appId, cluster, m_namespace, url);
throw new ApolloConfigException(message, exception);
}
RemoteConfigRepository#schedulePeriodicRefresh
这里可以发现就是一个定时任务,然后定时调用
trySync
方法同步配置
/**
 * Schedules a fixed-rate task on the shared executor that calls {@code trySync()}.
 *
 * <p>The refresh interval from the config util is used both as the initial delay and as the
 * period.
 */
private void schedulePeriodicRefresh() {
  logger.debug("Schedule periodic refresh with interval: {} {}",
      m_configUtil.getRefreshInterval(), m_configUtil.getRefreshIntervalTimeUnit());

  final Runnable periodicRefresh = new Runnable() {
    @Override
    public void run() {
      Tracer.logEvent("Apollo.ConfigService", String.format("periodicRefresh: %s", m_namespace));
      logger.debug("refresh config for namespace: {}", m_namespace);
      // Attempt to synchronize the config, then report the client version.
      trySync();
      Tracer.logEvent("Apollo.Client.Version", Apollo.VERSION);
    }
  };

  m_executorService.scheduleAtFixedRate(
      periodicRefresh,
      m_configUtil.getRefreshInterval(),
      m_configUtil.getRefreshInterval(),
      m_configUtil.getRefreshIntervalTimeUnit());
}
RemoteConfigRepository#scheduleLongPollingRefresh
将自己提交到长轮询服务中
/**
 * Submits this repository to the long-poll service under its own namespace.
 */
private void scheduleLongPollingRefresh() {
  this.remoteConfigLongPollService.submit(this.m_namespace, this);
}
remoteConfigLongPollService#submit
下面的 tryLock 方法展示了 namespace 加锁的实现:加锁就是往数据库写入一条 NamespaceLock 数据,并且使用数据库的唯一索引来保证互斥。
/**
 * Builds a {@code NamespaceLock} for the namespace and hands it to the lock service.
 *
 * @param namespaceId id of the namespace to lock
 * @param user recorded as both the creator and the last modifier of the lock
 */
private void tryLock(long namespaceId, String user) {
  final NamespaceLock namespaceLock = new NamespaceLock();
  namespaceLock.setNamespaceId(namespaceId);
  namespaceLock.setDataChangeCreatedBy(user);
  namespaceLock.setDataChangeLastModifiedBy(user);

  namespaceLockService.tryLock(namespaceLock);
}
adminservice
模块下的AppNamespaceController#create
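下面是 RemoteConfigLongPollService#submit 的实现:这里首先把 namespace 和对应的 RemoteConfigRepository 的关系保存到一个 Multimap(m_longPollNamespaces)中,然后把该 namespace 添加到通知列表 m_notifications 中(如果尚不存在),最后在长轮询尚未启动时启动长轮询服务。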
/**
 * Registers a repository for long-poll notifications of a namespace and makes sure long
 * polling has been started.
 *
 * @param namespace the namespace the repository is interested in
 * @param remoteConfigRepository the repository to notify
 * @return the result of adding the pair to {@code m_longPollNamespaces}
 */
public boolean submit(String namespace, RemoteConfigRepository remoteConfigRepository) {
  final boolean registered = m_longPollNamespaces.put(namespace, remoteConfigRepository);
  // Seed the notification id for this namespace unless one is already recorded.
  m_notifications.putIfAbsent(namespace, INIT_NOTIFICATION_ID);

  if (m_longPollStarted.get()) {
    return registered;
  }
  // Long polling has not been started yet: start it now.
  startLongPolling();
  return registered;
}
9. startLongPolling
方法
会启动一个线程,然后执行
doLongPollingRefresh
方法
/**
 * Starts the long-polling task once.
 *
 * <p>A compare-and-set on {@code m_longPollStarted} guarantees that only the first caller
 * submits the task. The task optionally waits for the configured initial delay and then runs
 * {@code doLongPollingRefresh}. If submitting the task fails, the started flag is reset and the
 * failure is traced and logged rather than thrown.
 *
 * <p>If the polling thread is interrupted during the initial delay, the interrupt flag is
 * restored and polling is not started; swallowing the interrupt would clear the flag that the
 * polling loop checks in order to stop.
 */
private void startLongPolling() {
  // CAS the "started" flag; if it was already set, do not start a second task
  if (!m_longPollStarted.compareAndSet(false, true)) {
    //already started
    return;
  }
  try {
    // Read appId, cluster, dataCenter and the access key secret from the config util
    final String appId = m_configUtil.getAppId();
    final String cluster = m_configUtil.getCluster();
    final String dataCenter = m_configUtil.getDataCenter();
    final String secret = m_configUtil.getAccessKeySecret();
    // Initial delay of the long-polling task, in milliseconds
    final long longPollingInitialDelayInMills = m_configUtil.getLongPollingInitialDelayInMills();
    // Submit the long-polling task
    m_longPollingService.submit(new Runnable() {
      @Override
      public void run() {
        // Initial wait
        if (longPollingInitialDelayInMills > 0) {
          try {
            logger.debug("Long polling will start in {} ms.", longPollingInitialDelayInMills);
            TimeUnit.MILLISECONDS.sleep(longPollingInitialDelayInMills);
          } catch (InterruptedException e) {
            // Keep the interruption visible to the executor and stop here
            Thread.currentThread().interrupt();
            return;
          }
        }
        // Run the long-polling loop
        doLongPollingRefresh(appId, cluster, dataCenter, secret);
      }
    });
  } catch (Throwable ex) {
    // Submitting failed: allow a later call to try again
    m_longPollStarted.set(false);
    ApolloConfigException exception =
        new ApolloConfigException("Schedule long polling refresh failed", ex);
    Tracer.logError(exception);
    logger.warn(ExceptionUtil.getDetailMessage(exception));
  }
}
doLongPollingRefresh
方法
这里就是构造请求,然后在一个 while 循环中不断发起请求。对应的长轮询实现是在 Config Service 端:服务端在收到请求后不会立即返回结果,而是通过 Spring DeferredResult 把请求挂起;如果 60s 内没有客户端关心的配置变更,会返回 304 给客户端;如果有配置变更则会立即返回。客户端拿到新的通知后会调用 notify 方法进行通知
/**
 * Long-polling loop: repeatedly asks a Config Service for change notifications until polling
 * is stopped or the current thread is interrupted.
 *
 * <p>Each iteration applies the long-poll rate limiter, picks a Config Service (reusing the
 * previous one unless it was reset), sends the request and, on a 200 response with a body,
 * updates the stored notifications and notifies the registered repositories. On any failure
 * the chosen service is reset and the loop sleeps for the time given by the failure policy.
 *
 * <p>An {@link InterruptedException} raised by either sleep restores the thread's interrupt
 * flag so that the loop condition can observe the interruption and the loop terminates.
 *
 * @param appId application id put into the request URL and the signature headers
 * @param cluster cluster name put into the request URL
 * @param dataCenter data center put into the request URL
 * @param secret access key secret; signature headers are added only when it is not blank
 */
private void doLongPollingRefresh(String appId, String cluster, String dataCenter, String secret) {
  final Random random = new Random();
  ServiceDTO lastServiceDto = null;
  // Loop until polling is stopped or the thread is interrupted
  while (!m_longPollingStopped.get() && !Thread.currentThread().isInterrupted()) {
    // Rate limiting
    if (!m_longPollRateLimiter.tryAcquire(5, TimeUnit.SECONDS)) {
      //wait at most 5 seconds
      try {
        TimeUnit.SECONDS.sleep(5);
      } catch (InterruptedException e) {
        // Restore the flag cleared by sleep() and leave the loop
        Thread.currentThread().interrupt();
        break;
      }
    }
    Transaction transaction = Tracer.newTransaction("Apollo.ConfigService", "pollNotification");
    String url = null;
    try {
      // Pick a Config Service when none is remembered from the previous iteration
      if (lastServiceDto == null) {
        // Get all Config Service addresses and choose one at random
        List<ServiceDTO> configServices = getConfigServices();
        lastServiceDto = configServices.get(random.nextInt(configServices.size()));
      }
      // Assemble the URL of the long-poll notification endpoint
      url =
          assembleLongPollRefreshUrl(lastServiceDto.getHomepageUrl(), appId, cluster, dataCenter,
              m_notifications);
      logger.debug("Long polling from {}", url);
      // Create the HttpRequest and set the read timeout
      HttpRequest request = new HttpRequest(url);
      request.setReadTimeout(LONG_POLLING_READ_TIMEOUT);
      if (!StringUtils.isBlank(secret)) {
        Map<String, String> headers = Signature.buildHttpHeaders(url, appId, secret);
        request.setHeaders(headers);
      }
      transaction.addData("Url", url);
      // Send the request and obtain the HttpResponse
      final HttpResponse<List<ApolloConfigNotification>> response =
          m_httpClient.doGet(request, m_responseType);
      logger.debug("Long polling response: {}, url: {}", response.getStatusCode(), url);
      // New notifications: refresh the locally stored state
      if (response.getStatusCode() == 200 && response.getBody() != null) {
        // Update m_notifications
        updateNotifications(response.getBody());
        // Update m_remoteNotificationMessages
        updateRemoteNotifications(response.getBody());
        transaction.addData("Result", response.getBody().toString());
        // Notify the corresponding RemoteConfigRepository instances
        notify(lastServiceDto, response.getBody());
      }
      //try to load balance
      // No new notification: with probability 1/2 forget the current Config Service so that
      // the next request picks one at random again
      if (response.getStatusCode() == 304 && random.nextBoolean()) {
        lastServiceDto = null;
      }
      // Mark success on the failure schedule policy
      m_longPollFailSchedulePolicyInSecond.success();
      transaction.addData("StatusCode", response.getStatusCode());
      transaction.setStatus(Transaction.SUCCESS);
    } catch (Throwable ex) {
      // Forget the current Config Service so that the next request picks one again
      lastServiceDto = null;
      Tracer.logEvent("ApolloConfigException", ExceptionUtil.getDetailMessage(ex));
      transaction.setStatus(ex);
      // Mark failure and obtain the delay before the next attempt
      long sleepTimeInSecond = m_longPollFailSchedulePolicyInSecond.fail();
      logger.warn(
          "Long polling failed, will retry in {} seconds. appId: {}, cluster: {}, namespaces: {}, long polling url: {}, reason: {}",
          sleepTimeInSecond, appId, cluster, assembleNamespaces(), url, ExceptionUtil.getDetailMessage(ex));
      // Wait before retrying
      try {
        TimeUnit.SECONDS.sleep(sleepTimeInSecond);
      } catch (InterruptedException ie) {
        // Restore the flag cleared by sleep(); the loop condition then ends the loop
        Thread.currentThread().interrupt();
      }
    } finally {
      transaction.complete();
    }
  }
}
notify
方法
这里会遍历所有的
RemoteConfigRepository
,然后通知对应的客户端,调用其onLongPollNotified
方法
/**
 * Forwards long-poll notifications to the repositories registered for each namespace.
 *
 * <p>For every notification, the repositories registered under the namespace name and under
 * the namespace name with the properties file-format suffix are notified. A failure in one
 * repository is traced and does not prevent the remaining ones from being notified.
 *
 * @param lastServiceDto the Config Service that returned the notifications
 * @param notifications the notifications to dispatch; {@code null} or empty is a no-op
 */
private void notify(ServiceDTO lastServiceDto, List<ApolloConfigNotification> notifications) {
  if (notifications == null || notifications.isEmpty()) {
    return;
  }
  for (ApolloConfigNotification each : notifications) {
    final String namespaceName = each.getNamespaceName();

    //create a new list to avoid ConcurrentModificationException
    final List<RemoteConfigRepository> listeners =
        Lists.newArrayList(m_longPollNamespaces.get(namespaceName));

    // Hand out a clone of the stored remote messages (or null when nothing is stored).
    final ApolloNotificationMessages stored = m_remoteNotificationMessages.get(namespaceName);
    ApolloNotificationMessages snapshot = null;
    if (stored != null) {
      snapshot = stored.clone();
    }

    //since .properties are filtered out by default, so we need to check if there is any listener for it
    final String propertiesKey =
        String.format("%s.%s", namespaceName, ConfigFileFormat.Properties.getValue());
    listeners.addAll(m_longPollNamespaces.get(propertiesKey));

    for (RemoteConfigRepository listener : listeners) {
      try {
        listener.onLongPollNotified(lastServiceDto, snapshot);
      } catch (Throwable ex) {
        Tracer.logError(ex);
      }
    }
  }
}
RemoteConfigRepository#onLongPollNotified
方法
这里下面发现会直接调用
trySync
方法尝试进行同步,这里其实有疑问,就是为什么不在long polling 的返回结果中直接携带最新的配置,而是收到变更通知后再次发起拉取请求。原因就是加载配置接口是幂等的,推送配置的话就做不到了,因为和推送顺序相关,就比如长轮询的通知里面带有配置信息,定时轮询里面也拿到配置信息,这个时候client没法判断哪个配置是新的,通知是定时轮询的补充,有了通知,立马拉取。
/**
 * Callback invoked when long polling reports a change for this repository's namespace.
 *
 * <p>Remembers the notifying Config Service and the remote messages, then submits a task to
 * the shared executor that sets the force-refresh flag and calls {@code trySync()}.
 *
 * @param longPollNotifiedServiceDto the Config Service that delivered the notification
 * @param remoteMessages the notification messages to store for the next load
 */
public void onLongPollNotified(ServiceDTO longPollNotifiedServiceDto, ApolloNotificationMessages remoteMessages) {
  // Remember which Config Service reported the change so the next load contacts it first.
  m_longPollServiceDto.set(longPollNotifiedServiceDto);
  m_remoteMessages.set(remoteMessages);

  final Runnable forcedSync = new Runnable() {
    @Override
    public void run() {
      // Mark the refresh as forced before synchronizing.
      m_configNeedForceRefresh.set(true);
      trySync();
    }
  };
  m_executorService.submit(forcedSync);
}
- 整体流程图