心跳线程启动
在启动选举线程后,开始启动发送心跳的线程:
// Register the heartbeat task with GlobalExecutor. registerHeartbeat hands it to a
// scheduled executor with a fixed delay of TICK_PERIOD_MS (noted as 500 ms).
GlobalExecutor.registerHeartbeat(new HeartBeat());
/**
 * Schedules the given heartbeat task on {@code executorService}.
 *
 * <p>The task is first run without delay and is then re-run with a fixed delay of
 * {@code TICK_PERIOD_MS} milliseconds (500 ms according to the original note)
 * between the end of one run and the start of the next.
 *
 * @param runnable the heartbeat task to schedule
 */
public static void registerHeartbeat(Runnable runnable) {
    final long initialDelayMs = 0;
    executorService.scheduleWithFixedDelay(runnable, initialDelayMs, TICK_PERIOD_MS, TimeUnit.MILLISECONDS);
}
直接看HeartBeat的run方法:
/**
 * One tick of the heartbeat task.
 *
 * <p>Does nothing until {@code peers.isReady()} is true. Otherwise it subtracts one
 * tick period from the local peer's {@code heartbeatDueMs}; once that countdown is
 * no longer positive it is reset and a beat is sent. Any exception is logged as a
 * warning and not rethrown.
 */
public void run() {
    try {
        if (peers.isReady()) {
            RaftPeer self = peers.local();
            self.heartbeatDueMs -= GlobalExecutor.TICK_PERIOD_MS;
            // Only send once the countdown has run out.
            if (!(self.heartbeatDueMs > 0)) {
                self.resetHeartbeatDue();
                sendBeat();
            }
        }
    } catch (Exception e) {
        Loggers.RAFT.warn("[RAFT] error while sending beat {}", e);
    }
}
其中最重要的是sendBeat()方法。该方法主要封装一些信息,将这些信息用gzip压缩成byte数组后发送给所有的follower;然后等待http请求结果返回,并对返回值做处理。实际上这里返回的就是follower节点自己的信息,leader拿到follower最新的信息后更新到内存里的peers中。
// Parse the HTTP response body as a RaftPeer and store it through peers.update.
peers.update(JSON.parseObject(response.getResponseBody(), RaftPeer.class));
/**
 * Puts the given peer into {@code peers}, keyed by the peer's {@code ip}.
 *
 * @param peer the peer to store
 * @return the same {@code peer} instance that was passed in
 */
public RaftPeer update(RaftPeer peer) {
    final String peerIp = peer.ip;
    peers.put(peerIp, peer);
    return peer;
}
对于接收leader心跳请求的follower端,从构造的url可知,follower接收请求的路径为/raft/beat,对应的controller是RaftController:
/**
 * Handles a POST to {@code /beat}.
 *
 * <p>Reads the request body through {@code IoUtils.tryDecompress}, interprets it as
 * UTF-8 text, URL-decodes it twice, parses it as JSON and passes the nested
 * {@code "beat"} object to {@code raftCore.receivedBeat}.
 *
 * @param request  the incoming HTTP request carrying the beat payload
 * @param response the HTTP response (not used by this method)
 * @return the peer returned by {@code receivedBeat}, re-serialized as a JSON object
 * @throws Exception if reading, decoding, parsing or beat handling fails
 */
@NeedAuth
@RequestMapping(value = "/beat", method = RequestMethod.POST)
public JSONObject beat(HttpServletRequest request, HttpServletResponse response) throws Exception {
    byte[] rawBody = IoUtils.tryDecompress(request.getInputStream());
    String decoded = new String(rawBody, StandardCharsets.UTF_8);
    // The payload is URL-decoded two times in a row.
    for (int pass = 0; pass < 2; pass++) {
        decoded = URLDecoder.decode(decoded, "UTF-8");
    }

    JSONObject envelope = JSON.parseObject(decoded);
    String beatText = envelope.getString("beat");
    JSONObject beat = JSON.parseObject(beatText);

    // Process the beat received from the leader.
    RaftPeer localPeer = raftCore.receivedBeat(beat);
    String localPeerJson = JSON.toJSONString(localPeer);
    return JSON.parseObject(localPeerJson);
}
处理逻辑在receivedBeat中:
/**
 * Handles a heartbeat received from the leader and returns the local peer.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Rebuild the sender's peer from the {@code "peer"} object of the beat.</li>
 *   <li>Reject the beat if the sender is not in state LEADER or if the local term
 *       is greater than the sender's term.</li>
 *   <li>Switch the local peer to FOLLOWER if it is not one already, reset the
 *       leader and heartbeat countdowns and register the sender via
 *       {@code peers.makeLeader}.</li>
 *   <li>Unless {@code switchDomain.isSendBeatOnly()} is true, compare the keys in
 *       the beat's {@code "datums"} array with the local {@code datums}, request
 *       missing or older entries from the leader asynchronously in batches, and
 *       delete local keys that the beat did not mention.</li>
 * </ol>
 *
 * @param beat JSON object with a {@code "peer"} object and a {@code "datums"} array
 * @return the local peer
 * @throws IllegalArgumentException if the sender is not a LEADER or the beat's term
 *                                  is lower than the local term
 * @throws Exception declared by the signature for failures in the calls made here
 */
public RaftPeer receivedBeat(JSONObject beat) throws Exception {
final RaftPeer local = peers.local();
// Rebuild the sender's peer from the "peer" object carried in the beat.
final RaftPeer remote = new RaftPeer();
remote.ip = beat.getJSONObject("peer").getString("ip");
remote.state = RaftPeer.State.valueOf(beat.getJSONObject("peer").getString("state"));
remote.term.set(beat.getJSONObject("peer").getLongValue("term"));
remote.heartbeatDueMs = beat.getJSONObject("peer").getLongValue("heartbeatDueMs");
remote.leaderDueMs = beat.getJSONObject("peer").getLongValue("leaderDueMs");
remote.voteFor = beat.getJSONObject("peer").getString("voteFor");
// Only a peer that reports itself as LEADER is accepted as a beat sender.
if (remote.state != RaftPeer.State.LEADER) {
Loggers.RAFT.info("[RAFT] invalid state from master, state: {}, remote peer: {}",
remote.state, JSON.toJSONString(remote));
throw new IllegalArgumentException("invalid state from master, state: " + remote.state);
}
// A beat whose term is lower than the local term is out of date and rejected.
if (local.term.get() > remote.term.get()) {
Loggers.RAFT.info("[RAFT] out of date beat, beat-from-term: {}, beat-to-term: {}, remote peer: {}, and leaderDueMs: {}"
, remote.term.get(), local.term.get(), JSON.toJSONString(remote), local.leaderDueMs);
throw new IllegalArgumentException("out of date beat, beat-from-term: " + remote.term.get()
+ ", beat-to-term: " + local.term.get());
}
if (local.state != RaftPeer.State.FOLLOWER) {
Loggers.RAFT.info("[RAFT] make remote as leader, remote peer: {}", JSON.toJSONString(remote));
// Become a follower and record the sender's ip in voteFor.
local.state = RaftPeer.State.FOLLOWER;
local.voteFor = remote.ip;
}
final JSONArray beatDatums = beat.getJSONArray("datums");
// Reset the leader countdown on every accepted beat.
// Per the original note, the follower's election task decrements this value by 500 ms per tick
// and starts a new election once it drops below zero (that task is not visible in this method).
local.resetLeaderDue();
// Reset the heartbeat countdown as well.
local.resetHeartbeatDue();
peers.makeLeader(remote);
// Mark every locally known key with 0; keys that also appear in the beat are set to 1 below.
Map<String, Integer> receivedKeysMap = new HashMap<>(datums.size());
for (Map.Entry<String, Datum> entry : datums.entrySet()) {
receivedKeysMap.put(entry.getKey(), 0);
}
// Compare the keys announced in the beat with the local datums.
List<String> batch = new ArrayList<>();
if (!switchDomain.isSendBeatOnly()) {
int processedCount = 0;
if (Loggers.RAFT.isDebugEnabled()) {
Loggers.RAFT.debug("[RAFT] received beat with {} keys, RaftCore.datums' size is {}, remote server: {}, term: {}, local term: {}",
beatDatums.size(), datums.size(), remote.ip, remote.term, local.term);
}
for (Object object : beatDatums) {
processedCount = processedCount + 1;
JSONObject entry = (JSONObject) object;
String key = entry.getString("key");
final String datumKey;
if (KeyBuilder.matchServiceMetaKey(key)) {
datumKey = KeyBuilder.detailServiceMetaKey(key);
} else if (KeyBuilder.matchInstanceListKey(key)) {
datumKey = KeyBuilder.detailInstanceListkey(key);
} else {
// Ignore a key that matches neither pattern.
// NOTE(review): if this is the last entry of the beat, the continue also skips the final
// flush below, so keys still pending in batch are not requested for this beat - confirm intended.
continue;
}
long timestamp = entry.getLong("timestamp");
receivedKeysMap.put(datumKey, 1);
try {
// datumKey is derived from the key sent by the leader.
// Skip this entry when the local copy exists and is at least as new as the leader's,
// but only if this is not the last entry: the last entry must fall through so that
// whatever is still pending in batch gets flushed.
if (datums.containsKey(datumKey) && datums.get(datumKey).timestamp.get() >= timestamp && processedCount < beatDatums.size()) {
continue;
}
// Queue the key when it is missing locally or the leader's copy is newer.
if (!(datums.containsKey(datumKey) && datums.get(datumKey).timestamp.get() >= timestamp)) {
batch.add(datumKey);
}
// Keep collecting until 50 keys are queued or the last entry has been reached.
if (batch.size() < 50 && processedCount < beatDatums.size()) {
continue;
}
String keys = StringUtils.join(batch, ",");
if (batch.size() <= 0) {
continue;
}
Loggers.RAFT.info("get datums from leader: {}, batch size is {}, processedCount is {}, datums' size is {}, RaftCore.datums' size is {}"
, getLeader().ip, batch.size(), processedCount, beatDatums.size(), datums.size());
// Request the queued keys from the sender (the leader) with an asynchronous GET;
// this point is reached once 50 keys are queued or all beat entries have been visited.
String url = buildURL(remote.ip, API_GET) + "?keys=" + URLEncoder.encode(keys, "UTF-8");
HttpClient.asyncHttpGet(url, null, null, new AsyncCompletionHandler<Integer>() {
@Override
public Integer onCompleted(Response response) throws Exception {
// Invoked with the leader's response; any status other than 200 is ignored.
if (response.getStatusCode() != HttpURLConnection.HTTP_OK) {
return 1;
}
// The response body is parsed as a JSON list with one object per returned datum.
List<JSONObject> datumList = JSON.parseObject(response.getResponseBody(), new TypeReference<List<JSONObject>>() {
});
for (JSONObject datumJson : datumList) {
OPERATE_LOCK.lock();
Datum newDatum = null;
try {
// The datum currently stored locally for this key, if any.
Datum oldDatum = getDatum(datumJson.getString("key"));
// Keep the local datum when it is at least as new as the one returned by the leader.
if (oldDatum != null && datumJson.getLongValue("timestamp") <= oldDatum.timestamp.get()) {
Loggers.RAFT.info("[NACOS-RAFT] timestamp is smaller than that of mine, key: {}, remote: {}, local: {}",
datumJson.getString("key"), datumJson.getLongValue("timestamp"), oldDatum.timestamp);
continue;
}
// Service metadata key: the value is deserialized as a Service.
// Per the original note such keys start with "com.alibaba.nacos.naming.domains.meta."
// or "meta." (the prefixes are defined in KeyBuilder, not shown here).
if (KeyBuilder.matchServiceMetaKey(datumJson.getString("key"))) {
Datum<Service> serviceDatum = new Datum<>();
serviceDatum.key = datumJson.getString("key");
serviceDatum.timestamp.set(datumJson.getLongValue("timestamp"));
serviceDatum.value =
JSON.parseObject(JSON.toJSONString(datumJson.getJSONObject("value")), Service.class);
newDatum = serviceDatum;
}
// Instance list key: the value is deserialized as Instances.
// Per the original note such keys start with "com.alibaba.nacos.naming.iplist."
// or "iplist." (the prefixes are defined in KeyBuilder, not shown here).
if (KeyBuilder.matchInstanceListKey(datumJson.getString("key"))) {
Datum<Instances> instancesDatum = new Datum<>();
instancesDatum.key = datumJson.getString("key");
instancesDatum.timestamp.set(datumJson.getLongValue("timestamp"));
instancesDatum.value =
JSON.parseObject(JSON.toJSONString(datumJson.getJSONObject("value")), Instances.class);
newDatum = instancesDatum;
}
if (newDatum == null || newDatum.value == null) {
Loggers.RAFT.error("receive null datum: {}", datumJson);
continue;
}
// Apply the new datum locally.
// Hand it to raftStore.write (the original note describes this as persisting it to disk).
raftStore.write(newDatum);
// Put it into the in-memory datums map, replacing any previous entry for the key.
datums.put(newDatum.key, newDatum);
// Queue a CHANGE task for this key on the notifier.
notifier.addTask(newDatum.key, ApplyAction.CHANGE);
// Reset the leader countdown again.
local.resetLeaderDue();
// Update the term: if local term + 100 exceeds the remote term, adopt the remote term
// (set on the leader entry, then copied to local); otherwise advance the local term by 100.
if (local.term.get() + 100 > remote.term.get()) {
getLeader().term.set(remote.term.get());
local.term.set(getLeader().term.get());
} else {
local.term.addAndGet(100);
}
// Pass the updated term to raftStore.updateTerm (described in the original note as flushing to disk).
raftStore.updateTerm(local.term.get());
Loggers.RAFT.info("data updated, key: {}, timestamp: {}, from {}, local term: {}",
newDatum.key, newDatum.timestamp, JSON.toJSONString(remote), local.term);
} catch (Throwable e) {
Loggers.RAFT.error("[RAFT-BEAT] failed to sync datum from leader, datum: {}", newDatum, e);
} finally {
// Always release the lock, including on the continue paths above.
OPERATE_LOCK.unlock();
}
}
// Pause 200 ms before the handler returns.
TimeUnit.MILLISECONDS.sleep(200);
return 0;
}
});
// The request has been issued; start a fresh batch.
batch.clear();
} catch (Exception e) {
// NOTE(review): the caught exception itself is not passed to the logger here.
Loggers.RAFT.error("[NACOS-RAFT] failed to handle beat entry, key: {}", datumKey);
}
}
// Remove local keys that the beat did not mention (still marked 0).
List<String> deadKeys = new ArrayList<>();
for (Map.Entry<String, Integer> entry : receivedKeysMap.entrySet()) {
if (entry.getValue() == 0) {
deadKeys.add(entry.getKey());
}
}
for (String deadKey : deadKeys) {
try {
deleteDatum(deadKey);
} catch (Exception e) {
Loggers.RAFT.error("[NACOS-RAFT] failed to remove entry, key={} {}", deadKey, e);
}
}
}
return local;
}
几个重要的操作:
1.每次收到leader的心跳后,follower就重置本地的leaderDueMs,该值用来决定是否leader已经挂了,从而确定是否触发选举。长时间没有收到心跳的话就会在选举线程(GlobalExecutor.registerMasterElection(new MasterElection());)每次减500ms的情况下,减到负数,触发选举。
2.更新本地的term。
3.找出与本地有差异的实例key,再次请求leader,让leader把这些key对应的实例发送过来,用来更新本地内存注册表和磁盘文件,并删除已失效的实例。
4.最后返回本地实例元数据peer。
其中涉及到第三步批量发送请求的部分,需要leader端有服务接口处理该请求,从url拼接来看,该接口是RaftController的一个GET请求:
/**
 * Handles a GET to {@code /datum}.
 *
 * <p>Sets the response headers, reads the required {@code keys} parameter,
 * URL-decodes it, splits it on commas and looks up each key through
 * {@code raftCore.getDatum}. Every lookup result is added to the list as returned,
 * in request order.
 *
 * @param request  the incoming HTTP request; must carry a {@code keys} parameter
 * @param response the HTTP response whose headers are set here
 * @return the list of looked-up datums serialized as a JSON string
 * @throws Exception if the parameter is missing or decoding fails
 */
@NeedAuth
@RequestMapping(value = "/datum", method = RequestMethod.GET)
public String get(HttpServletRequest request, HttpServletResponse response) throws Exception {
    // Headers are written before the parameter is read, as in the original order.
    response.setHeader("Content-Type", "application/json; charset=" + getAcceptEncoding(request));
    response.setHeader("Cache-Control", "no-cache");
    response.setHeader("Content-Encode", "gzip");

    String rawKeys = URLDecoder.decode(WebUtils.required(request, "keys"), "UTF-8");
    String[] requestedKeys = rawKeys.split(",");

    List<Datum> found = new ArrayList<Datum>(requestedKeys.length);
    for (int i = 0; i < requestedKeys.length; i++) {
        found.add(raftCore.getDatum(requestedKeys[i]));
    }
    return JSON.toJSONString(found);
}
只是简单地把这些key对应的instance返回给follower。
这样,心跳与选举的关系就串到一起了。