Leader.go
// leaderCommit advances the leader's commit index and delivers every newly
// committed entry on rf.applyCh.
//
// It returns immediately unless rf.role is LEADER. An entry is treated as
// committed only when it was written in rf.currentTerm and is held by a
// majority of the cluster: the leader itself plus every peer whose matchIdx
// has reached the entry. Entries from earlier terms are never committed by
// counting replicas; they are delivered only because a later current-term
// entry became committed.
//
// Index convention: rf.log[i] holds the entry whose absolute index is
// rf.startIdx + i. Local slot 0 is never inspected by the term scan.
// NOTE(review): slot 0 presumably stands for the last entry covered by the
// snapshot - confirm against the snapshot code.
//
// For each delivered entry the method sends an ApplyMsg, then updates
// rf.commitIdx and calls rf.persist(), in that order.
func (rf *Raft) leaderCommit() {
	if rf.role != LEADER {
		return
	}

	// Walk backwards over the log tail to find the earliest entry written in
	// the current term. Slot 0 is skipped, so 0 doubles as "not found".
	firstInTerm := 0
	for i := len(rf.log) - 1; i > 0; i-- {
		term := rf.log[i].Term
		if term > rf.currentTerm {
			// already lost leadership, but haven't applied this change
			rf.logger.Trace.Printf("get term %v > current term %v, in server %v, is leader %v\n", rf.log[i].Term, rf.currentTerm, rf.me, rf.role == LEADER)
			return
		}
		if term < rf.currentTerm {
			break
		}
		firstInTerm = i
	}
	if firstInTerm == 0 {
		// No entry of the current term exists yet, so committing by replica
		// count would be unsafe.
		return
	}

	// Candidate absolute index: the first current-term entry, but never an
	// index that is already committed. The replica count can only shrink as
	// the index grows, so skipping committed entries cannot change the result.
	next := firstInTerm + int(rf.startIdx)
	if committed := int(rf.commitIdx); next <= committed {
		next = committed + 1
	}
	end := len(rf.log) + int(rf.startIdx)
	majority := len(rf.peers)/2 + 1

	// Find the highest index that is stored on a majority of servers.
	upperBound := rf.commitIdx
	for ; next < end; next++ {
		replicas := 1 // the leader always holds its own entries
		for i := range rf.peers {
			if i != rf.me && int(rf.matchIdx[i]) >= next {
				replicas++
			}
		}
		// The majority test is made after counting so that a single-server
		// cluster, where the leader alone is a majority, can still commit.
		if replicas < majority {
			break
		}
		upperBound = uint64(next)
	}

	rf.logger.Trace.Printf("leader %v upperbound %v min %v\n", rf.me, upperBound, rf.commitIdx+1)
	for cId := rf.commitIdx + 1; cId <= upperBound; cId++ {
		if cId >= uint64(len(rf.log))+rf.startIdx {
			rf.logger.Error.Fatalln("out of bound")
		}
		entry := rf.log[cId-rf.startIdx]
		rf.logger.Trace.Printf("leader %v commit %v %v", rf.me, cId, entry)
		rf.applyCh <- ApplyMsg{int(cId), entry.Command, false, nil}
		// Record and persist progress only after the entry has been handed over.
		rf.commitIdx = cId
		rf.persist()
	}
}
其中 startIndex(代码中的 rf.startIdx)表示 Raft 中的 server 进行 snapshot 之后,当前保留的 log 的起始下标;该下标之前的 log 已被丢弃。
例如,此时 startIndex 值为 10,也就是下标 0~9 的日志已被丢弃,下次故障恢复从 index 值为 10 开始,log 数组长度定长。因此,rf.log[i] 对应的绝对下标是 startIndex + i,日志的绝对下标范围是从 startIndex 到 startIndex + len(rf.log) - 1。
状态 | 所有服务器上持久存在的 |
---|---|
currentTerm | 服务器最后一次知道的任期号(初始化为 0,持续递增) |
votedFor | 在当前任期内获得本服务器选票的候选人的 Id |
log[] | 日志条目集;每一个条目包含一个用户状态机执行的指令,和收到时的任期号 |
状态 | 所有服务器上经常变的 |
---|---|
commitIndex |