领导选举部分需要防范的是, 候选人是拜占庭节点,伪造持有最新的日志骗取选票
解决思路是
先发起一次PreRequestVote, Follower响应自身最新的已提交日志的Term和Index作为验证要求, 然后Candidate发送RequestVote, 其中带上自己对该日志项计算出的SHA-256哈希, Follower校验通过后才进行投票
在6.824基础的Raft上进行几处修改
增加
签名函数
// SHA256 returns the lowercase hexadecimal SHA-256 digest of the bytes that
// GetBytes produces for elem.
//
// The error result is always nil; it is part of the signature so callers can
// treat hashing as a fallible step.
func SHA256(elem interface{}) (string, error) {
	digest := sha256.Sum256(GetBytes(elem))
	return fmt.Sprintf("%x", digest), nil
}
投票与预投票结构体
// PreRequestVoteArgs is the argument of the pre-vote RPC. A candidate sends it
// to ask a peer for its vote; the peer does not vote yet, it only answers with
// the committed entry the candidate must prove it holds.
type PreRequestVoteArgs struct {
	// Term is the candidate's term.
	Term int
	// CandidateId identifies the candidate requesting the vote.
	CandidateId int
	// LastCommittedLogTerm is the term the candidate claims for its last
	// committed log entry. The claim is unverified and may be forged.
	LastCommittedLogTerm int
	// LastCommittedLogIndex is the index the candidate claims for its last
	// committed log entry. The claim is unverified and may be forged.
	LastCommittedLogIndex int
}
// PreRequestVoteReply is the answer to a pre-vote RPC. On success it names the
// log entry (by index and term) that the candidate has to prove it holds.
type PreRequestVoteReply struct {
	// Success is true when the receiver accepted the pre-vote and filled in
	// the remaining fields.
	Success bool
	// ReceiverId is the id of the peer that produced this reply.
	ReceiverId int
	// ReceiverLastCommittedLogTerm is the term of the receiver's last
	// committed log entry.
	ReceiverLastCommittedLogTerm int
	// ReceiverLastCommittedLogIndex is the index of the receiver's last
	// committed log entry.
	ReceiverLastCommittedLogIndex int
}
// RequestVoteArgs is the RequestVote RPC argument structure.
// Field names must start with capital letters so the RPC layer can encode them.
type RequestVoteArgs struct {
	// Term is the candidate's term.
	Term int
	// CandidateId identifies the candidate requesting the vote.
	CandidateId int

	// Committed-entry proof: the three fields below describe the receiver's
	// last committed log entry as seen by the candidate.

	// LastCommittedLogTerm is the term of the receiver's last committed entry.
	LastCommittedLogTerm int
	// LastCommittedLogIndex is the index of the receiver's last committed entry.
	LastCommittedLogIndex int
	// LastCommittedLogHash is the hash of the receiver's last committed entry.
	LastCommittedLogHash string
}
// RequestVoteReply is the RequestVote RPC reply structure.
// Field names must start with capital letters so the RPC layer can encode them.
type RequestVoteReply struct {
	// Term is the term reported by the peer that was asked for its vote; a
	// candidate that sees a term higher than its own steps down to follower.
	Term int
	// VoteGranted is true when the candidate received this peer's vote.
	VoteGranted bool
}
新增预投票RPC调用, Follower响应自身最新已提交日志的Term和Index, Candidate据此找出该日志项并计算哈希, 包含在随后的RequestVote请求中发回给Follower
// PreRequestVoteArgs is the RPC handler for the pre-vote phase.
//
// It rejects requests from an older term, steps down to follower when the
// request carries a newer term, and otherwise consults isMoreUpToDateLocked
// with the candidate's claimed index and term. When that check returns false,
// the reply is marked successful and filled with this node's id plus the term
// and index of its entry at commitIndex, which the candidate must then prove
// it holds.
func (rf *Raft) PreRequestVoteArgs(args PreRequestVoteArgs, reply *PreRequestVoteReply) {
	rf.mu.Lock()
	defer rf.mu.Unlock()

	// Fail by default; only the final branch flips this to true.
	reply.Success = false

	switch {
	case args.Term < rf.currentTerm:
		LOG(rf.me, rf.currentTerm, DVote, "PreRequestVoteArgs : args.Term < rf.currentTerm")
		return
	case args.Term > rf.currentTerm:
		rf.becomeFollowerLocked(args.Term)
	}

	// NOTE(review): isMoreUpToDateLocked presumably reports whether this
	// node's log is more up to date than the candidate's; confirm against its
	// definition.
	if rf.isMoreUpToDateLocked(args.LastCommittedLogIndex, args.LastCommittedLogTerm) {
		return
	}

	reply.ReceiverId = rf.me
	reply.ReceiverLastCommittedLogTerm = rf.log[rf.commitIndex].Term
	reply.ReceiverLastCommittedLogIndex = rf.commitIndex
	reply.Success = true
}
// sendPreRequestVoteArgs sends the Raft.PreRequestVoteArgs RPC to server and
// stores the answer in reply. It returns whether the RPC itself was delivered.
//
// When the peer answers with Success, a RequestVote carrying the
// committed-entry proof (term, index and hash of the entry the peer named) is
// sent to that peer in a background goroutine; its reply is not inspected
// here.
//
// The method takes rf.mu briefly after the RPC returns, so it must not be
// called with rf.mu held.
func (rf *Raft) sendPreRequestVoteArgs(server int, args PreRequestVoteArgs, reply *PreRequestVoteReply) bool {
	ok := rf.peers[server].Call("Raft.PreRequestVoteArgs", args, reply)
	if !ok || !reply.Success {
		return ok
	}

	// The index comes from a remote peer that may be faulty or simply ahead
	// of us, so validate it before indexing our own log. If we do not hold the
	// entry we cannot prove anything and skip the follow-up vote request.
	index := reply.ReceiverLastCommittedLogIndex
	rf.mu.Lock()
	if index < 0 || index >= len(rf.log) {
		rf.mu.Unlock()
		return ok
	}
	hash, err := SHA256(rf.log[index])
	currentTerm := rf.currentTerm
	rf.mu.Unlock()
	if err != nil {
		// Without a hash there is no proof to send.
		return ok
	}

	receiver := reply.ReceiverId
	requestVoteArgs := RequestVoteArgs{
		Term:                  currentTerm,
		CandidateId:           rf.me,
		LastCommittedLogTerm:  reply.ReceiverLastCommittedLogTerm,
		LastCommittedLogIndex: index,
		LastCommittedLogHash:  hash,
	}
	requestVoteReply := RequestVoteReply{}
	go func() {
		rf.sendRequestVote(receiver, &requestVoteArgs, &requestVoteReply)
	}()
	return ok
}
在RequestVote中, 新增记录验证, 确保Candidate真的持有Follower最新的已提交记录
Begin Committed 证明
// Verify the committed-entry proof: the candidate must name this node's last
// committed entry (term and index) AND present the matching hash. Any single
// mismatch, or a failure to compute the local hash, rejects the vote.
hash, err := SHA256(rf.log[rf.commitIndex])
if err != nil ||
	args.LastCommittedLogTerm != rf.log[rf.commitIndex].Term ||
	args.LastCommittedLogIndex != rf.commitIndex ||
	args.LastCommittedLogHash != hash {
	reply.Term = args.Term
	reply.VoteGranted = false
	return
}
End Committed 证明
修改选举启动部分, 增加对预投票结果的统计
引入一个voteCh通道,用于处理并发投票的结果
// startElection runs one election round for term.
//
// For every other peer it first sends a PreRequestVote. If the peer accepts,
// the peer's reply names its last committed entry; the candidate hashes its
// own copy of that entry and sends a RequestVote carrying term, index and hash
// as proof. Granted votes are tallied through voteCh, and the node becomes
// leader once it holds a majority, provided it is still in the same term.
//
// The function blocks until a majority is reached or every peer has answered
// (or its RPC has failed).
func (rf *Raft) startElection(term int) {
	rf.mu.Lock()
	myLastLogIndex := len(rf.log) - 1
	myLastLogTerm := rf.log[myLastLogIndex].Term
	rf.mu.Unlock()

	// proveCommitted hashes our copy of the entry at index. The index is
	// supplied by a remote peer, so it is bounds-checked before use; ok is
	// false when we do not hold the entry or hashing fails.
	proveCommitted := func(index int) (hash string, ok bool) {
		rf.mu.Lock()
		defer rf.mu.Unlock()
		if index < 0 || index >= len(rf.log) {
			return "", false
		}
		h, err := SHA256(rf.log[index])
		if err != nil {
			return "", false
		}
		return h, true
	}

	// askPeer runs the pre-vote / vote exchange with one peer and reports
	// whether that peer granted its vote.
	askPeer := func(peer int) bool {
		preArgs := PreRequestVoteArgs{
			Term:                  term,
			CandidateId:           rf.me,
			LastCommittedLogTerm:  myLastLogTerm,
			LastCommittedLogIndex: myLastLogIndex,
		}
		preReply := PreRequestVoteReply{}
		if !rf.sendPreRequestVoteArgs(peer, preArgs, &preReply) || !preReply.Success {
			return false
		}

		hash, ok := proveCommitted(preReply.ReceiverLastCommittedLogIndex)
		if !ok {
			return false
		}

		args := RequestVoteArgs{
			Term:                  term,
			CandidateId:           rf.me,
			LastCommittedLogTerm:  preReply.ReceiverLastCommittedLogTerm,
			LastCommittedLogIndex: preReply.ReceiverLastCommittedLogIndex,
			LastCommittedLogHash:  hash,
		}
		reply := RequestVoteReply{}
		return rf.sendRequestVote(peer, &args, &reply) && reply.VoteGranted
	}

	// Buffered to one slot per peer so a sender never blocks, even after the
	// tally loop below has returned early.
	voteCh := make(chan bool, len(rf.peers)-1)
	for peer := range rf.peers {
		if peer == rf.me {
			continue
		}
		go func(peer int) {
			voteCh <- askPeer(peer)
		}(peer)
	}

	votes := 1 // the candidate votes for itself
	for i := 0; i < len(rf.peers)-1; i++ {
		granted := <-voteCh
		if !granted {
			continue
		}
		votes++
		if votes > len(rf.peers)/2 {
			rf.mu.Lock()
			// Only take leadership if the term has not moved on meanwhile.
			if rf.currentTerm == term {
				rf.becomeLeaderLocked()
			}
			rf.mu.Unlock()
			return
		}
	}
}
测试通过
debug:
在election部分, becomeLeader后立即发送一轮心跳, 忘了加这个导致PartB失效, 找了半天才意识到是这里的遗漏