A withered tree that meets the spring will not be lush again;
while still young, cherish the person in the mirror.
Foreword
Lab 3B adds a snapshot feature, and that requires changes to the Raft code as well. For example, when sending heartbeats, if a peer's nextIndex falls inside the snapshot, the leader has to send the snapshot to that peer instead; persistence also has to change. It amounts to a major overhaul of Raft plus the whole system, and finishing it completely on my own would have been very hard (for me), so I referred to a more experienced student's code.
Implementation
First, two new persistent state fields are introduced at the Raft layer: lastIncludedIndex and lastIncludedTerm.
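A minimal sketch of where these fields live is shown below. Only the two new fields are from the code in this post; the rest of the Raft struct is elided and its exact layout is an assumption.

type Raft struct {
    // ... existing fields (mu, peers, persister, me, currentTerm, votedFor, log, ...) elided ...

    lastIncludedIndex int // index of the last log entry covered by the snapshot (0 means no snapshot yet)
    lastIncludedTerm  int // term of that entry
}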
The KV layer needs to run a snapshotting loop. Note that when checking the Raft log size you must not hold the KV lock, otherwise the Raft and KV layers can deadlock.
func (kv *KVServer) snapshotLoop() {
    for !kv.killed() {
        var snapshot []byte
        var lastIncludedIndex int
        // dump the snapshot inside the lock
        func() {
            // if the raft state has grown beyond maxraftstate, snapshot the kvStore
            if kv.maxraftstate != -1 && kv.rf.ExceedLogSize(kv.maxraftstate) { // do not hold the KV lock when calling ExceedLogSize, or it deadlocks
                // build the snapshot inside the lock, notify raft after releasing it
                kv.mu.Lock()
                w := new(bytes.Buffer)
                e := labgob.NewEncoder(w)
                e.Encode(kv.kvStore) // the key/value store
                e.Encode(kv.seqMap)  // each client's latest request id travels with the snapshot too
                snapshot = w.Bytes()
                lastIncludedIndex = kv.lastAppliedIndex
                DPrintf("KVServer[%d] KVServer dump snapshot, snapshotSize[%d] lastAppliedIndex[%d]", kv.me, len(snapshot), kv.lastAppliedIndex)
                kv.mu.Unlock()
            }
        }()
        // notify the raft layer to truncate outside the lock, otherwise there is a deadlock
        if snapshot != nil {
            // have raft persist the snapshot and truncate its log (these entries are all committed, so a leadership change cannot truncate them — safe to do)
            kv.rf.TakeSnapshot(snapshot, lastIncludedIndex)
        }
        time.Sleep(10 * time.Millisecond)
    }
}
The server layer checks the Raft state size; once it exceeds the limit it packs up a snapshot and hands it down for Raft to truncate its log.
// save the snapshot and truncate the log
func (rf *Raft) TakeSnapshot(snapshot []byte, lastIncludedIndex int) {
    rf.mu.Lock()
    defer rf.mu.Unlock()
    // a snapshot with a larger index already exists
    if lastIncludedIndex <= rf.lastIncludedIndex {
        return
    }
    // current snapshot metadata
    DPrintf("RaftNode[%d] TakeSnapshot begins, IsLeader[%v] snapshotLastIndex[%d] lastIncludedIndex[%d] lastIncludedTerm[%d]",
        rf.me, rf.leaderId == rf.me, lastIncludedIndex, rf.lastIncludedIndex, rf.lastIncludedTerm)
    // number of log entries to compact away
    compactLogLen := lastIncludedIndex - rf.lastIncludedIndex
    // update the snapshot metadata
    rf.lastIncludedTerm = rf.log[rf.index2LogPos(lastIncludedIndex)].Term
    rf.lastIncludedIndex = lastIncludedIndex
    // compact the log
    afterLog := make([]LogEntry, len(rf.log)-compactLogLen)
    copy(afterLog, rf.log[compactLogLen:])
    rf.log = afterLog
    // persist the snapshot together with the raft state
    rf.persister.SaveStateAndSnapshot(rf.raftStateForPersist(), snapshot)
    DPrintf("RaftNode[%d] TakeSnapshot ends, IsLeader[%v] snapshotLastIndex[%d] lastIncludedIndex[%d] lastIncludedTerm[%d]",
        rf.me, rf.leaderId == rf.me, lastIncludedIndex, rf.lastIncludedIndex, rf.lastIncludedTerm)
}
This truncates the log and updates Raft's snapshot metadata, lastIncludedIndex and lastIncludedTerm.
// does the log need to be compacted?
func (rf *Raft) ExceedLogSize(logSize int) bool {
    rf.mu.Lock()
    defer rf.mu.Unlock()
    if rf.persister.RaftStateSize() >= logSize {
        return true
    }
    return false
}
RaftStateSize is the combined size of the persisted Raft state: the log plus currentTerm and votedFor (and, after this change, lastIncludedIndex and lastIncludedTerm as well).
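For reference, in the lab's Persister this is essentially just the length of the byte slice produced by raftStateForPersist. A minimal sketch follows; the actual field names in persister.go may differ slightly.

// Sketch of what RaftStateSize amounts to in the lab's Persister
// (simplified; the real persister guards this with its own mutex and field names may vary):
func (ps *Persister) RaftStateSize() int {
    ps.mu.Lock()
    defer ps.mu.Unlock()
    return len(ps.raftstate) // the bytes last written by SaveStateAndSnapshot / SaveRaftState
}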
Installing snapshots at the KV layer
func (kv *KVServer) applyLoop() {
    for !kv.killed() {
        select {
        case msg := <-kv.applyCh:
            // installing a snapshot
            if !msg.CommandValid {
                func() {
                    kv.mu.Lock()
                    defer kv.mu.Unlock()
                    if len(msg.Snapshot) == 0 { // empty snapshot: clear all data
                        kv.kvStore = make(map[string]string)
                        kv.seqMap = make(map[int64]int64)
                    } else {
                        // deserialize the snapshot and install it into memory
                        r := bytes.NewBuffer(msg.Snapshot)
                        d := labgob.NewDecoder(r)
                        d.Decode(&kv.kvStore)
                        d.Decode(&kv.seqMap)
                    }
                    // index we have applied up to
                    kv.lastAppliedIndex = msg.LastIncludedIndex
                    DPrintf("KVServer[%d] installSnapshot, kvStore[%v], seqMap[%v] lastAppliedIndex[%v]", kv.me, len(kv.kvStore), len(kv.seqMap), kv.lastAppliedIndex)
                }()
            } else { // an ordinary log entry
                cmd := msg.Command
                index := msg.CommandIndex
                func() {
                    kv.mu.Lock()
                    defer kv.mu.Unlock()
                    // record the log index we have applied up to
                    kv.lastAppliedIndex = index
                    // the operation carried by the log entry
                    op := cmd.(*Op)
                    opCtx, existOp := kv.reqMap[index]
                    prevSeq, existSeq := kv.seqMap[op.ClientId]
                    kv.seqMap[op.ClientId] = op.SeqId
                    if existOp { // an RPC is waiting on this index; check whether its state still matches what was written
                        if opCtx.op.Term != op.Term {
                            opCtx.wrongLeader = true
                        }
                    }
                    // only apply client writes whose request ids are strictly increasing
                    if op.Type == OP_TYPE_PUT || op.Type == OP_TYPE_APPEND {
                        if !existSeq || op.SeqId > prevSeq { // the request id is new, accept the mutation
                            if op.Type == OP_TYPE_PUT { // put
                                kv.kvStore[op.Key] = op.Value
                            } else if op.Type == OP_TYPE_APPEND { // put-append
                                if val, exist := kv.kvStore[op.Key]; exist {
                                    kv.kvStore[op.Key] = val + op.Value
                                } else {
                                    kv.kvStore[op.Key] = op.Value
                                }
                            }
                        } else if existOp {
                            opCtx.ignored = true
                        }
                    } else { // OP_TYPE_GET
                        if existOp {
                            opCtx.value, opCtx.keyExist = kv.kvStore[op.Key]
                        }
                    }
                    DPrintf("KVServer[%d] applyLoop, kvStore[%v]", kv.me, len(kv.kvStore))
                    // wake up the pending RPC
                    if existOp {
                        close(opCtx.committed)
                    }
                }()
            }
        }
    }
}
Compared to before, the new part is handling the snapshot case. Installing a snapshot is equivalent to having applied every log entry up to lastIncludedIndex at the KV layer; after that the loop simply keeps waiting for further entries committed by the Raft layer.
Raft startup with snapshots
func Make(peers []*labrpc.ClientEnd, me int,
    persister *Persister, applyCh chan ApplyMsg) *Raft {
    rf := &Raft{}
    rf.peers = peers
    rf.persister = persister
    rf.me = me

    // Your initialization code here (2A, 2B, 2C).
    rf.role = ROLE_FOLLOWER
    rf.leaderId = -1
    rf.votedFor = -1
    rf.lastActiveTime = time.Now()
    rf.lastIncludedIndex = 0
    rf.lastIncludedTerm = 0
    rf.applyCh = applyCh
    // rf.nextIndex = make([]int, len(rf.peers))
    // rf.matchIndex = make([]int, len(rf.peers))

    // initialize from state persisted before a crash
    rf.readPersist(persister.ReadRaftState())
    //rf.installSnapshotToApplication()
    DPrintf("RaftNode[%d] Make again", rf.me)

    // start ticker goroutine to start elections
    go rf.electionLoop()
    go rf.appendEntriesLoop()
    go rf.applyLogLoop()
    //go rf.ticker()

    DPrintf("RaftNode[%d] started", me)
    return rf
}
func (rf *Raft) installSnapshotToApplication() {
    var applyMsg *ApplyMsg
    // the snapshot handed up to the application layer
    applyMsg = &ApplyMsg{
        CommandValid:      false,
        Snapshot:          rf.persister.ReadSnapshot(),
        LastIncludedIndex: rf.lastIncludedIndex,
        LastIncludedTerm:  rf.lastIncludedTerm,
    }
    // the snapshotted prefix has now been delivered to the application, so applyLoop resumes after it
    rf.lastApplied = rf.lastIncludedIndex
    DPrintf("RaftNode[%d] installSnapshotToApplication, snapshotSize[%d] lastIncludedIndex[%d] lastIncludedTerm[%d]",
        rf.me, len(applyMsg.Snapshot), applyMsg.LastIncludedIndex, applyMsg.LastIncludedTerm)
    rf.applyCh <- *applyMsg
    return
}
This installation into the application layer happens after a snapshot sent by the leader in place of a heartbeat has been successfully accepted by the peer; that path is covered further below.
func (rf *Raft) raftStateForPersist() []byte {
    w := new(bytes.Buffer)
    e := labgob.NewEncoder(w)
    e.Encode(rf.currentTerm)
    e.Encode(rf.votedFor)
    e.Encode(rf.log)
    e.Encode(rf.lastIncludedIndex)
    e.Encode(rf.lastIncludedTerm)
    data := w.Bytes()
    return data
}
The persistence function is updated to additionally encode the two newly added snapshot fields.
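For completeness, here is a sketch of the matching readPersist, assuming it decodes in the same order as raftStateForPersist above; the author's actual version may differ.

// Sketch: restore state in the same order it was encoded
// (currentTerm, votedFor, log, lastIncludedIndex, lastIncludedTerm).
func (rf *Raft) readPersist(data []byte) {
    if data == nil || len(data) < 1 {
        return // no persisted state yet
    }
    r := bytes.NewBuffer(data)
    d := labgob.NewDecoder(r)
    d.Decode(&rf.currentTerm)
    d.Decode(&rf.votedFor)
    d.Decode(&rf.log)
    d.Decode(&rf.lastIncludedIndex)
    d.Decode(&rf.lastIncludedTerm)
}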
// the last log index
func (rf *Raft) lastIndex() int {
    return rf.lastIncludedIndex + len(rf.log)
}

// the last log term
func (rf *Raft) lastTerm() (lastLogTerm int) {
    lastLogTerm = rf.lastIncludedTerm // for snapshot
    if len(rf.log) != 0 {
        lastLogTerm = rf.log[len(rf.log)-1].Term
    }
    return
}

// convert a global log index into a position in the log slice
func (rf *Raft) index2LogPos(index int) (pos int) {
    return index - rf.lastIncludedIndex - 1
}
Once snapshots are introduced, every place in the Raft code that indexed rf.log directly or used len(rf.log) as a log index has to go through lastIndex() / index2LogPos() instead, as in the sketch below.
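To illustrate the conversion, here is a hypothetical helper (not part of the original code) for reading the term at a global index once part of the log may live in the snapshot:

// Hypothetical helper, for illustration only: term of the entry at global index idx.
// Before snapshots this was simply rf.log[idx-1].Term; with snapshots the index may sit
// exactly at the snapshot boundary, or inside the trimmed log slice.
func (rf *Raft) termAt(idx int) int {
    if idx == rf.lastIncludedIndex {
        return rf.lastIncludedTerm
    }
    return rf.log[rf.index2LogPos(idx)].Term
}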
func (rf *Raft) applyLogLoop() {
    /*
        Snapshots are also delivered through applyCh. To guarantee that the log entries
        delivered after a snapshot are exactly the ones that follow the snapshot's position,
        both the lastApplied update and the delivery of the entry must happen inside the lock;
        delivering outside the lock lets snapshots and log entries interleave and breaks the
        commit ordering.
    */
    noMore := false
    for !rf.killed() {
        if noMore {
            time.Sleep(10 * time.Millisecond)
        }
        //var appliedMsgs = make([]ApplyMsg, 0)
        func() {
            rf.mu.Lock()
            defer rf.mu.Unlock()
            noMore = true
            for rf.commitIndex > rf.lastApplied {
                rf.lastApplied += 1
                appliedIndex := rf.index2LogPos(rf.lastApplied)
                // appliedMsgs = append(appliedMsgs, ApplyMsg{
                //     CommandValid: true,
                //     Command:      rf.log[appliedIndex].Command,
                //     CommandIndex: rf.lastApplied,
                //     CommandTerm:  rf.log[appliedIndex].Term,
                // })
                appliedMsg := ApplyMsg{
                    CommandValid: true,
                    Command:      rf.log[appliedIndex].Command,
                    CommandIndex: rf.lastApplied,
                    CommandTerm:  rf.log[appliedIndex].Term,
                }
                // with snapshots, the delivery must happen inside the lock, otherwise it can interleave with a snapshot and cause bugs
                rf.applyCh <- appliedMsg
                DPrintf("RaftNode[%d] applyLog, currentTerm[%d] lastApplied[%d] commitIndex[%d]", rf.me, rf.currentTerm, rf.lastApplied, rf.commitIndex)
                noMore = false
            }
        }()
    }
}
After introducing snapshots, log entries must be delivered to applyCh inside the lock. Next is the change to the heartbeat path: with snapshots there is one extra branch that sends a snapshot instead of log entries.
func (rf *Raft) appendEntriesLoop() {
    for !rf.killed() {
        time.Sleep(10 * time.Millisecond)
        func() {
            rf.mu.Lock()
            defer rf.mu.Unlock()
            // only the leader broadcasts heartbeats
            if rf.role != ROLE_LEADER {
                return
            }
            // broadcast once every 100ms
            now := time.Now()
            if now.Sub(rf.lastBroadcastTime) < 100*time.Millisecond {
                return
            }
            rf.lastBroadcastTime = time.Now()
            // send a heartbeat to every follower
            for peerId := 0; peerId < len(rf.peers); peerId++ {
                if peerId == rf.me {
                    continue
                }
                // if nextIndex falls inside the leader's snapshot, sync the snapshot directly
                if rf.nextIndex[peerId] <= rf.lastIncludedIndex {
                    rf.doInstallSnapshot(peerId)
                } else { // otherwise replicate log entries
                    rf.doAppendEntries(peerId)
                }
            }
        }()
    }
}
If the peer's nextIndex falls at or below the leader's lastIncludedIndex (that is, the entries it needs have already been compacted into the snapshot), the leader simply tells the peer to install the leader's snapshot; otherwise it proceeds as before.
func (rf *Raft) doAppendEntries(peerId int) {
    args := AppendEntriesArgs{}
    args.Term = rf.currentTerm
    args.LeaderId = rf.me
    args.LeaderCommit = rf.commitIndex
    args.Entries = make([]LogEntry, 0)
    args.PrevLogIndex = rf.nextIndex[peerId] - 1
    // if prevLogIndex is the last entry covered by the leader's snapshot, take the snapshot's last term
    if args.PrevLogIndex == rf.lastIncludedIndex {
        args.PrevLogTerm = rf.lastIncludedTerm
    } else { // otherwise it must lie inside the log slice
        args.PrevLogTerm = rf.log[rf.index2LogPos(args.PrevLogIndex)].Term
    }
    args.Entries = append(args.Entries, rf.log[rf.index2LogPos(args.PrevLogIndex+1):]...)

    DPrintf("RaftNode[%d] appendEntries starts, currentTerm[%d] peer[%d] logIndex=[%d] nextIndex[%d] matchIndex[%d] args.Entries[%d] commitIndex[%d]",
        rf.me, rf.currentTerm, peerId, rf.lastIndex(), rf.nextIndex[peerId], rf.matchIndex[peerId], len(args.Entries), rf.commitIndex)

    go func() {
        // DPrintf("RaftNode[%d] appendEntries starts, myTerm[%d] peerId[%d]", rf.me, args1.Term, id)
        reply := AppendEntriesReply{}
        if ok := rf.sendAppendEntries(peerId, &args, &reply); ok {
            rf.mu.Lock()
            defer rf.mu.Unlock()
            defer func() {
                DPrintf("RaftNode[%d] appendEntries ends, currentTerm[%d] peer[%d] logIndex=[%d] nextIndex[%d] matchIndex[%d] commitIndex[%d]",
                    rf.me, rf.currentTerm, peerId, rf.lastIndex(), rf.nextIndex[peerId], rf.matchIndex[peerId], rf.commitIndex)
            }()
            // if we are no longer the leader of the term this RPC was sent in, do nothing
            if rf.currentTerm != args.Term {
                return
            }
            if reply.Term > rf.currentTerm { // step down to follower
                rf.role = ROLE_FOLLOWER
                rf.leaderId = -1
                rf.currentTerm = reply.Term
                rf.votedFor = -1
                rf.persist()
                return
            }
            // no lock was held during the RPC, so related state may have been changed by other RPCs;
            // therefore update nextIndex/matchIndex from the state captured when the RPC was sent rather than doing relative increments
            if reply.Success { // replication succeeded
                rf.nextIndex[peerId] = args.PrevLogIndex + len(args.Entries) + 1
                rf.matchIndex[peerId] = rf.nextIndex[peerId] - 1
                rf.updateCommitIndex() // update commitIndex
            } else {
                // back-off optimization, see: https://thesquareplanet.com/blog/students-guide-to-raft/#an-aside-on-optimizations
                nextIndexBefore := rf.nextIndex[peerId] // for logging only
                if reply.ConflictTerm != -1 { // the follower's term at prevLogIndex conflicts
                    // look for the last occurrence of conflictTerm in the leader's log; if found use it as nextIndex, otherwise use the follower's conflictIndex
                    conflictTermIndex := -1
                    for index := args.PrevLogIndex; index > rf.lastIncludedIndex; index-- {
                        if rf.log[rf.index2LogPos(index)].Term == reply.ConflictTerm {
                            conflictTermIndex = index
                            break
                        }
                    }
                    if conflictTermIndex != -1 { // the leader's log contains this term: retry from its latest occurrence before prevLogIndex
                        rf.nextIndex[peerId] = conflictTermIndex
                    } else {
                        rf.nextIndex[peerId] = reply.ConflictIndex // start syncing from the follower's first index of that term
                    }
                } else {
                    // the follower did not report a term conflict at prevLogIndex; either the entry was snapshotted away or its log is too short,
                    // so just take the returned conflictIndex as the new nextIndex
                    rf.nextIndex[peerId] = reply.ConflictIndex
                }
                DPrintf("RaftNode[%d] back-off nextIndex, peer[%d] nextIndexBefore[%d] nextIndex[%d]", rf.me, peerId, nextIndexBefore, rf.nextIndex[peerId])
            }
        }
    }()
}
The subtlety in this function is that prevLogIndex may be exactly the last entry covered by the snapshot (the corresponding handling on the receiving side is in AppendEntries); processing the reply is otherwise the same as in the plain log case.
// snapshot-aware
func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) {
    rf.mu.Lock()
    defer rf.mu.Unlock()

    DPrintf("RaftNode[%d] Handle AppendEntries, LeaderId[%d] Term[%d] CurrentTerm[%d] role=[%s] logIndex[%d] prevLogIndex[%d] prevLogTerm[%d] commitIndex[%d] Entries[%v]",
        rf.me, rf.leaderId, args.Term, rf.currentTerm, rf.role, rf.lastIndex(), args.PrevLogIndex, args.PrevLogTerm, rf.commitIndex, args.Entries)

    reply.Term = rf.currentTerm
    reply.Success = false
    reply.ConflictIndex = -1
    reply.ConflictTerm = -1

    defer func() {
        DPrintf("RaftNode[%d] Return AppendEntries, LeaderId[%d] Term[%d] CurrentTerm[%d] role=[%s] logIndex[%d] prevLogIndex[%d] prevLogTerm[%d] Success[%v] commitIndex[%d] log[%v] ConflictIndex[%d]",
            rf.me, rf.leaderId, args.Term, rf.currentTerm, rf.role, rf.lastIndex(), args.PrevLogIndex, args.PrevLogTerm, reply.Success, rf.commitIndex, len(rf.log), reply.ConflictIndex)
    }()

    if args.Term < rf.currentTerm {
        return
    }
    // a larger term is observed: become a follower of that term
    if args.Term > rf.currentTerm {
        rf.currentTerm = args.Term
        rf.role = ROLE_FOLLOWER
        rf.votedFor = -1
        rf.persist()
        // fall through
    }
    // recognize the new leader
    rf.leaderId = args.LeaderId
    // refresh the activity timestamp
    rf.lastActiveTime = time.Now()

    // if prevLogIndex lies inside our snapshot (and is not its last entry), we can only ask the leader to restart from index 1
    if args.PrevLogIndex < rf.lastIncludedIndex {
        reply.ConflictIndex = 1
        return
    } else if args.PrevLogIndex == rf.lastIncludedIndex { // prevLogIndex is exactly the snapshot's last entry
        if args.PrevLogTerm != rf.lastIncludedTerm { // term conflict: restart from index 1
            reply.ConflictIndex = 1
            return
        }
        // otherwise fall through to the log-overwrite logic below
    } else { // prevLogIndex is after the snapshot, check further
        if args.PrevLogIndex > rf.lastIndex() { // there is no entry at prevLogIndex
            reply.ConflictIndex = rf.lastIndex() + 1
            return
        }
        // there is an entry at prevLogIndex, so its term must match, otherwise reply false
        if rf.log[rf.index2LogPos(args.PrevLogIndex)].Term != args.PrevLogTerm {
            reply.ConflictTerm = rf.log[rf.index2LogPos(args.PrevLogIndex)].Term
            for index := rf.lastIncludedIndex + 1; index <= args.PrevLogIndex; index++ { // find the first index of the conflicting term; at worst it is PrevLogIndex itself
                if rf.log[rf.index2LogPos(index)].Term == reply.ConflictTerm {
                    reply.ConflictIndex = index
                    break
                }
            }
            return
        }
        // otherwise fall through to the log-overwrite logic below
    }

    // store the entries
    for i, logEntry := range args.Entries {
        index := args.PrevLogIndex + 1 + i
        logPos := rf.index2LogPos(index)
        if index > rf.lastIndex() { // beyond the existing log: just append
            rf.log = append(rf.log, logEntry)
        } else { // overlapping part
            if rf.log[logPos].Term != logEntry.Term {
                rf.log = rf.log[:logPos]          // delete this entry and everything after it
                rf.log = append(rf.log, logEntry) // then append the new entry
            } // if the terms match there is nothing to do, keep comparing the next entry
        }
    }
    rf.persist()

    // advance the commit index
    if args.LeaderCommit > rf.commitIndex {
        rf.commitIndex = args.LeaderCommit
        if rf.lastIndex() < rf.commitIndex {
            rf.commitIndex = rf.lastIndex()
        }
    }
    reply.Success = true
}
The new handling here is:
if args.PrevLogIndex < rf.lastIncludedIndex {
    reply.ConflictIndex = 1
    return
} else if args.PrevLogIndex == rf.lastIncludedIndex { // prevLogIndex is exactly the snapshot's last entry
    if args.PrevLogTerm != rf.lastIncludedTerm { // term conflict: restart from index 1
        reply.ConflictIndex = 1
        return
    }
If the entries being sent fall inside the peer's snapshot, or prevLogIndex is exactly the last entry of the peer's snapshot but the terms disagree, the follower asks the leader to resend from its very first entry. In other words, when the follower has no way to compare the term at prevLogIndex (because prevLogIndex lies inside the follower's snapshot), it effectively asks the leader to send over its own snapshot by setting conflictIndex = 1.
doInstallSnapshot
func (rf *Raft) doInstallSnapshot(peerId int) {
    DPrintf("RaftNode[%d] doInstallSnapshot starts, leaderId[%d] peerId[%d]\n", rf.me, rf.leaderId, peerId)

    args := InstallSnapshotArgs{}
    args.Term = rf.currentTerm
    args.LeaderId = rf.me
    args.LastIncludedIndex = rf.lastIncludedIndex
    args.LastIncludedTerm = rf.lastIncludedTerm
    args.Offset = 0
    args.Data = rf.persister.ReadSnapshot()
    args.Done = true

    reply := InstallSnapshotReply{}

    go func() {
        if rf.sendInstallSnapshot(peerId, &args, &reply) {
            rf.mu.Lock()
            defer rf.mu.Unlock()
            // if we are no longer the leader of the term this RPC was sent in, do nothing
            if rf.currentTerm != args.Term {
                return
            }
            if reply.Term > rf.currentTerm { // step down to follower
                rf.role = ROLE_FOLLOWER
                rf.leaderId = -1
                rf.currentTerm = reply.Term
                rf.votedFor = -1
                rf.persist()
                return
            }
            rf.nextIndex[peerId] = rf.lastIndex() + 1      // resume log replication from the leader's end (not optimized, but good enough)
            rf.matchIndex[peerId] = args.LastIncludedIndex // the position known to be replicated (not optimized, but good enough)
            rf.updateCommitIndex()                         // update commitIndex
            DPrintf("RaftNode[%d] doInstallSnapshot ends, leaderId[%d] peerId[%d] nextIndex[%d] matchIndex[%d] commitIndex[%d]\n", rf.me, rf.leaderId, peerId, rf.nextIndex[peerId],
                rf.matchIndex[peerId], rf.commitIndex)
        }
    }()
}
After the snapshot has been sent, replication to that peer restarts from the index after the leader's last log entry. This is indeed unoptimized, and it may be why some test cases run into trouble; see the sketch of a possible refinement below.
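A possible refinement, sketched below as a hypothetical helper rather than the author's code: once the follower accepts the snapshot it is known to match the leader up to LastIncludedIndex, so replication can resume right after the snapshot instead of from the leader's tail.

// Hypothetical alternative to the two nextIndex/matchIndex lines above (not the original code).
func (rf *Raft) onInstallSnapshotSuccess(peerId int, args *InstallSnapshotArgs) {
    rf.nextIndex[peerId] = args.LastIncludedIndex + 1 // resume right after the snapshot
    rf.matchIndex[peerId] = args.LastIncludedIndex    // the follower matches up to the snapshot
    rf.updateCommitIndex()
}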
// InstallSnapshot RPC handler
func (rf *Raft) InstallSnapshot(args *InstallSnapshotArgs, reply *InstallSnapshotReply) {
    rf.mu.Lock()
    defer rf.mu.Unlock()

    DPrintf("RaftNode[%d] installSnapshot starts, rf.lastIncludedIndex[%d] rf.lastIncludedTerm[%d] args.lastIncludedIndex[%d] args.lastIncludedTerm[%d] logSize[%d]",
        rf.me, rf.lastIncludedIndex, rf.lastIncludedTerm, args.LastIncludedIndex, args.LastIncludedTerm, len(rf.log))

    reply.Term = rf.currentTerm

    if args.Term < rf.currentTerm {
        return
    }
    // a larger term is observed: become a follower of that term
    if args.Term > rf.currentTerm {
        rf.currentTerm = args.Term
        rf.role = ROLE_FOLLOWER
        rf.votedFor = -1
        rf.persist()
        // fall through
    }
    // recognize the new leader
    rf.leaderId = args.LeaderId
    // refresh the activity timestamp
    rf.lastActiveTime = time.Now()

    // the leader's snapshot is no newer than our own: ignore it
    if args.LastIncludedIndex <= rf.lastIncludedIndex {
        return
    } else { // the leader's snapshot reaches further than ours
        if args.LastIncludedIndex < rf.lastIndex() { // we still have log entries beyond the snapshot, decide whether to keep them
            if rf.log[rf.index2LogPos(args.LastIncludedIndex)].Term != args.LastIncludedTerm {
                rf.log = make([]LogEntry, 0) // term conflict: drop everything outside the snapshot
            } else { // terms agree: keep the entries after the snapshot
                leftLog := make([]LogEntry, rf.lastIndex()-args.LastIncludedIndex)
                copy(leftLog, rf.log[rf.index2LogPos(args.LastIncludedIndex)+1:])
                rf.log = leftLog
            }
        } else {
            rf.log = make([]LogEntry, 0) // the snapshot covers more than our whole log: clear the log
        }
    }

    // update the snapshot position
    rf.lastIncludedIndex = args.LastIncludedIndex
    rf.lastIncludedTerm = args.LastIncludedTerm
    // persist the raft state and the snapshot
    rf.persister.SaveStateAndSnapshot(rf.raftStateForPersist(), args.Data)
    // hand the snapshot up to the application layer
    rf.installSnapshotToApplication()

    DPrintf("RaftNode[%d] installSnapshot ends, rf.lastIncludedIndex[%d] rf.lastIncludedTerm[%d] args.lastIncludedIndex[%d] args.lastIncludedTerm[%d] logSize[%d]",
        rf.me, rf.lastIncludedIndex, rf.lastIncludedTerm, args.LastIncludedIndex, args.LastIncludedTerm, len(rf.log))
}
The core handling in this function:
if args.LastIncludedIndex <= rf.lastIncludedIndex {
    return
} else {
    if args.LastIncludedIndex < rf.lastIndex() { // we still have log entries beyond the snapshot, decide whether to keep them
        if rf.log[rf.index2LogPos(args.LastIncludedIndex)].Term != args.LastIncludedTerm {
            rf.log = make([]LogEntry, 0) // term conflict: drop everything outside the snapshot
        } else { // terms agree: keep the entries after the snapshot
            leftLog := make([]LogEntry, rf.lastIndex()-args.LastIncludedIndex)
            copy(leftLog, rf.log[rf.index2LogPos(args.LastIncludedIndex)+1:])
            rf.log = leftLog
        }
    } else {
        rf.log = make([]LogEntry, 0) // the snapshot covers more than our whole log: clear the log
    }
}
If the leader's snapshot is no newer than the local snapshot, nothing is done. If it is newer but still within the range of the local log, the terms are compared before truncating: on a term conflict all remaining entries are discarded, otherwise only the entries now covered by the snapshot are cut off.
If the leader's snapshot extends beyond the local log altogether, the log is simply cleared.
func (rf *Raft) updateCommitIndex() {
    // find an N such that a majority of matchIndex[i] >= N
    // e.g. peer[0]'s matchIndex = 2
    //      peer[1]'s matchIndex = 2
    //      peer[2]'s matchIndex = 1
    //      sorted: 1, 2, 2
    // so updating commitIndex is just taking the median
    sortedMatchIndex := make([]int, 0)
    sortedMatchIndex = append(sortedMatchIndex, rf.lastIndex())
    for i := 0; i < len(rf.peers); i++ {
        if i == rf.me {
            continue
        }
        sortedMatchIndex = append(sortedMatchIndex, rf.matchIndex[i])
    }
    sort.Ints(sortedMatchIndex)
    newCommitIndex := sortedMatchIndex[len(rf.peers)/2]
    // if the index falls within the snapshot, skip the term check: anything in a snapshot was committed by the cluster;
    // otherwise still require that the entry's term equals the current term
    if newCommitIndex > rf.commitIndex && (newCommitIndex <= rf.lastIncludedIndex || rf.log[rf.index2LogPos(newCommitIndex)].Term == rf.currentTerm) {
        rf.commitIndex = newCommitIndex
    }
    DPrintf("RaftNode[%d] updateCommitIndex, commitIndex[%d] matchIndex[%v]", rf.me, rf.commitIndex, sortedMatchIndex)
}
This just pulls a piece of existing code out into its own function; the one thing to watch is the changed condition for advancing commitIndex. With that, 3B is essentially done. I will post the final code on CSDN and upload it to my GitHub (lab2andlab3).
Afterword (pitfalls)
The Raft layer can block while holding rf.mu and writing to applyCh. If the KV layer has a code path that first acquires kv.mu and then tries to acquire rf.mu, that path can never get rf.mu (Raft holds it and is blocked on the channel); meanwhile, if the KV applyLoop is handling the previous log entry and trying to acquire kv.mu, it cannot get kv.mu either, and everything deadlocks. This is exactly the situation the snapshot loop at the top of this post has to avoid; the sketch below reproduces the wait cycle.
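A minimal, self-contained sketch of the three-way wait cycle described above. All names here are made up, not the lab's types, and the program deadlocks by design when run (the Go runtime reports "all goroutines are asleep").

package main

import (
    "sync"
    "time"
)

func main() {
    var rfMu, kvMu sync.Mutex
    applyCh := make(chan int) // unbuffered, like the lab's applyCh

    // "Raft layer": holds rfMu while pushing onto applyCh.
    go func() {
        rfMu.Lock()
        defer rfMu.Unlock()
        applyCh <- 1 // blocks until the "applyLoop" below receives
    }()

    // "KV snapshot loop" with the buggy ordering: takes kvMu first, then needs rfMu.
    go func() {
        kvMu.Lock()
        defer kvMu.Unlock()
        time.Sleep(10 * time.Millisecond) // let the Raft goroutine grab rfMu first
        rfMu.Lock()                       // blocks forever: Raft still holds rfMu
        rfMu.Unlock()
    }()

    // "KV applyLoop": needs kvMu before it can drain applyCh.
    time.Sleep(20 * time.Millisecond) // let the snapshot loop grab kvMu first
    kvMu.Lock()                       // blocks forever: the snapshot loop holds kvMu
    <-applyCh
    kvMu.Unlock()
}

The cycle is: Raft waits for the applyCh receive, the applyLoop waits for kv.mu, the snapshot loop waits for rf.mu, which Raft holds. Hence ExceedLogSize must be called without holding kv.mu, as in the snapshotLoop shown at the start.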
It is finally over, and I can get back to reading papers and books in peace. Keep it up — 耀瑶, work hard and keep getting better!