MIT 6.824 Lab 2B (2023) Summary

1. Lab Goal

Implement the log replication part, which means modifying the AppendEntries and RequestVote handlers.

2. Approach

RequestVote grants a vote only when two conditions hold: (1) the candidate's log is at least as up-to-date as the voter's, and (2) the candidate's term is the highest the voter has seen (and the voter has not already voted for someone else in that term). Without further care, a partitioned machine whose term has grown larger than the leader's would, on rejoining, first drive every machine's term up and then force a fresh election, which is destabilizing. Hence the Pre-Vote optimization: after a partition, a candidate first contacts the other machines to see whether it could possibly win; if not, it does not increment its term, so it cannot keep incrementing its term alone while partitioned.
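A minimal sketch of that Pre-Vote idea (I did not implement it in this lab; it reuses the lab skeleton's sendRequestVote helper and my RequestVoteArgs/Reply field names, and a real Pre-Vote RPC must not make the receiving peers update their term or reset their election timers):

func (rf *Raft) preVoteWouldWin() bool {
	// probe with CurrentTerm+1 without actually bumping our own term
	args := &RequestVoteArgs{
		Candidate_Curr_Term: rf.CurrentTerm + 1,
		Candidate_Id:        rf.me,
		Last_Log_Index:      len(rf.Log_Array) - 1,
		Last_Log_Term:       rf.Log_Array[len(rf.Log_Array)-1].Log_Term,
	}
	votes := 1 // count our own vote
	for peer := range rf.peers {
		if peer == rf.me {
			continue
		}
		reply := &RequestVoteReply{}
		if rf.sendRequestVote(peer, args, reply) && reply.Vote_Granted {
			votes++
		}
	}
	// only start a real election (and increment the term) if a majority would vote for us
	return votes > len(rf.peers)/2
}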

As for the election restriction: it exists so that log entries only ever flow from the leader to followers. Every committed entry must already be present on any electable leader, because both committing an entry and winning RequestVote require a majority, and any two majorities overlap in at least one server; that overlapping voter rejects any candidate whose log is missing committed entries, so there is always an electable leader that holds all committed entries. Here is the RequestVote handler:

func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) {
	// Your code here (2A, 2B).
	log.Println("Rpc requestVote start ", rf.me)
	rf.mu.Lock()
	log.Println("Rpc get lock", rf.me)
	defer rf.mu.Unlock()
	reply.Vote_Granted = false
	if rf.CurrentTerm > args.Candidate_Curr_Term || (rf.CurrentTerm == args.Candidate_Curr_Term && rf.VoteFor != -1 && rf.VoteFor != args.Candidate_Id) {
		reply.Current_Term = rf.CurrentTerm
		reply.Vote_Granted = false
		log.Println(args.Candidate_Id, " vote grant fail", rf.me)
		return
	}
	if args.Candidate_Curr_Term > rf.CurrentTerm {
		log.Println("request is higer!!!!")
		rf.ToFollower()
		rf.VoteFor = -1
		rf.CurrentTerm = args.Candidate_Curr_Term
	}
	// election limitation
	if args.Last_Log_Term < rf.Log_Array[len(rf.Log_Array)-1].Log_Term {
		reply.Current_Term = rf.CurrentTerm
		reply.Vote_Granted = false
		log.Println(args.Candidate_Id, " vote grant fail: log term not as new", rf.me)
		return
	}
	if args.Last_Log_Term == rf.Log_Array[len(rf.Log_Array)-1].Log_Term && args.Last_Log_Index < len(rf.Log_Array)-1 {
		reply.Current_Term = rf.CurrentTerm
		reply.Vote_Granted = false
		log.Println(args.Candidate_Id, " vote grant fail: log is shorter", rf.me)
		return
	}
	// both conditions are satisfied, grant the vote

	rf.VoteFor = args.Candidate_Id
	reply.Current_Term = rf.CurrentTerm
	log.Println(args.Candidate_Id, " vote grant ", rf.me)
	rf.ResetElection()
	reply.Vote_Granted = true
	// TODO: term is used for the candidate to update itself

}

For empty heartbeats versus heartbeats carrying log entries, I followed Tan's blog and use a dedicated replicator goroutine per peer to handle log replication for that peer. This really cuts down the mental burden, a brilliant touch!

for peer := range rf.peers {
	rf.Next_Idx[peer] = 1
	rf.Match_Idx[peer] = 0
	if peer != rf.me {
		rf.ReplicatorCond[peer] = sync.NewCond(&sync.Mutex{})
		go rf.replicator(peer)
	}
}



func (rf *Raft) replicator(peer int) {
	rf.ReplicatorCond[peer].L.Lock()
	defer rf.ReplicatorCond[peer].L.Unlock()
	for !rf.killed() {
		// if there is no need to replicate entries for this peer, just release CPU and wait other goroutine's signal if service adds new Command
		// if this peer needs replicating entries, this goroutine will call replicateOneRound(peer) multiple times until this peer catches up, and then wait
		for !rf.NeedReplicating(peer) {
			rf.ReplicatorCond[peer].Wait()
		}
		rf.replicateOneRound(peer)
	}
}

For a log heartbeat, the leader simply signals the replicator, which then runs one round of replication; because the loop re-checks whether the peer still lags, this neatly solves the problem of one round not being enough to catch a peer up!
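A rough sketch of the pieces around the replicator (NeedReplicating and the wake-up path are my own names; the State field and Leader constant are assumptions about my struct, not part of the lab skeleton):

func (rf *Raft) NeedReplicating(peer int) bool {
	rf.mu.Lock()
	defer rf.mu.Unlock()
	// only the leader replicates, and only while this peer is still missing entries
	return rf.State == Leader && rf.Match_Idx[peer] < len(rf.Log_Array)-1
}

// in Start(), after the leader appends a new entry, wake every replicator:
//	for peer := range rf.peers {
//		if peer != rf.me {
//			rf.ReplicatorCond[peer].Signal()
//		}
//	}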

Next comes the AppendEntries handler, where the core logic of log replication lives.

func (rf *Raft) AppendEntries(args *AppendArgs, reply *AppendReply) {
	rf.mu.Lock()
	reply.PrevLogIndex = args.PrevLogIndex
	log.Println("append ", rf.me)
	defer rf.mu.Unlock()
	if args.Leader_Term < rf.CurrentTerm {
		reply.Term = rf.CurrentTerm
		reply.Success = false
		return
	}
	if args.Leader_Term > rf.CurrentTerm {
		rf.CurrentTerm, rf.VoteFor = args.Leader_Term, -1
	}
	rf.ToFollower()
	// log.Println("append success", rf.me, "  ", time.Now())
	rf.ResetElection()

	// log doesn't match: delete the entries from prevLogIndex onward and return false
	if args.PrevLogIndex != 0 {
		if args.PrevLogIndex >= len(rf.Log_Array) || rf.Log_Array[args.PrevLogIndex].Log_Term != args.PrevLogTerm {
			reply.Success = false
			reply.Term = args.Leader_Term
			// DPrintf("the follower{%d} log from {%v} to {%v} ", rf.me, rf.Log_Array, rf.Log_Array[:args.PrevLogIndex])
			if args.PrevLogIndex < len(rf.Log_Array) {
				rf.Log_Array = rf.Log_Array[:args.PrevLogIndex]
			}
			return
		}
	} else {
		DPrintf("prevlog_idx = 0")
	}
	// append starting from PrevLogIndex + 1, i.e. next_idx
	// check whether we really need to truncate before appending, to guard against out-of-order RPC arrival
	if !rf.checkMyLog(args.PrevLogIndex, args.Entries) {
		rf.Log_Array = rf.Log_Array[:args.PrevLogIndex+1]
		rf.Log_Array = append(rf.Log_Array, args.Entries...)
	} else {
		DPrintf("the log before will cut the log, and branch here")
	}

	DPrintf("the follower{%d} log is success log{%v}............", rf.me, rf.Log_Array)
	// set the local commit index to the smaller of the last log index and leader_commit
	if rf.Committed_Idx < args.Leader_Commit {
		rf.Committed_Idx = int(math.Min(float64(args.Leader_Commit), float64(len(rf.Log_Array)-1)))
		rf.ApplyCond.Signal()
		DPrintf("Node{%v} commid{%v} change and notify", rf.me, rf.Committed_Idx)
	}
	// heartbeat bookkeeping
	rf.CurrentTerm = args.Leader_Term
	reply.Success = true
	//TODO:2,3,4,5 in the paper

}

If the logs do not match, the follower deletes the conflicting suffix, and the next AppendEntries uses the last entry of the previous term as prevLog, backing up term by term until prevLogIndex reaches 0. If the logs do match, we must not blindly truncate, because RPCs can arrive out of order: if the AppendEntries carrying the longer log arrives first and a shorter, older one arrives later, blind truncation would chop off entries we have already appended. That is why there is the extra checkMyLog step.
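checkMyLog is not shown above, so here is a hedged sketch of what it has to verify (my real version may differ in details): it returns true only when every incoming entry is already present locally with the same term at the same index, meaning this RPC is stale or a duplicate and truncating would lose data.

func (rf *Raft) checkMyLog(prevLogIndex int, entries []LogEntry) bool {
	for i, entry := range entries {
		idx := prevLogIndex + 1 + i
		if idx >= len(rf.Log_Array) || rf.Log_Array[idx].Log_Term != entry.Log_Term {
			// missing or conflicting entry: the caller must truncate and append
			return false
		}
	}
	// every incoming entry already matches: keep the (possibly longer) local log as is
	return true
}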

The AppendEntries reply handler also has to deal with out-of-order RPCs; prevLogIndex doubles as a sequence number. The median of matchIndex is used as the commit index, and when it grows, the applier goroutine is notified. The while + Wait pattern feels perfect for background goroutines that handle one specific task; both the Applier and the replicator use it, and that was my biggest takeaway.

func (rf *Raft) processAppendReply(peer int, args AppendArgs, reply AppendReply) {
	if reply.PrevLogIndex != rf.Next_Idx[peer]-1 {
		log.Printf("rpc order delay")
		return
	}
	if !reply.Success && reply.Term == args.Leader_Term {
		// log inconsistency failure: decrement next_idx and retry
		rf.Next_Idx[peer] = rf.GetIdxPreTerm(rf.Next_Idx[peer] - 1)
		log.Printf("Node{%v}'s next_idx become{%v}", rf.me, rf.Next_Idx[peer])
		return
	}
	// saw a higher term in the reply: step down to follower
	if !reply.Success && reply.Term > args.Leader_Term {
		log.Println("find the leadr and change state to follower")
		rf.ToFollower()
		rf.CurrentTerm, rf.VoteFor = reply.Term, -1
		return
	}
	if !reply.Success && reply.Term < args.Leader_Term {
		log.Fatalf("reply term smaller")
	}
	if reply.Success {
		log.Printf("Leader Node{%v} receive the Node{%v} append success next_id{%v} log_len{%v}, add{%v}", rf.me, peer, rf.Next_Idx[peer], len(rf.Log_Array), len(args.Entries))
		// update next_id and math_id
		// guard against two RPCs arriving concurrently and out of order
		rf.Next_Idx[peer] += len(args.Entries)
		rf.Match_Idx[peer] = rf.Next_Idx[peer] - 1
		// take the median of match_idx as commit_idx, since a majority of peers have replicated up to it
		DPrintf("match_array{%v}}", rf.Match_Idx)
		matchIdx := make([]int, 0)
		for i := 0; i < len(rf.peers); i++ {
			if rf.me != i {
				matchIdx = append(matchIdx, rf.Match_Idx[i])
			}
		}
		matchIdx = append(matchIdx, len(rf.Log_Array)-1)
		sort.Ints(matchIdx)
		commit_idx := matchIdx[(len(matchIdx))/2]
		DPrintf("match_array{%v} and commit_idx{%v}", rf.Match_Idx, commit_idx)
		if commit_idx > rf.Committed_Idx {
			DPrintf("Leader Node{%v} commit increase from{%v} to {%v} and signal", rf.me, rf.Committed_Idx, commit_idx)
			rf.Committed_Idx = commit_idx
			// notify the applier goroutine
			rf.ApplyCond.Signal()
		}
	}
}
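The GetIdxPreTerm helper called above backs next_idx up by a whole term at a time. A hedged sketch of what it does (details may differ from my actual code): given the prevLogIndex that just failed, walk back to the first index of that term in the leader's log, so the next AppendEntries uses the last entry of the previous term as prevLog.

func (rf *Raft) GetIdxPreTerm(idx int) int {
	if idx <= 0 {
		return 1 // never let next_idx drop below 1
	}
	if idx >= len(rf.Log_Array) {
		idx = len(rf.Log_Array) - 1
	}
	term := rf.Log_Array[idx].Log_Term
	// walk back to the first entry of this term
	for idx > 1 && rf.Log_Array[idx-1].Log_Term == term {
		idx--
	}
	return idx
}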

Then comes the applier goroutine.

func (rf *Raft) Applier() {
	for !rf.killed() {
		rf.mu.Lock()
		for rf.Last_Applied_Idx >= rf.Committed_Idx || len(rf.Log_Array) <= rf.Committed_Idx {
			rf.ApplyCond.Wait()
			DPrintf("Node{%v}, last_applied{%v}, commited_idx{%v}", rf.me, rf.Last_Applied_Idx, rf.Committed_Idx)
			rf.Committed_Idx = int(math.Min(float64(rf.Committed_Idx), float64(len(rf.Log_Array)-1)))
			DPrintf("{log len{%d}, commit_id{%d}}", len(rf.Log_Array), rf.Committed_Idx)
		}
		DPrintf("Node{%d}commit_idx{%d} last_applied_idx{%d} log{%v}, logtoapply{%v}", rf.me, rf.Committed_Idx, rf.Last_Applied_Idx, rf.Log_Array, rf.Log_Array[rf.Last_Applied_Idx+1:rf.Committed_Idx+1])
		entries := make([]LogEntry, rf.Committed_Idx-rf.Last_Applied_Idx)
		copy(entries, rf.Log_Array[rf.Last_Applied_Idx+1:rf.Committed_Idx+1])
		DPrintf("Node{%v} enries{%v}", rf.me, entries)
		// rf.mu.Unlock()
		for _, entry := range entries {
			rf.ApplyChan <- ApplyMsg{
				CommandValid: true,
				Command:      entry.Command,
				CommandIndex: entry.Index,
			}
		}

		// rf.mu.Lock()
		DPrintf("{Node %v} applies entries %v-%v in term %v", rf.me, rf.Last_Applied_Idx, rf.Committed_Idx, rf.CurrentTerm)
		rf.Last_Applied_Idx = int(math.Max(float64(rf.Last_Applied_Idx), float64(rf.Committed_Idx)))
		rf.mu.Unlock()
	}
}

There was a bug in the applier goroutine at first: unlocking and re-locking around the channel send exposed an intermediate state and broke consistency, so Last_Applied_Idx ended up being reconciled against a Committed_Idx that the next round had already advanced, and then the next step got stuck.
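If you do want to release the lock while sending on the apply channel, the usual fix is to snapshot Committed_Idx before unlocking and use that snapshot, not the live field, when advancing Last_Applied_Idx afterwards. A sketch of one apply round under that assumption:

rf.mu.Lock()
commitIdx := rf.Committed_Idx
entries := make([]LogEntry, commitIdx-rf.Last_Applied_Idx)
copy(entries, rf.Log_Array[rf.Last_Applied_Idx+1:commitIdx+1])
rf.mu.Unlock()

// send without holding the lock so other goroutines are not blocked
for _, entry := range entries {
	rf.ApplyChan <- ApplyMsg{
		CommandValid: true,
		Command:      entry.Command,
		CommandIndex: entry.Index,
	}
}

rf.mu.Lock()
// commitIdx may be stale by now, but it is exactly what we just applied
rf.Last_Applied_Idx = int(math.Max(float64(rf.Last_Applied_Idx), float64(commitIdx)))
rf.mu.Unlock()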

I also learned to write log messages in the Node{%v} style; the braces make them much easier to scan.

Ran the tests 500 times; it should be good.
