MIT 6.824 2A Raft Election

最新推荐文章于 2022-05-03 17:27:02 发布

SakamataZ

最新推荐文章于 2022-05-03 17:27:02 发布

阅读量1.8k

点赞数 3

分类专栏： # 后端文章标签： java 分布式

本文链接：https://blog.csdn.net/treblez/article/details/121339480

版权

后端专栏收录该内容

11 篇文章 1 订阅

订阅专栏

这个Lab吸取了上次的教训，尽量用小粒度的锁。
用的是主流的 electionLoop+pingLoop的实现，但是踩了很多坑，主要是Time.timer和term更新的问题。
这个链接：演示图像和Raft论文的图2是很有用的，最好一开始就按照这两个做。
遇到问题也不要死磕，可以看下别人的，自己漏了哪个地方，因为多线程+选举的程序，debug真的无比困难。

文章目录

知识点总结

raft 如何进行leader选举？
使用随机计时器
为什么要有raft？它用来解决什么问题？
raft是一个一致性算法，
复制状态机（replicated state machines）用于对有单个集群leader的大型系统进行leader选举和配置信息的存储。复制状态机通常使用复制日志实现，一致性算法的任务是保证复制日志的一致性。
共识算法的特性有哪些？
1.在网络延迟、分区和包丢失、复制和重排序条件下保证安全性（永远返回正确的结果）
2.只要大多数服务器可操作、可通信那么就是完全可用的
3.不依赖时间来保证一致性
4.少数速度较慢的服务器不影响整体系统性能
※raft如何实现一致性？
首先选举一位leader leader有管理复制日志的责任，接受来自客户的日志条目，在其它服务器上复制它们。
这样就把一致性问题分为了三个子问题：leader选举，日志复制与一致性，安全性
brain split如何解决？
在raft中，使用过半票决的方式解决。
如果系统中有2*F+1个服务器，那么，最多可以接受F个服务器出现故障
raft实际上更加依赖于过半服务，因为raft的每一个操作的过半服务器，必然有一个处于上一个操作的过半服务器中，新的过半服务器就会知道旧的过半服务器的term
raft在分布式数据库中的角色是什么？
中间层，构建多副本日志。只有过半服务器拷贝了操作的副本，才能够继续执行数据库操作
raft应用层调用流程？
客户端通过Start函数将请求存放在log中，commit之后，raft通过channel（ApplyCh）通知客户端成功。
包括Log位置，term number等等信息。

Raft 2A

在这里插入图片描述

一些坑

选举没注意处理过期的Term 导致follower总是莫名其妙变成leader
选举没注意更新Term
不要用Time.timer 不熟的话很容易出错

完整代码

package raft

//
// this is an outline of the API that raft must expose to
// the service (or tester). see comments below for
// each of these functions for more details.
//
// rf = Make(...)
//   create a new Raft server.
// rf.Start(command interface{}) (index, Term, isleader)
//   start agreement on a new log entry
// rf.GetState() (Term, isLeader)
//   ask a Raft for its current Term, and whether it thinks it is leader
// ApplyMsg
//   each time a new entry is committed to the log, each Raft peer
//   should send an ApplyMsg to the service (or tester)
//   in the same server.
//

import (
	"math/rand"
	//	"bytes"
	"sync"
	"sync/atomic"
	"time"

	//	"6.824/labgob"
	"6.824/labrpc"
)

// ApplyMsg
// as each Raft peer becomes aware that successive log Entries are
// committed, the peer should send an ApplyMsg to the service (or
// tester) on the same server, via the applyCh passed to Make(). set
// CommandValid to true to indicate that the ApplyMsg contains a newly
// committed log entry.
//
// in part 2D you'll want to send other kinds of messages (e.g.,
// snapshots) on the applyCh, but set CommandValid to false for these
// other uses.
//
type ApplyMsg struct {
	CommandValid bool
	Command      interface{}
	CommandIndex int

	// For 2D:
	SnapshotValid bool
	Snapshot      []byte
	SnapshotTerm  int
	SnapshotIndex int
}

const (
	FOLLOWER = iota
	CANDIDATE
	LEADER
)
type LogEntry struct {
	Command interface{}
	Term    int
}

//
// A Go object implementing a single Raft peer.
//
type Raft struct {
	mu        sync.Mutex          // Lock to protect shared access to this peer's state
	peers     []*labrpc.ClientEnd // RPC end points of all peers
	persister *Persister          // Object to hold this peer's persisted state
	me        int                 // this peer's index into peers[]
	dead      int32               // set by Kill()

	// Your data here (2A, 2B, 2C).
	// Look at the paper's Figure 2 for a description of what
	// state a Raft server must maintain.
	//persistent for all
	role			int
	currentTerm		int
	votedFor		int
	log 			[]*LogEntry
	//volatile for all
	commitIndex		int
	lastApplied		int
	//volatile for leaders
	nextIndex		[]int
	matchindex		[]int
	electionTimer time.Timer
	lastElectionTimer 	time.Time
	electTime 		int64
}

// return currentTerm and whether this server
// believes it is the leader.
func (rf *Raft) GetState() (int, bool) {
	var term int
	var isleader bool
	// Your code here (2A).
	rf.mu.Lock()
	defer rf.mu.Unlock()
	term = rf.currentTerm
	isleader = rf.role==LEADER
	return term, isleader
}
func (rf *Raft) getElectTime() int64 {
	rand.Seed(int64(rf.me)*1000)
	return rand.Int63n(100) + 300
}
func (rf *Raft) becomeCandidate() {
	_, err := DPrintf("%d change to candidate, it role is %d", rf.me,rf.role)
	if err != nil {
		return
	}
	rf.role = CANDIDATE
	rf.currentTerm++
	rf.votedFor = rf.me
	//rf.persist()
}

func (rf *Raft) becomeFollower(term int) {
	rf.role = FOLLOWER
	rf.currentTerm = term
	rf.votedFor = -1
	//rf.persist()
	//rf.lastElectionTimer = time.Now()
	_, err := DPrintf("%d change to follower with Term %d", rf.me, rf.currentTerm)
	if err != nil {
		return
	}
}

func (rf *Raft) becomeLeader() {
	rf.role = LEADER
	//rf.persist()
	/*
		for i := 0; i < rf.PeersNum; i++ {
			rf.matchIndex[i] = 0
			rf.nextIndex[i] = len(rf.log)
		}
	*/
	go rf.pingLoop()
	_, err := DPrintf("%d change to leader", rf.me)
	if err != nil {
		return
	}
}

func (rf *Raft) electionLoop() {
	for !rf.killed() {
		// 等待选举超时
		rf.mu.Lock()
		// 跳过leader
		if rf.role == LEADER {
			_, err := DPrintf("%d role is leader,will skip election...", rf.me )
			if err != nil {
				return
			}
			rf.mu.Unlock()
		}else if time.Since(rf.lastElectionTimer).Milliseconds() > rf.electTime{
			_, err := DPrintf("%d time out!lastElectionTimer is %d," +
				"electTime is %d will get lock...", rf.me,
				time.Since(rf.lastElectionTimer).Milliseconds(),
				rf.electTime)
			if err != nil {
				return
			}
			//开始选举
			rf.becomeCandidate()
			// request vote from each Peer except itself
			// 发起选举
			args := RequestVoteArgs{rf.currentTerm,rf.me,rf.lastApplied,
				rf.lastApplied}
			var conMutex sync.Mutex
			count := 1
			for server,_ := range rf.peers{
				if server==rf.me{
					continue
				}
				go func(server int){
					reply := RequestVoteReply{}
					ok := rf.SendRequestVote(server, &args, &reply)
					rf.mu.Lock()
					defer rf.mu.Unlock()
					_, err := DPrintf("%d send Request to %d", rf.me, server)
					if err != nil {
						return
					}
					// 注意！处理过期的term
					if ok==false||reply.Term<rf.currentTerm{
						_, err := DPrintf("%d return be", rf.me)
						if err != nil {
							return
						}
						return
					}
					// 更大的term，更新并且变成follower
					if reply.Term>rf.currentTerm{
						rf.currentTerm = reply.Term
						rf.becomeFollower(reply.Term)
						return
					}
					if reply.VoteGranted {
						conMutex.Lock()
						defer conMutex.Unlock()
						//  已经是leader了，跳过
						if rf.role==LEADER{
							return
						}
						count = count + 1
						if count> len(rf.peers)/2 {
							rf.becomeLeader()
							_, err := DPrintf("%d get %d ticket,become leader",rf.me, count)
							if err != nil {
								return
							}
						}
					}
				}(server)
			}
			rf.lastElectionTimer = time.Now()
			rf.electTime = rf.getElectTime()
			rf.mu.Unlock()
		}else{
			rf.mu.Unlock()
		}
		time.Sleep(80 * time.Millisecond)
	}
}

func (rf *Raft) pingLoop() {
	for {
		rf.mu.Lock()
		// 如果不是leader退出循环
		if rf.role != LEADER {
			_, err := DPrintf("%d not leader!",rf.me)
			if err != nil {
				return
			}
			rf.mu.Unlock()
			return
		}
		args := AppendEntriesArgs{}
		args.Term = rf.currentTerm
		args.LeaderId = rf.me
		for server,_ := range rf.peers{
			if server==rf.me{
				continue
			}
			rf.mu.Unlock()
			go func(server int){
				reply := AppendEntriesReply{}
				ok := rf.SendAppendEntries(server, &args, &reply)
				rf.mu.Lock()
				defer rf.mu.Unlock()
				if ok==false {
					return
				}
				// 注意！判断Term
				if reply.Term>rf.currentTerm{
					rf.becomeFollower(reply.Term)
				}
				_, err := DPrintf("leader %d send heart pack to %d",rf.me,server)
				if err != nil {
					return
				}
			}(server)
			rf.mu.Lock()
		}
		rf.mu.Unlock()
		time.Sleep(100*time.Millisecond)
	}
}


//
// save Raft's persistent state to stable storage,
// where it can later be retrieved after a crash and restart.
// see paper's Figure 2 for a description of what should be persistent.
//
func (rf *Raft) persist() {
	// Your code here (2C).
	// Example:
	// w := new(bytes.Buffer)
	// e := labgob.NewEncoder(w)
	// e.Encode(rf.xxx)
	// e.Encode(rf.yyy)
	// data := w.Bytes()
	// rf.persister.SaveRaftState(data)
}


//
// restore previously persisted state.
//
func (rf *Raft) readPersist(data []byte) {
	if data == nil || len(data) < 1 { // bootstrap without any state?
		return
	}
	// Your code here (2C).
	// Example:
	// r := bytes.NewBuffer(data)
	// d := labgob.NewDecoder(r)
	// var xxx
	// var yyy
	// if d.Decode(&xxx) != nil ||
	//    d.Decode(&yyy) != nil {
	//   error...
	// } else {
	//   rf.xxx = xxx
	//   rf.yyy = yyy
	// }
}


//
// A service wants to switch to snapshot.  Only do so if Raft hasn't
// have more recent info since it communicate the snapshot on applyCh.
//
func (rf *Raft) CondInstallSnapshot(lastIncludedTerm int, lastIncludedIndex int, snapshot []byte) bool {

	// Your code here (2D).

	return true
}

// the service says it has created a snapshot that has
// all info up to and including index. this means the
// service no longer needs the log through (and including)
// that index. Raft should now trim its log as much as possible.
func (rf *Raft) Snapshot(index int, snapshot []byte) {
	// Your code here (2D).

}


//
// example RequestVote RPC arguments structure.
// field names must start with capital letters!
//
type RequestVoteArgs struct {
	// Your data here (2A, 2B).
	Term			int
	CandidateId		int
	LastLogIndex 	int
	LastLogTerm		int
}

//
// example RequestVote RPC reply structure.
// field names must start with capital letters!
//
type RequestVoteReply struct {
	// Your data here (2A).
	Term 	int
	VoteGranted	bool
}


func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) {

	// Your code here (2A, 2B).
	rf.mu.Lock()
	defer rf.mu.Unlock()
	_, err := DPrintf("%d RequestVote get from %d", rf.me,args.CandidateId)
	if err != nil {
		return
	}
	// 拒绝投票
	if args.Term<=rf.currentTerm{
		_, err := DPrintf("confuse vote to %d",args.CandidateId)
		if err != nil {
			return
		}
		reply.VoteGranted = false
		reply.Term = rf.currentTerm
		return
	}
	//无论如何，更新Term
	reply.Term = rf.currentTerm
	// 转换为follower
	if args.Term > rf.currentTerm{
		_, err := DPrintf("%d become to follower because of bigger term",rf.me)
		if err != nil {
			return
		}
		//转换为follower
		rf.becomeFollower(args.Term)
	}
	//每个任期只能投票给一个人
	if rf.votedFor == -1 {
		_, err := DPrintf("%d vote to %d",rf.me, args.CandidateId)
		if err != nil {
			return
		}
		reply.VoteGranted = true
		rf.votedFor = args.CandidateId
		// 授予投票也要更新时间
		rf.lastElectionTimer = time.Now()
		// 投票之后再次更新Term！
		reply.Term = rf.currentTerm
	}
	rf.persist()
}

func (rf *Raft) SendRequestVote(server int, args *RequestVoteArgs, reply *RequestVoteReply) bool {
	ok := rf.peers[server].Call("Raft.RequestVote", args, reply)
	return ok
}

type AppendEntriesArgs struct {
	Term int
	LeaderId int
	PrevLogIndex int
	PrevLogTerm	[]int
	Entries		[]int
	LeaderCommit	int
}

type AppendEntriesReply struct{
	Term    int
	Success bool
}

func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply){
	rf.mu.Lock()
	defer rf.mu.Unlock()
	// 小于当前任
	if args.Term<rf.currentTerm {
		reply.Term = rf.currentTerm
		reply.Success = false
		return
	}
	// 发现更大的任期，则转为该任期的follower
	if args.Term > rf.currentTerm {
		rf.becomeFollower(args.Term)
	}
	// 正确的心跳包
	reply.Success = true
	rf.lastElectionTimer = time.Now()
	//日志操作
	rf.persist()
}
func (rf *Raft) SendAppendEntries(server int, args *AppendEntriesArgs, reply *AppendEntriesReply) bool {
	ok := rf.peers[server].Call("Raft.AppendEntries", args, reply)
	return ok
}
func (rf *Raft) Start(command interface{}) (int, int, bool) {
	index := -1
	term := -1
	isLeader := true

	// Your code here (2B).


	return index, term, isLeader
}


//
// the tester doesn't halt goroutines created by Raft after each test,
// but it does call the Kill() method. your code can use killed() to
// check whether Kill() has been called. the use of atomic avoids the
// need for a lock.
//
// the issue is that long-running goroutines use memory and may chew
// up CPU time, perhaps causing later tests to fail and generating
// confusing debug output. any goroutine with a long-running loop
// should call killed() to check whether it should stop.
//
func (rf *Raft) Kill() {
	atomic.StoreInt32(&rf.dead, 1)
	// Your code here, if desired.
}

func (rf *Raft) killed() bool {
	z := atomic.LoadInt32(&rf.dead)
	return z == 1
}

// The ticker go routine starts a new election if this peer hasn't received
// heartsbeats recently.
func (rf *Raft) ticker() {
	for rf.killed() == false {

		// Your code here to check if a leader election should
		// be started and to randomize sleeping time using
		// time.Sleep().

	}
}

// Make
// the service or tester wants to create a Raft server. the ports
// of all the Raft servers (including this one) are in peers[]. this
// server's port is peers[me]. all the servers' peers[] arrays
// have the same order. persister is a place for this server to
// save its persistent state, and also initially holds the most
// recent saved state, if any. applyCh is a channel on which the
// tester or service expects Raft to send ApplyMsg messages.
// Make() must return quickly, so it should start goroutines
// for any long-running work.
//
func Make(peers []*labrpc.ClientEnd, me int,
	persister *Persister, applyCh chan ApplyMsg) *Raft {
	rf := &Raft{}
	rf.peers = peers
	rf.persister = persister
	rf.me = me

	// Your initialization code here (2A, 2B, 2C).
	//一开始的角色是FOLLOWER
	rf.role = FOLLOWER
	rf.votedFor = -1
	rf.commitIndex = 0
	rf.lastApplied = 0
	rf.currentTerm = 0
	// initialize from state persisted before a crash
	rf.readPersist(persister.ReadRaftState())

	// start ticker goroutine to start elections
	//go rf.ticker()
	//设置选举过期时间
	rf.lastElectionTimer = time.Now()
	rf.electTime = rf.getElectTime()
	//开始选举循环
	go rf.electionLoop()

	return rf
}