Lab4 Sharded Key/Value Service
PartA
本部分实验需要完成一个控制器,负责管理分片(shard)到各服务器组之间的分配
-
实验流程
-
对象设计
主要的对象为服务器ShardCtrler,指令Op,客户端Clerk
-
ShardCtrler
服务器需要记录自己的编号和对应的Raft服务,以及相应的applyCh:Raft通过它把已经提交的日志交给服务器应用,服务器也由此得知日志已经存储完成。服务器还要存储所有config的信息,并为每条等待中的指令设置通道(waitApplyCh),用于通知请求处理函数对应的指令是否已经在Raft处存储完成。为了防止同一个客户端的请求被重复执行,设置lastRequestId记录每个客户端上次请求的ID。
type ShardCtrler struct { mu sync.Mutex me int rf *raft.Raft applyCh chan raft.ApplyMsg // Your data here. configs []Config // indexed by config num waitApplyCh map[int]chan Op lastRequestId map[int64]int }
-
Op
指令需要记录类型,并且记录请求的客户端的id和请求的id,确保请求不重复。并且设置不同的值用于存储不同请求的参数。
// Op is the command submitted to Raft for every client request. Only the
// argument fields matching OperationType are meaningful for a given Op.
type Op struct {
	// OperationType names the operation: "Join", "Leave", "Move" or "Query".
	OperationType string

	// ClientId and RequestId identify the originating request so that
	// repeated requests can be recognised.
	ClientId  int64
	RequestId int

	// QueryNum is the configuration number requested by a Query.
	QueryNum int

	// JoinServers carries the Servers argument of a Join request.
	JoinServers map[int][]string

	// LeaveGids is the argument of a Leave operation.
	LeaveGids []int

	// MoveShard and MoveGid are the arguments of a Move operation.
	MoveShard int
	MoveGid   int
}
-
Clerk
客户端有自己的编号(clientId),并为每个新请求分配递增的编号(requestId),用于防止请求被重复执行;同时存储所有的服务器,以及最近一次可能是leader的服务器id
type Clerk struct { servers []*labrpc.ClientEnd // Your data here. clientId int64 requestId int recentLeaderId int }
-
-
流程设计
-
服务器启动
初始化参数,后台启动线程持续读取已经被Raft提交应用的命令
func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister) *ShardCtrler { sc := new(ShardCtrler) sc.me = me sc.configs = make([]Config, 1) sc.configs[0].Groups = map[int][]string{ } labgob.Register(Op{ }) sc.applyCh = make(chan raft.ApplyMsg) sc.rf = raft.Make(servers, me, persister, sc.applyCh) // Your code here. sc.waitApplyCh = make(map[int]chan Op) sc.lastRequestId = make(map[int64]int) go sc.ReadRaftApplyCommand() return sc }
-
读取已经应用的命令
根据已经应用的指令的类型进行分类处理,然后把指令发送给对应的WaitChan,通知等待方处理完成
func (sc *ShardCtrler) GetCommand(msg raft.ApplyMsg) { op := msg.Command.(Op) if !sc.ifRequestRepetition(op.ClientId, op.RequestId) { if op.OperationType == "Join" { sc.ExecuteJoinOnConfig(op) } if op.OperationType == "Leave" { sc.ExecuteLeaveOnConfig(op) } if op.OperationType == "Move" { sc.ExecuteMoveOnConfig(op) } } sc.SendMessageToWaitChan(op, msg.CommandIndex) }
-
通知操作完成
func (sc *ShardCtrler) SendMessageToWaitChan(op Op, index int) { //修改之前需要上锁 sc.mu.Lock() defer sc.mu.Unlock() //检查waitChan是否已经初始化对应位置的值 ch, exist := sc.waitApplyCh[index] if exist { ch <- op } }
-
启动客户端
func MakeClerk(servers []*labrpc.ClientEnd) *Clerk { ck := new(Clerk) ck.servers = servers // Your code here. ck.clientId = nrand() ck.recentLeaderId = mathrand.Intn(len(servers)) return ck }
-
客户端发起Query请求
设置需要查看的config的Id,发起rpc请求
func (ck *Clerk) Query(num int) Config { // Your code here. ck.requestId++ sev := ck.recentLeaderId for { args := QueryArgs{ Num: num, ClientId: ck.clientId, RequestId: ck.requestId, } reply := QueryReply{ } ok := ck.servers[sev].Call("ShardCtrler.Query", &args, &reply)
-
服务器接收请求
需要自身连接的Raft服务器是leader才能返回请求。将请求信息填入操作中,发给Raft存储,等待指令的完成。如果等待超时,则检查是不是已经提交的指令,若是则直接执行Query操作,否则返回WrongLeader的错误。如果从WaitChan中收到信息,则需检查是否与请求信息一致,如是则执行Query,不是则返回错误。
func (sc *ShardCtrler) Query(args *QueryArgs, reply *QueryReply) { // Your code here. _, ifLeader := sc.rf.GetState() if !ifLeader { reply.WrongLeader = true return } op := Op{ OperationType: "Query", ClientId: args.ClientId, RequestId: args.RequestId, QueryNum: args.Num, } rfIndex, _, _ := sc.rf.Start(op) sc.mu.Lock() chForWaitCh, exist := sc.waitApplyCh[rfIndex] if !exist { sc.waitApplyCh[rfIndex] = make(chan Op, 1) chForWaitCh = sc.waitApplyCh[rfIndex] } sc.mu.Unlock() select { case <-time.After(time.Millisecond * RfTimeOut): if sc.ifRequestRepetition(op.ClientId, op.RequestId) { reply.Config = sc.ExecuteQueryOnConfig(op) reply.Err = OK } else { reply.WrongLeader = true } case rfCommitOp := <-chForWaitCh: if rfCommitOp.ClientId == op.ClientId && rfCommitOp.RequestId == op.RequestId { reply.Config = sc.ExecuteQueryOnConfig(op) reply.Err = OK } else { reply.WrongLeader = true } } sc.mu.Lock() delete(sc.waitApplyCh, rfIndex) sc.mu.Unlock() return }
-
服务器执行Query操作
检查Query的参数若为-1或者大于自身config的最大值,则返回最新的config的信息。否则直接返回请求的config。
func (sc *ShardCtrler) ExecuteQueryOnConfig(op Op) Config { sc.mu.Lock() defer sc.mu.Unlock() sc.lastRequestId[op.ClientId] = op.RequestId if op.QueryNum == -1 || op.QueryNum >= len(sc.configs) { DPrintf("Server:%d,Query Config:%v", sc.me, sc.configs[len(sc.configs)-1]) return sc.configs[len(sc.configs)-1] } else { return sc.configs[op.QueryNum] } }
-
客户端收到Query回复
如果错误则重新选择一个服务器发起请求,若正确则返回结果
if !ok || reply.WrongLeader == true { sev = (sev + 1) % len(ck.servers) continue } if reply.Err == OK { ck.recentLeaderId = sev return reply.Config } time.Sleep(100 * time.Millisecond)
-
客户端发起Join请求
在请求的参数中设置希望新增的服务器,初始化相关的参数
ck.requestId++ sev := ck.recentLeaderId // Your code here. for { args := JoinArgs{ Servers: servers, ClientId: ck.clientId, RequestId: ck.requestId, } reply := JoinReply{ } ok := ck.servers[sev].Call("ShardCtrler.Join", &args, &reply)
-
服务器接收Join请求
利用Join请求的信息生成新的指令Op发送给Raft服务器,等待Raft存储提交。
func (sc *ShardCtrler) Join(args *JoinArgs, reply *JoinReply) { // Your code here. _, ifLeader := sc.rf.GetState() if !ifLeader { reply.WrongLeader = true return } op := Op{ OperationType: "Join", ClientId: args
-