客户端在发送消息给Leader时, 为了防止被Leader篡改, 同时还要发送消息的签名
如果此后Leader在分发日志给Follower时篡改了日志内容, Follower能够验签发现并立即发起选举
如果Follower在添加日志时进行篡改, 对BRaft的安全性无影响, 因为剩下的节点依然能够达成共识, 而篡改了日志的Follower将不具备成为Leader的能力
新增签名与摘要函数
package raft
import (
	"bytes"
	"crypto"
	"crypto/rand"
	"crypto/rsa"
	"crypto/sha256"
	"crypto/x509"
	"encoding/gob"
	"encoding/pem"
	"fmt"
	"log"
)
// publicKey holds a PEM block of type "PUBLIC KEY". verifySignature decodes it
// with pem.Decode and parses it with x509.ParsePKIXPublicKey on every call.
// NOTE(review): presumably the public half of privateKey — confirm.
var publicKey = []byte(`
-----BEGIN PUBLIC KEY-----
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEApzo9DwzkZ4TsYxo1DBpw
HZt6SvRghMwEpzYlJz8OvC/wbpQWlnIcr3zBS13WQSUUk8LSa0DSqTU4V8yLD1rR
luW5lukqO5MaBCAln2/LiCupxDRTFkrWmkf515GcmpWHJDnBJNfKmD7qQh++8O90
V42rsV8mZhdiMKQrp4YKkx0tD9gTQ+sTNi/g2qZrHKPYHafHN0Ads515XsdHg/MF
DnzDdjIsvXNAU3J8eAOK6O7AWmL7jV10EXIJpVDGka9li/1uCtE2XlusJMx+WxYz
rZErCjC/hEpqHQ2VfO8URHqb84FD36o+Y1hHhxF6AOP6G5WgAy9Tblbm9cT1HFx9
jQIDAQAB
-----END PUBLIC KEY-----
`)
// privateKey holds a PEM block of type "RSA PRIVATE KEY". Signature decodes it
// with pem.Decode and parses it with x509.ParsePKCS1PrivateKey on every call.
// NOTE(review): the key is embedded in source, so anyone with the code can
// produce valid signatures; looks like a test/demo fixture — confirm before
// relying on it for real tamper protection.
var privateKey = []byte(`
-----BEGIN RSA PRIVATE KEY-----
MIIEowIBAAKCAQEApzo9DwzkZ4TsYxo1DBpwHZt6SvRghMwEpzYlJz8OvC/wbpQW
lnIcr3zBS13WQSUUk8LSa0DSqTU4V8yLD1rRluW5lukqO5MaBCAln2/LiCupxDRT
FkrWmkf515GcmpWHJDnBJNfKmD7qQh++8O90V42rsV8mZhdiMKQrp4YKkx0tD9gT
Q+sTNi/g2qZrHKPYHafHN0Ads515XsdHg/MFDnzDdjIsvXNAU3J8eAOK6O7AWmL7
jV10EXIJpVDGka9li/1uCtE2XlusJMx+WxYzrZErCjC/hEpqHQ2VfO8URHqb84FD
36o+Y1hHhxF6AOP6G5WgAy9Tblbm9cT1HFx9jQIDAQABAoIBAECDbDjZLWhuVE+r
NZnUvTn+2EAAZRf2KTlk3xJz9jhNekD+qnQh08UzqNJtghGhv319pHWyDVMv7+uX
QnKLA95mA6Ifk6ZmCpxa1ojatTd0OMszsHYiKwZcDBvI1hSg6QDlswiGo2b2pqMZ
4izLBCQeyITmA0dRcBT50MmRIZU9BXULU5iXA0KwJCaAtcG8YvbiT2N+ZS8sY32Q
QY8hSPH4vThO4bpPYlMcv8nPc50mfEJ1PI7cH9dYq8N7tMZD8tn6rio3S59Ll401
SoOQuPOZK0OBZgxcHSlDImWAN/kuhZLzGneHKcPpqrXj3Is3tADcNUOoRf7J6UBJ
QlywL2ECgYEAxpTDAtUMatTXObpiVZ2r68iav4KbDiEj1h7RpezNIx9MNe1D8ybU
68eeqka0EkWxoWLL59OHG0wzDcJ8se0uIqDsgX5FMiL/NFdegfp9dpRflXkzcCee
XvAP4xZfNHSrGvUIIrY148ECnVo6UhTynK6IN9GKVCFgHk6MtazHwuUCgYEA15Sk
FCZi2JmwokeS99q+Uqqtr88nu2yr2lUX/tn08iHhaTRcw2xYCI9xAivPVyMFIjKd
uBmyjnsaUZ23zD8U8FtKWo1cyIQ7PO9+NaNHrgS6pI4jNr1RMYbhknBBglcfAsbB
p0a9qKjXQ2gzJACYmobme9lBL03v3iW7LsKIXYkCgYBu5zv2A/gYXeAJfH9Yo2MV
noZWOGHSRU3XUoTxbsuuNteAMo9FZ8V4HJcPL8d3gPbQU/Xe9fK5mxfUMm8ji3u9
mTQcqeGJO6RdngHJA5U4OWscdoD0vRukl9u3jpIDILlCp+AwSqTUGsIUEQULPGm2
eX7X9a2UiMM+ic3p1KIHxQKBgCH0JUuPKC5ZNnq4ryseZq96dlSkWeupGAAROvBG
v8+LCoeZWarl24+tl+zxnXxp5ZsXQcQHOBo8xU5petNOdCvPFQziCuUB/pqAVe54
wwdjc0oLoPw0IR+d0NVRnN+8fQPg7gs8lw6DWTQiqztWZLKh4JdNBnk+2zKv2qVo
ujWRAoGBAIPiseHRQwYS6sAwnJUk6Gl04MgFIzWtjr5zHCJU/O+mp+XVfmS2NxwI
63aElv0lbxX/OHGIC7hJ/5ipwTLh8cQUVqdul5A0fIbtdkNlLa7tfII5FRDhTCZT
vAohlGrLXWPCwzSPY7GIWdsni7wDb9Sfvef/k5pKsoAn8CgU2olQ
-----END RSA PRIVATE KEY-----
`)
// GetBytes serializes key with a fresh gob encoder and returns the encoded
// bytes. It panics with the encoder's error if gob cannot encode the value.
func GetBytes(key interface{}) []byte {
	encoded := new(bytes.Buffer)
	if err := gob.NewEncoder(encoded).Encode(key); err != nil {
		panic(err)
	}
	return encoded.Bytes()
}
// Signature signs origindata with the package-level privateKey and returns the
// RSA-PSS signature bytes.
//
// The message is hashed with SHA-256 and the digest is signed with rsa.SignPSS
// using rsa.PSSSaltLengthAuto. If the PEM block cannot be decoded, the key
// cannot be parsed, or signing fails, the problem is logged and nil is
// returned.
func Signature(origindata []byte) []byte {
	// Load the private key from its PEM encoding.
	pemBlock, _ := pem.Decode(privateKey)
	if pemBlock == nil {
		log.Println("private key error")
		return nil
	}
	key, err := x509.ParsePKCS1PrivateKey(pemBlock.Bytes)
	if err != nil {
		log.Println(err)
		return nil
	}

	// Digest the message.
	hashAlg := crypto.SHA256
	hasher := hashAlg.New()
	hasher.Write(origindata)
	digest := hasher.Sum(nil)

	// Sign the digest with the private key.
	pssOpts := &rsa.PSSOptions{SaltLength: rsa.PSSSaltLengthAuto}
	sig, err := rsa.SignPSS(rand.Reader, key, hashAlg, digest, pssOpts)
	if err != nil {
		log.Println(err)
		return nil
	}
	return sig
}
// verifySignature reports whether sig is a valid RSA-PSS signature of
// origindata under the package-level publicKey.
//
// The message is hashed with SHA-256 and checked with rsa.VerifyPSS using
// rsa.PSSSaltLengthAuto. Every failure — undecodable PEM, unparsable key, a
// key that is not RSA, or a signature mismatch — is logged and reported as
// false.
func verifySignature(origindata []byte, sig []byte) bool {
	// Load the public key from its PEM encoding.
	block, _ := pem.Decode(publicKey)
	if block == nil {
		log.Println("public key error")
		return false
	}
	pubInterface, err := x509.ParsePKIXPublicKey(block.Bytes)
	if err != nil {
		log.Println(err)
		return false
	}
	// ParsePKIXPublicKey can return several key types; a bare type assertion
	// would panic on anything but RSA, so check it explicitly.
	pub, ok := pubInterface.(*rsa.PublicKey)
	if !ok {
		log.Printf("public key error: unsupported key type %T", pubInterface)
		return false
	}

	// Digest the message the same way Signature does.
	newhash := crypto.SHA256
	pssh := newhash.New()
	pssh.Write(origindata)
	hashed := pssh.Sum(nil)

	// Check the signature against the digest with the public key.
	opts := rsa.PSSOptions{SaltLength: rsa.PSSSaltLengthAuto}
	if err := rsa.VerifyPSS(pub, newhash, hashed, sig, &opts); err != nil {
		log.Println(err)
		return false
	}
	return true
}
//
// 假设数据并不需要加密传输,只需要通过数字签名来防止被篡改。
// 步骤:
// 1. 使用私钥对消息摘要进行加密,形成数字签名;
// 2. 使用公钥对签名进行解密,得到摘要原文;
// 3. 计算消息摘要,并与解密得到的摘要原文进行比对。
//
// func main() {
// sig := Signature(origindata)
// result := verifySignature(origindata, sig)
// fmt.Println(result)
// }
// SHA256 returns the lowercase hexadecimal SHA-256 digest of elem's gob
// encoding.
//
// elem is encoded with a fresh gob encoder, which yields the same bytes as
// GetBytes, but an error returned by the encoder is passed back through the
// error result instead of causing a panic. On error the returned string is
// empty.
func SHA256(elem interface{}) (string, error) {
	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(elem); err != nil {
		return "", fmt.Errorf("gob-encoding %T: %w", elem, err)
	}
	sum := sha256.Sum256(buf.Bytes())
	return fmt.Sprintf("%x", sum), nil
}
把Start函数改为Start0, 增加签名参数sig
// Start0 offers command, together with the client-supplied signature sig, to
// this peer for replication.
//
// If the peer's role is not Leader it returns (0, 0, false). Otherwise it
// appends a LogEntry carrying the command, the current term and the signature
// to rf.log, calls rf.persistLocked, and returns the new entry's index in
// rf.log, the current term, and true. Everything after the initial debug print
// runs while holding rf.mu.
func (rf *Raft) Start0(command interface{}, sig []byte) (int, int, bool) {
	fmt.Println("signature:", sig)

	rf.mu.Lock()
	defer rf.mu.Unlock()

	if rf.role != Leader {
		return 0, 0, false
	}

	entry := LogEntry{
		Term:         rf.currentTerm,
		Command:      command,
		CommandValid: true,
		Signature:    sig,
	}
	rf.log = append(rf.log, entry)
	LOG(rf.me, rf.currentTerm, DLeader, "Leader accept log [%d]T%d", len(rf.log)-1, rf.currentTerm)
	rf.persistLocked()

	return len(rf.log) - 1, rf.currentTerm, true
}
在AppendEntries中增加验签逻辑, 发现日志被篡改立即发起选举
if len(args.Entries) > 0 && !verifySignature(GetBytes(args.Entries[0].Command), args.Entries[0].Signature) {
rf.becomeCandidateLocked()
fmt.Printf("FOLLOWER %d becomes CANDIDATE..., Current Time: %v\n", rf.me, time.Now().UnixNano()/1000000)
return
}
修改config中的one函数, 增加签名, 此时原本的测试程序会不可用
func (cfg *config) one(cmd interface{}, expectedServers int, retry bool) int {
t0 := time.Now()
starts := 0
for time.Since(t0).Seconds() < 10 && cfg.checkFinished() == false {
// try all the servers, maybe one is the leader.
index := -1
for si := 0; si < cfg.n; si++ {
starts = (starts + 1) % cfg.n
var rf *Raft
cfg.mu.Lock()
if cfg.connected[starts] {
rf = cfg.rafts[starts]
}
cfg.mu.Unlock()
if rf != nil {
cmdBytes := GetBytes(cmd)
sig := Signature(cmdBytes)
index1, _, ok := rf.Start0(cmd, sig)
if ok {
index = index1
break
}
}
}
新建测试, 调用Start0
package raft
import (
// "encoding/binary"
"fmt"
"testing"
"time"
)
// Timing constants shared by the tests in this file.
const (
	// ElectionTimeout is the unit the tests sleep in while waiting for the
	// cluster to settle (e.g. time.Sleep(2 * ElectionTimeout)).
	ElectionTimeout = 1000 * time.Millisecond

	// CommandBreak is the pause TestStartCommand inserts before each Start0
	// call.
	CommandBreak = 50 * time.Millisecond
)

// responseCount is incremented by every call to (*Raft).ReceiveResponse.
var responseCount int
// ReceiveResponse records that a response arrived by bumping the package-level
// responseCount. The response value itself is ignored, and the counter is
// updated without any locking.
func (rf *Raft) ReceiveResponse(response interface{}) {
	responseCount++
}
// TestStartCommand elects a leader in a 30-server cluster, submits count signed
// commands to it through Start0, waits two election timeouts, and then prints
// each server's commitIndex, the value cfg.nCommitted reports for the last
// index, and the elapsed time with the deliberate sleeps subtracted.
//
// Author's status note: PASSED.
func TestStartCommand(t *testing.T) {
	servers := 30
	cfg := make_config(t, servers, false, false)
	defer cfg.cleanup()

	fmt.Printf("Initial election ...\n")
	leader := cfg.checkOneLeader()
	fmt.Printf("Leader: %d\n", leader)

	start := time.Now()
	var index int
	count := 1
	for i := 0; i < count; i++ {
		command := i
		// Sign the gob encoding of the command, as a client would.
		sig := Signature(GetBytes(command))
		time.Sleep(CommandBreak)

		var ok bool
		index, _, ok = cfg.rafts[leader].Start0(command, sig)
		if ok {
			fmt.Println("Index:", index)
		} else {
			fmt.Println("Failed.")
		}
	}
	time.Sleep(2 * ElectionTimeout)

	n, _ := cfg.nCommitted(index)
	// Use the stored pointers directly: dereferencing would copy the whole
	// Raft struct, including rf.mu.
	for _, rf := range cfg.rafts {
		fmt.Printf("server: %d, commitIndex: %d\n", rf.me, rf.commitIndex)
	}
	fmt.Println("Committed number:", n)

	// Subtract exactly the time spent sleeping above: one CommandBreak per
	// submitted command plus the two election timeouts.
	elapsed := time.Since(start)
	slept := 2*ElectionTimeout + time.Duration(count)*CommandBreak
	fmt.Printf("command count: %d, time elapsed: %v\n", count, elapsed-slept)
}
// TestInitialElection2A starts a 6-server cluster, waits for a leader, submits
// one signed command to it through Start0, and prints a warning if the value
// returned by cfg.checkTerms changes across two election timeouts.
//
// Author's status note: PASSED.
func TestInitialElection2A(t *testing.T) {
	servers := 6
	cfg := make_config(t, servers, false, false)
	defer cfg.cleanup()

	fmt.Printf("Test (2A): initial election ...\n")

	// is a leader elected?
	leader := cfg.checkOneLeader()

	command := 20
	// Sign the gob encoding of the command, as a client would.
	cmdBytes := GetBytes(command)
	sig := Signature(cmdBytes)
	index, _, ok := cfg.rafts[leader].Start0(command, sig)
	fmt.Printf("index: %d, ok: %v\n", index, ok)

	// does the leader+term stay the same if there is no network failure?
	term1 := cfg.checkTerms()
	time.Sleep(2 * ElectionTimeout)
	term2 := cfg.checkTerms()
	if term1 != term2 {
		// End the line so the following message starts on its own line.
		fmt.Printf("warning: term changed even though there were no failures\n")
	}

	fmt.Printf(" ... Passed\n")
}
// TestReElection2A exercises leader election around a network failure in a
// 6-server cluster: it finds a leader, disconnects it, waits for a leader
// again, reconnects the first one, and waits for a leader once more, printing
// the leader reported at each step.
//
// Author's status note: PASSED.
func TestReElection2A(t *testing.T) {
	servers := 6
	cfg := make_config(t, servers, false, false)
	defer cfg.cleanup()

	fmt.Printf("Test: election after network failure ...\n")

	original := cfg.checkOneLeader()
	fmt.Printf("leader1: %d\n", original)

	// With the leader cut off, the remaining servers should elect another.
	cfg.disconnect(original)
	interim := cfg.checkOneLeader()
	fmt.Printf("tmpLeader: %d\n", interim)

	// Bringing the old leader back should not disturb the new leader.
	cfg.connect(original)
	final := cfg.checkOneLeader()
	fmt.Printf("leader2: %d\n", final)

	fmt.Printf(" ... Passed\n")
}
// TestBasicAgree2B checks basic agreement in a 6-server cluster: for each
// expected index it first requires that cfg.nCommitted reports nothing
// committed there, then submits a command through cfg.one and requires the
// returned index to equal the expected one.
//
// Author's status note: PASSED.
func TestBasicAgree2B(t *testing.T) {
	servers := 6
	cfg := make_config(t, servers, false, false)
	defer cfg.cleanup()

	fmt.Printf("Test (2B): basic agreement ...\n")

	iters := 1
	for index := 1; index <= iters; index++ {
		// Nothing may be committed at this index before a command is submitted.
		if committed, _ := cfg.nCommitted(index); committed > 0 {
			t.Fatalf("some have committed before Start()")
		}

		got := cfg.one(index*100, servers, false)
		if got != index {
			t.Fatalf("got index %v but expected %v", got, index)
		}
	}

	fmt.Printf(" ... Passed\n")
}
// TestFailAgree2B checks that a 6-server cluster keeps agreeing while one
// follower is disconnected: it submits a command with all servers, disconnects
// the server after the leader, submits four commands expecting servers-1
// participants, reconnects the follower, and submits two more expecting all
// servers.
//
// Author's status note: PASSED.
func TestFailAgree2B(t *testing.T) {
	servers := 6
	cfg := make_config(t, servers, false, false)
	defer cfg.cleanup()

	fmt.Printf("Test (2B): agreement despite follower disconnection ...\n")

	first := cfg.one(101, servers, false)
	fmt.Printf("ret: %d\n", first)

	// Cut one follower off from the network.
	leader := cfg.checkOneLeader()
	fmt.Printf("leader: %d\n", leader)
	follower := (leader + 1) % servers
	cfg.disconnect(follower)

	// The remaining servers should still reach agreement.
	for _, cmd := range []int{102, 103} {
		cfg.one(cmd, servers-1, false)
	}
	time.Sleep(ElectionTimeout)
	for _, cmd := range []int{104, 105} {
		cfg.one(cmd, servers-1, false)
	}

	// Bring the follower back.
	cfg.connect(follower)

	// The full set of servers should agree again.
	cfg.one(106, servers, false)
	time.Sleep(ElectionTimeout)
	cfg.one(107, servers, false)

	fmt.Printf(" ... Passed\n")
}
遗留问题
需要彻底修改客户端交互的逻辑
恶意Leader节点可以拒绝客户端请求, 因此BRaft的客户端交互不再仅依赖Leader, 而是需要所有参与共识的节点参与; 包含四个阶段
1 客户端向BRaft集群发送请求, 如果收到请求的节点不是Leader则转发请求给Leader
2 Leader收到请求后执行日志复制操作
3 Leader和Follower将日志传递给状态机执行后, 各自将执行结果响应给客户端
4 如果客户端收到f+1个一致的响应, 则将其视为正确的响应
BRaft中每个节点都需要对返回给客户端的响应进行数字签名; 客户端持有所有节点的公钥, 负责校验每一个响应, 确保响应消息未被篡改, 且每个节点只被计入一次响应