


  1. 扩展性较好,节点的加入和退出都可以做到无损、有序,对应用友好。
  2. 容错性较好,任何节点的宕机都不会影响正在运行的节点。
  3. 健壮性较好,因为没有中心化的节点,所有对象都是对等。
  4. 最终一致性,当协议运行一段时间之后,数据都是一致的。
  1. 通信量较大:数据是随机同步传播的,节点与节点之间需要频繁通信,如果传播频率控制不当或者传播的数据量过大,容易引起网络拥堵。
  2. 最终一致的时间会有延迟,缓慢的传播不利于快速收敛。






package main

import (

// Package-level state of the example program.
//
//   - mtx: read/write mutex; presumably guards items (no lock calls are
//     visible in this excerpt — confirm against the full file).
//   - members: comma separated list of existing cluster members to join.
//   - port: port of the HTTP API (/add, /del, /get).
//   - gossipPort: port memberlist binds to for gossip traffic.
//   - items: the replicated key/value data.
//   - broadcasts: queue of pending gossip messages, created in start.
var (
	mtx        sync.RWMutex
	members    = flag.String("members", "", "comma separated list of members")
	port       = flag.Int("port", 4001, "http port")
	gossipPort = flag.Int("p", 40001, "gossip port")
	items      = map[string]string{}
	broadcasts *memberlist.TransmitLimitedQueue

// broadcast is one queued gossip message. Its methods (Invalidates, Message,
// Finished) are defined further down in this file.
type broadcast struct {
	msg    []byte          // payload returned by Message
	notify chan<- struct{} // optional channel; Finished checks it for nil

// delegate is a stateless type installed as the memberlist Delegate in start;
// its methods read and write the package-level items map.
type delegate struct{}

// update is the JSON-encoded change record carried in data messages.
type update struct {
	Action string            // add, del
	Data   map[string]string // key/value pairs the action applies to

// init runs at package initialization.
// NOTE(review): its body is not visible in this excerpt.
func init() {

// Invalidates always returns false: this broadcast never claims to
// invalidate another one, whatever other is.
func (b *broadcast) Invalidates(other memberlist.Broadcast) bool {
	return false

// Message returns the raw payload stored in the broadcast.
func (b *broadcast) Message() []byte {
	return b.msg

// Finished acts only when a notify channel was supplied.
// NOTE(review): the statement inside the nil check is not visible in this
// excerpt; presumably it signals or closes b.notify — confirm.
func (b *broadcast) Finished() {
	if b.notify != nil {

// NodeMeta returns an empty byte slice; limit is ignored because this example
// attaches no metadata to the node.
func (d *delegate) NodeMeta(limit int) []byte {
	return []byte{}

// NotifyMsg handles a message received from a peer. The first byte selects the
// message kind; for 'd' the remainder is decoded as a JSON array of update
// records and every key/value pair is applied to the items map.
// NOTE(review): the bodies of the guard clauses and any locking around items
// are not visible in this excerpt — confirm against the full file.
func (d *delegate) NotifyMsg(b []byte) {
	// Guard against an empty message before indexing b[0].
	if len(b) == 0 {
	switch b[0] {
	case 'd': // data
		var updates []*update
		// b[1:] skips the one-byte kind prefix.
		if err := json.Unmarshal(b[1:], &updates); err != nil {
		for _, u := range updates {
			for k, v := range u.Data {
				log.Println("   notify   ", u.Action, k, v)
				switch u.Action {
				case "add":
					items[k] = v
				case "del":
					// The value is ignored for deletions; only the key matters.
					delete(items, k)

// GetBroadcasts forwards to the package-level broadcasts queue, passing
// overhead and limit through unchanged.
func (d *delegate) GetBroadcasts(overhead, limit int) [][]byte {
	return broadcasts.GetBroadcasts(overhead, limit)

// LocalState returns the whole items map encoded as JSON. The join argument
// is not used.
func (d *delegate) LocalState(join bool) []byte {
	m := items
	// The marshal error is discarded; a map[string]string always encodes.
	b, _ := json.Marshal(m)
	return b

// MergeRemoteState decodes buf as a JSON object of string pairs and copies
// every entry into items, overwriting existing keys.
// NOTE(review): the bodies of the three guards below are not visible in this
// excerpt; presumably each returns early — confirm.
func (d *delegate) MergeRemoteState(buf []byte, join bool) {
	if len(buf) == 0 {
	if !join {
	var m map[string]string
	if err := json.Unmarshal(buf, &m); err != nil {
	for k, v := range m {
		items[k] = v

// eventDelegate is a stateless type installed as c.Events in start; its
// methods print membership changes to standard output.
type eventDelegate struct{}

// NotifyJoin prints a line naming the node that joined.
func (ed *eventDelegate) NotifyJoin(node *memberlist.Node) {
	fmt.Println("A node has joined: " + node.String())

// NotifyLeave prints a line naming the node that left.
func (ed *eventDelegate) NotifyLeave(node *memberlist.Node) {
	fmt.Println("A node has left: " + node.String())

// NotifyUpdate prints a line naming the node that was updated.
func (ed *eventDelegate) NotifyUpdate(node *memberlist.Node) {
	fmt.Println("A node was updated: " + node.String())

// addHandler serves /add: it stores the "key"/"val" form values in items and
// builds a 'd'-prefixed JSON update message for the other nodes.
// NOTE(review): r.Form is only populated after ParseForm has run, and no such
// call (nor any locking around items, nor the call that queues the broadcast)
// is visible in this excerpt — confirm against the full file.
func addHandler(w http.ResponseWriter, r *http.Request) {
	key := r.Form.Get("key")
	val := r.Form.Get("val")
	items[key] = val

	// Encode the change as a single-element list of update records.
	b, err := json.Marshal([]*update{
			Action: "add",
			Data: map[string]string{
				key: val,

	if err != nil {
		http.Error(w, err.Error(), 500)

		// Prefix 'd' marks a data message; NotifyMsg switches on this byte.
		msg:    append([]byte("d"), b...),
		notify: nil,

// delHandler serves /del: it removes the "key" form value from items and
// builds a 'd'-prefixed JSON update message for the other nodes.
// NOTE(review): r.Form is only populated after ParseForm has run, and no such
// call (nor any locking around items, nor the call that queues the broadcast)
// is visible in this excerpt — confirm against the full file.
func delHandler(w http.ResponseWriter, r *http.Request) {
	key := r.Form.Get("key")
	delete(items, key)

	// The value is left empty: receivers only use the key for "del".
	b, err := json.Marshal([]*update{{
		Action: "del",
		Data: map[string]string{
			key: "",

	if err != nil {
		http.Error(w, err.Error(), 500)

		// Prefix 'd' marks a data message; NotifyMsg switches on this byte.
		msg:    append([]byte("d"), b...),
		notify: nil,

// getHandler serves /get: it looks up the "key" form value in items. A missing
// key yields the empty string.
// NOTE(review): the code that writes val to the response is not visible in
// this excerpt.
func getHandler(w http.ResponseWriter, r *http.Request) {
	key := r.Form.Get("key")
	val := items[key]

// start configures and creates the memberlist node, joins the peers named by
// the -members flag (if any) and initializes the broadcasts queue.
// It returns the error from memberlist.Create or Join, or nil on success.
func start() error {
	// The hostname error is ignored; an empty hostname still gives a unique
	// name because a UUID is appended below.
	hostname, _ := os.Hostname()
	// Start from the LAN defaults and override the timing parameters.
	c := memberlist.DefaultLANConfig()
	c.TCPTimeout = time.Second
	c.IndirectChecks = 1
	c.RetransmitMult = 2
	c.SuspicionMult = 3
	c.PushPullInterval = 15 * time.Second
	c.ProbeTimeout = 200 * time.Millisecond
	c.ProbeInterval = time.Second
	c.GossipInterval = 100 * time.Millisecond
	c.GossipToTheDeadTime = 15 * time.Second

	// Install the event and data delegates defined in this file.
	c.Events = &eventDelegate{}
	c.Delegate = &delegate{}
	c.BindPort = *gossipPort
	c.Name = hostname + "-" + uuid.NewUUID().String()
	fmt.Println(c.GossipInterval, c.GossipToTheDeadTime)
	m, err := memberlist.Create(c)
	if err != nil {
		return err
	// Join an existing cluster only when members were supplied.
	if len(*members) > 0 {
		parts := strings.Split(*members, ",")
		_, err := m.Join(parts)
		if err != nil {
			return err
	// The queue sizes its retransmissions from the live member count.
	broadcasts = &memberlist.TransmitLimitedQueue{
		NumNodes: func() int {
			return m.NumMembers()
		RetransmitMult: 3,
	node := m.LocalNode()
	fmt.Printf("Local member %s:%d\n", node.Addr, node.Port)
	return nil

// main starts the memberlist node, registers the /add, /del and /get handlers
// on the default mux and serves HTTP on the port given by -port.
// NOTE(review): the error-handling bodies and the flag parsing call are not
// visible in this excerpt.
func main() {
	if err := start(); err != nil {

	http.HandleFunc("/add", addHandler)
	http.HandleFunc("/del", delHandler)
	http.HandleFunc("/get", getHandler)
	fmt.Printf("Listening on :%d\n", *port)
	if err := http.ListenAndServe(fmt.Sprintf(":%d", *port), nil); err != nil {


go run main.go --p=40001

go run main.go --members= --port=4002 --p=40002

curl "http://localhost:4001/add?key=foo5&val=bar5"

curl "http://localhost:4002/get?key=foo5"




// Create will create a new Memberlist using the given configuration.
// This will not connect to any other node (see Join) yet, but will start
// all the listeners to allow other nodes to join this memberlist.
// After creating a Memberlist, the configuration given should not be
// modified by the user anymore.
//
// It returns the error from newMemberlist or setAlive unchanged.
func Create(conf *Config) (*Memberlist, error) {
	// Build the Memberlist: transport, queues and listener goroutines.
	m, err := newMemberlist(conf)
	if err != nil {
		return nil, err
	// presumably announces the local node as alive — confirm in setAlive.
	if err := m.setAlive(); err != nil {
		return nil, err
	return m, nil



  1. 故障检查。
  2. 数据同步(pull/push)。
  3. 广播消息。


// newMemberlist creates the network listeners.
// Does not schedule execution of background maintenance.
//
// When conf.Transport is nil a NetTransport is built from conf.BindAddr and
// conf.BindPort. The Memberlist struct is then populated and three goroutines
// are started: streamListen, packetListen and packetHandler.
// NOTE(review): the declaration of logger is not visible in this excerpt.
func newMemberlist(conf *Config) (*Memberlist, error) {

	// Set up a network transport by default if a custom one wasn't given
	// by the config.
	transport := conf.Transport
	if transport == nil {
		nc := &NetTransportConfig{
			BindAddrs: []string{conf.BindAddr},
			BindPort:  conf.BindPort,
			Logger:    logger,

		// See comment below for details about the retry in here.
		makeNetRetry := func(limit int) (*NetTransport, error) {
			var err error
			for try := 0; try < limit; try++ {
				var nt *NetTransport
				if nt, err = NewNetTransport(nc); err == nil { // creates the TCP and UDP listeners
					return nt, nil
				if strings.Contains(err.Error(), "address already in use") {
					logger.Printf("[DEBUG] memberlist: Got bind error: %v", err)

			return nil, fmt.Errorf("failed to obtain an address: %v", err)

		// The dynamic bind port operation is inherently racy because
		// even though we are using the kernel to find a port for us, we
		// are attempting to bind multiple protocols (and potentially
		// multiple addresses) with the same port number. We build in a
		// few retries here since this often gets transient errors in
		// busy unit tests.
		limit := 1
		if conf.BindPort == 0 {
			limit = 10

		nt, err := makeNetRetry(limit)
		if err != nil {
			return nil, fmt.Errorf("Could not set up network transport: %v", err)
		// With a dynamic port, record the port that was actually bound.
		if conf.BindPort == 0 {
			port := nt.GetAutoBindPort()
			conf.BindPort = port
			conf.AdvertisePort = port
			logger.Printf("[DEBUG] memberlist: Using dynamic bind port %d", port)
		transport = nt

	// Wrap transports that do not implement NodeAwareTransport in a shim.
	nodeAwareTransport, ok := transport.(NodeAwareTransport)
	if !ok {
		logger.Printf("[DEBUG] memberlist: configured Transport is not a NodeAwareTransport and some features may not work as desired")
		nodeAwareTransport = &shimNodeAwareTransport{transport}

	m := &Memberlist{
		config:               conf,
		shutdownCh:           make(chan struct{}),
		leaveBroadcast:       make(chan struct{}, 1),
		transport:            nodeAwareTransport,
		handoffCh:            make(chan struct{}, 1),
		highPriorityMsgQueue: list.New(),
		lowPriorityMsgQueue:  list.New(),
		nodeMap:              make(map[string]*nodeState),
		nodeTimers:           make(map[string]*suspicion),
		awareness:            newAwareness(conf.AwarenessMaxMultiplier),
		ackHandlers:          make(map[uint32]*ackHandler),
		broadcasts:           &TransmitLimitedQueue{RetransmitMult: conf.RetransmitMult},
		logger:               logger,
	// Set after construction because the closure needs m itself.
	m.broadcasts.NumNodes = func() int {
		return m.estNumNodes()

	// Get the final advertise address from the transport, which may need
	// to see which address we bound to. We'll refresh this each time we
	// send out an alive message.
	if _, _, err := m.refreshAdvertise(); err != nil {
		return nil, err

	go m.streamListen()   // TCP stream handling
	go m.packetListen()  // UDP packet handling
	go m.packetHandler()
	return m, nil


// NewNetTransport returns a net transport with the given configuration. On
// success all the network listeners will be created and listening.
//
// One TCP and one UDP listener is created per entry of config.BindAddrs, all
// on the same port. An error is returned when BindAddrs is empty or when any
// listener cannot be created or configured.
func NewNetTransport(config *NetTransportConfig) (*NetTransport, error) {
	// If we reject the empty list outright we can assume that there's at
	// least one listener of each type later during operation.
	if len(config.BindAddrs) == 0 {
		return nil, fmt.Errorf("At least one bind address is required")

	// Build out the new transport.
	var ok bool
	t := NetTransport{
		config:   config,
		packetCh: make(chan *Packet),  // unbuffered channel for incoming packets
		streamCh: make(chan net.Conn),
		logger:   config.Logger,

	// Clean up listeners if there's an error.
	// ok stays false on every early return and is set just before success.
	defer func() {
		if !ok {

	// Build all the TCP and UDP listeners.
	port := config.BindPort
	for _, addr := range config.BindAddrs {
		ip := net.ParseIP(addr)

		tcpAddr := &net.TCPAddr{IP: ip, Port: port}  // TCP address to bind
		tcpLn, err := net.ListenTCP("tcp", tcpAddr)
		if err != nil {
			return nil, fmt.Errorf("Failed to start TCP listener on %q port %d: %v", addr, port, err)
		t.tcpListeners = append(t.tcpListeners, tcpLn)

		// If the config port given was zero, use the first TCP listener
		// to pick an available port and then apply that to everything
		// else.
		if port == 0 {
			port = tcpLn.Addr().(*net.TCPAddr).Port

		udpAddr := &net.UDPAddr{IP: ip, Port: port}  // UDP address to bind
		udpLn, err := net.ListenUDP("udp", udpAddr)
		if err != nil {
			return nil, fmt.Errorf("Failed to start UDP listener on %q port %d: %v", addr, port, err)
		if err := setUDPRecvBuf(udpLn); err != nil {
			return nil, fmt.Errorf("Failed to resize UDP buffer: %v", err)
		t.udpListeners = append(t.udpListeners, udpLn)

	// Fire them up now that we've been able to create them all.
	for i := 0; i < len(config.BindAddrs); i++ {
		go t.tcpListen(t.tcpListeners[i])   // start an accept/read goroutine for every listener
		go t.udpListen(t.udpListeners[i])

	ok = true
	return &t, nil


// tcpListen is a long running goroutine that accepts incoming TCP connections
// and hands them off to the stream channel.
//
// Accept errors are retried with a delay that starts at baseDelay, doubles on
// each consecutive error and is capped at maxDelay.
// NOTE(review): the shutdown return and the sleep/continue after logging are
// not visible in this excerpt.
func (t *NetTransport) tcpListen(tcpLn *net.TCPListener) {
	defer t.wg.Done()

	// baseDelay is the initial delay after an AcceptTCP() error before attempting again
	const baseDelay = 5 * time.Millisecond

	// maxDelay is the maximum delay after an AcceptTCP() error before attempting again.
	// In the case that tcpListen() is error-looping, it will delay the shutdown check.
	// Therefore, changes to maxDelay may have an effect on the latency of shutdown.
	const maxDelay = 1 * time.Second

	var loopDelay time.Duration
	for {
		conn, err := tcpLn.AcceptTCP()  // accept an incoming connection
		if err != nil {
			// The shutdown flag is checked first when Accept fails.
			if s := atomic.LoadInt32(&t.shutdown); s == 1 {

			if loopDelay == 0 {
				loopDelay = baseDelay
			} else {
				loopDelay *= 2

			if loopDelay > maxDelay {
				loopDelay = maxDelay

			t.logger.Printf("[ERR] memberlist: Error accepting TCP connection: %v", err)
		// No error, reset loop delay
		loopDelay = 0

		t.streamCh <- conn   // hand the connection off for processing


// udpListen is a long running goroutine that accepts incoming UDP packets and
// hands them off to the packet channel.
//
// Every packet is read into its own freshly allocated buffer, because the
// Packet sent on the channel keeps a slice of it.
func (t *NetTransport) udpListen(udpLn *net.UDPConn) {
	defer t.wg.Done()
	for {
		// Do a blocking read into a fresh buffer. Grab a time stamp as
		// close as possible to the I/O.
		buf := make([]byte, udpPacketBufSize)
		n, addr, err := udpLn.ReadFrom(buf)  // read one packet
		ts := time.Now()
		if err != nil {
			// The shutdown flag is checked first when the read fails.
			if s := atomic.LoadInt32(&t.shutdown); s == 1 {

			t.logger.Printf("[ERR] memberlist: Error reading UDP packet: %v", err)

		// Check the length - it needs to have at least one byte to be a
		// proper message.
		// NOTE(review): the log prints len(buf), the buffer size, rather
		// than n, the number of bytes actually read.
		if n < 1 {
			t.logger.Printf("[ERR] memberlist: UDP packet too short (%d bytes) %s",
				len(buf), LogAddress(addr))

		// Ingest the packet.
		metrics.IncrCounter([]string{"memberlist", "udp", "received"}, float32(n))
		t.packetCh <- &Packet{  // hand the received data off for processing
			Buf:       buf[:n],
			From:      addr,
			Timestamp: ts,



// streamListen is a long running goroutine that pulls incoming streams from the
// transport and hands them off for processing.
//
// Each connection is handled in its own goroutine, so a slow peer does not
// block the accept loop. The loop also watches shutdownCh.
func (m *Memberlist) streamListen() {
   for {
      select {
      case conn := <-m.transport.StreamCh():
         go m.handleConn(conn)   // handle the connection

      case <-m.shutdownCh:

// handleConn handles a single incoming stream connection from the transport.
//
// It reads the message type and then dispatches: user messages are read,
// push/pull requests exchange and merge state, and pings are answered with an
// ack. The connection is closed when the function returns.
// NOTE(review): the early returns after each logged error and the default
// case label are not visible in this excerpt.
func (m *Memberlist) handleConn(conn net.Conn) {
   defer conn.Close()
   m.logger.Printf("[DEBUG] memberlist: Stream connection %s", LogConn(conn))

   metrics.IncrCounter([]string{"memberlist", "tcp", "accept"}, 1)

   msgType, bufConn, dec, err := m.readStream(conn)  // read the message type
   if err != nil {
      // io.EOF is not logged or reported back to the peer.
      if err != io.EOF {
         m.logger.Printf("[ERR] memberlist: failed to receive: %s %s", err, LogConn(conn))

         // Report the failure back to the peer as an error message.
         resp := errResp{err.Error()}
         out, err := encode(errMsg, &resp)
         if err != nil {
            m.logger.Printf("[ERR] memberlist: Failed to encode error response: %s", err)

         err = m.rawSendMsgStream(conn, out.Bytes())
         if err != nil {
            m.logger.Printf("[ERR] memberlist: Failed to send error: %s %s", err, LogConn(conn))

   switch msgType {
   case userMsg:
      if err := m.readUserMsg(bufConn, dec); err != nil { // read the user message
         m.logger.Printf("[ERR] memberlist: Failed to receive user message: %s %s", err, LogConn(conn))
   case pushPullMsg:
      // Increment counter of pending push/pulls
      numConcurrent := atomic.AddUint32(&m.pushPullReq, 1)  // push/pull exchange
      // Adding ^uint32(0) decrements the unsigned counter by one.
      defer atomic.AddUint32(&m.pushPullReq, ^uint32(0))

      // Check if we have too many open push/pull requests
      if numConcurrent >= maxPushPullRequests {
         m.logger.Printf("[ERR] memberlist: Too many pending push/pull requests")

      join, remoteNodes, userState, err := m.readRemoteState(bufConn, dec) // read the remote state
      if err != nil {
         m.logger.Printf("[ERR] memberlist: Failed to read remote state: %s %s", err, LogConn(conn))

      if err := m.sendLocalState(conn, join); err != nil {  // send our local state
         m.logger.Printf("[ERR] memberlist: Failed to push local state: %s %s", err, LogConn(conn))

      if err := m.mergeRemoteState(join, remoteNodes, userState); err != nil {  // merge remote state into local state
         m.logger.Printf("[ERR] memberlist: Failed push/pull merge: %s %s", err, LogConn(conn))
   case pingMsg:
      var p ping
      if err := dec.Decode(&p); err != nil {
         m.logger.Printf("[ERR] memberlist: Failed to decode ping: %s %s", err, LogConn(conn))

      // A ping naming a different node is logged as unexpected.
      if p.Node != "" && p.Node != m.config.Name {
         m.logger.Printf("[WARN] memberlist: Got ping for unexpected node %s %s", p.Node, LogConn(conn))

      ack := ackResp{p.SeqNo, nil}  // build the ack for this ping
      out, err := encode(ackRespMsg, &ack)
      if err != nil {
         m.logger.Printf("[ERR] memberlist: Failed to encode ack: %s", err)

      err = m.rawSendMsgStream(conn, out.Bytes())  // send the ack back on the same connection
      if err != nil {
         m.logger.Printf("[ERR] memberlist: Failed to send ack: %s %s", err, LogConn(conn))
      m.logger.Printf("[ERR] memberlist: Received invalid msgType (%d) %s", msgType, LogConn(conn))


// packetListen is a long running goroutine that pulls packets out of the
// transport and hands them off for processing.
//
// Unlike streamListen, packets are ingested inline on this goroutine. The loop
// also watches shutdownCh.
func (m *Memberlist) packetListen() {
	for {
		select {
		case packet := <-m.transport.PacketCh():
			m.ingestPacket(packet.Buf, packet.From, packet.Timestamp)

		case <-m.shutdownCh:

// ingestPacket prepares a raw packet for handleCommand: it decrypts the
// payload when encryption is enabled, verifies the CRC32 checksum when the
// packet carries one, and passes the remaining bytes on.
// NOTE(review): the returns after the decrypt-failure and bad-checksum logs
// are not visible in this excerpt.
func (m *Memberlist) ingestPacket(buf []byte, from net.Addr, timestamp time.Time) {
	// Check if encryption is enabled
	if m.config.EncryptionEnabled() {  // is encryption configured?
		// Decrypt the payload
		plain, err := decryptPayload(m.config.Keyring.GetKeys(), buf, nil)
		if err != nil {
			// Undecryptable packets are accepted as plaintext only when
			// incoming verification is turned off.
			if !m.config.GossipVerifyIncoming {
				// Treat the message as plaintext
				plain = buf
			} else {
				m.logger.Printf("[ERR] memberlist: Decrypt packet failed: %v %s", err, LogAddress(from))

		// Continue processing the plaintext buffer
		buf = plain

	// See if there's a checksum included to verify the contents of the message
	// Layout: byte 0 is the type, bytes 1-4 the big-endian CRC, then the payload.
	if len(buf) >= 5 && messageType(buf[0]) == hasCrcMsg {
		crc := crc32.ChecksumIEEE(buf[5:])
		expected := binary.BigEndian.Uint32(buf[1:5])
		if crc != expected {
			m.logger.Printf("[WARN] memberlist: Got invalid checksum for UDP packet: %x, %x", crc, expected)
		m.handleCommand(buf[5:], from, timestamp)  // parse and handle the command
	} else {
		m.handleCommand(buf, from, timestamp)

// handleCommand decodes the leading message-type byte of buf and dispatches
// the rest. Compound, compressed, ping, indirect ping, ack and nack messages
// are handled immediately; other known messages are queued for later handling
// and handoffCh is signalled.
// NOTE(review): as shown, the suspect/alive/dead cases have empty bodies, and
// the queue locking and the default case label are not visible; presumably
// those cases fall through to the userMsg branch — confirm.
func (m *Memberlist) handleCommand(buf []byte, from net.Addr, timestamp time.Time) {
	if len(buf) < 1 {
		m.logger.Printf("[ERR] memberlist: missing message type byte %s", LogAddress(from))
	// Decode the message type
	msgType := messageType(buf[0])
	buf = buf[1:]

	// Switch on the msgType
	switch msgType {    // dispatch on the message type
	case compoundMsg:
		m.handleCompound(buf, from, timestamp)
	case compressMsg:
		m.handleCompressed(buf, from, timestamp)

	case pingMsg:
		m.handlePing(buf, from)
	case indirectPingMsg:
		m.handleIndirectPing(buf, from)
	case ackRespMsg:
		m.handleAck(buf, from, timestamp)
	case nackRespMsg:
		m.handleNack(buf, from)

	case suspectMsg:
	case aliveMsg:
	case deadMsg:
	case userMsg:
		// Determine the message queue, prioritize alive; the message is
		// placed on a priority queue for later handling.
		queue := m.lowPriorityMsgQueue
		if msgType == aliveMsg {
			queue = m.highPriorityMsgQueue

		// Check for overflow and append if not full
		if queue.Len() >= m.config.HandoffQueueDepth {
			m.logger.Printf("[WARN] memberlist: handler queue full, dropping message (%d) %s", msgType, LogAddress(from))
		} else {
			queue.PushBack(msgHandoff{msgType, buf, from})

		// Notify of pending message
		select {
		case m.handoffCh <- struct{}{}:

		m.logger.Printf("[ERR] memberlist: msg type (%d) not supported %s", msgType, LogAddress(from))


// Schedule is used to ensure the Tick is performed periodically. This
// function is safe to call multiple times. If the memberlist is already
// scheduled, then it won't do anything.
//
// Three background tasks are started, each only when its interval is
// positive: probing, push/pull state exchange and gossip. All of them share
// one stop channel.
// NOTE(review): the matching tickerLock.Lock call is not visible in this
// excerpt.
func (m *Memberlist) schedule() {
	defer m.tickerLock.Unlock()

	// If we already have tickers, then don't do anything, since we're
	// scheduled
	if len(m.tickers) > 0 {

	// Create the stop tick channel, a blocking channel. We close this
	// when we should stop the tickers.
	stopCh := make(chan struct{})

	// Create a new probeTicker
	if m.config.ProbeInterval > 0 {
		t := time.NewTicker(m.config.ProbeInterval)
		go m.triggerFunc(m.config.ProbeInterval, t.C, stopCh, m.probe)  // start the periodic probe
		m.tickers = append(m.tickers, t)

	// Create a push pull ticker if needed
	// pushPullTrigger manages its own timing, so no ticker is recorded here.
	if m.config.PushPullInterval > 0 {
		go m.pushPullTrigger(stopCh)  // start the push/pull sync task

	// Create a gossip ticker if needed
	if m.config.GossipInterval > 0 && m.config.GossipNodes > 0 {
		t := time.NewTicker(m.config.GossipInterval)
		go m.triggerFunc(m.config.GossipInterval, t.C, stopCh, m.gossip)  // start the periodic gossip broadcast
		m.tickers = append(m.tickers, t)

	// If we made any tickers, then record the stopTick channel for
	// later.
	if len(m.tickers) > 0 {
		m.stopTick = stopCh


// probe performs a single round of failure detection: it walks m.nodes from
// m.probeIndex, skips the local node and nodes that are dead or have left,
// and probes the first remaining node.
// NOTE(review): the START label, the nodeLock acquire/release calls and the
// numCheck increment are not visible in this excerpt.
func (m *Memberlist) probe() {
	// Track the number of indexes we've considered probing
	numCheck := 0        // number of nodes examined; probing is a simple round-robin scan

	// Make sure we don't wrap around infinitely
	if numCheck >= len(m.nodes) {
		m.nodeLock.RUnlock()   // every node has been considered: end this round

	// Handle the wrap around case
	if m.probeIndex >= len(m.nodes) {   // index ran past the end: reset and start over
		m.probeIndex = 0
		goto START

	// Determine if we should probe this node
	skip := false
	var node nodeState

	// The node state is copied by value so it can be used after this point
	// without referring back to m.nodes.
	node = *m.nodes[m.probeIndex]  // current node
	if node.Name == m.config.Name {  // skip ourselves
		skip = true
	} else if node.DeadOrLeft() {  // skip dead or departed nodes
		skip = true

	// Potentially skip
	m.probeIndex++   // advance the index
	if skip {
		goto START

	// Probe the specific node
	m.probeNode(&node)  // probe the node


// probeNode handles a single round of failure checking on a node.
//
// Stages, in order:
//  1. Send a direct ping (bundled with a suspect message when the node is not
//     currently considered alive).
//  2. Wait for an ack or for ProbeTimeout.
//  3. Ask up to IndirectChecks random live peers to ping the node indirectly,
//     and optionally try a direct ping over TCP.
//  4. Wait again for an ack; if none arrives, adjust the local health score
//     and mark the node suspect.
//
// NOTE(review): several statements (returns, the deferred awareness update,
// lock calls, nack counting) are not visible in this excerpt.
func (m *Memberlist) probeNode(node *nodeState) {
	defer metrics.MeasureSince([]string{"memberlist", "probeNode"}, time.Now())

	// We use our health awareness to scale the overall probe interval, so we
	// slow down if we detect problems. The ticker that calls us can handle
	// us running over the base interval, and will skip missed ticks.
	probeInterval := m.awareness.ScaleTimeout(m.config.ProbeInterval)
	if probeInterval > m.config.ProbeInterval {
		metrics.IncrCounter([]string{"memberlist", "degraded", "probe"}, 1)

	// Prepare a ping message and setup an ack handler.
	selfAddr, selfPort := m.getAdvertise()  // our advertised address and port
	ping := ping{
		SeqNo:      m.nextSeqNo(),
		Node:       node.Name,
		SourceAddr: selfAddr,
		SourcePort: selfPort,
		SourceNode: m.config.Name,
	// Buffered for the direct reply plus one per indirect check.
	ackCh := make(chan ackMessage, m.config.IndirectChecks+1)
	nackCh := make(chan struct{}, m.config.IndirectChecks+1)
	m.setProbeChannels(ping.SeqNo, ackCh, nackCh, probeInterval)

	// Mark the sent time here, which should be after any pre-processing but
	// before system calls to do the actual send. This probably over-reports
	// a bit, but it's the best we can do. We had originally put this right
	// after the I/O, but that would sometimes give negative RTT measurements
	// which was not desirable.
	sent := time.Now()

	// Send a ping to the node. If this node looks like it's suspect or dead,
	// also tack on a suspect message so that it has a chance to refute as
	// soon as possible.
	deadline := sent.Add(probeInterval)
	addr := node.Address()

	// Arrange for our self-awareness to get updated.
	var awarenessDelta int
	defer func() {
	if node.State == StateAlive {   // node is believed alive: send a plain ping
		if err := m.encodeAndSendMsg(node.FullAddress(), pingMsg, &ping); err != nil {
			m.logger.Printf("[ERR] memberlist: Failed to send ping: %s", err)
			if failedRemote(err) {
			} else {
	} else {
		var msgs [][]byte
		if buf, err := encode(pingMsg, &ping); err != nil {
			m.logger.Printf("[ERR] memberlist: Failed to encode ping message: %s", err)
		} else {
			msgs = append(msgs, buf.Bytes())
		s := suspect{Incarnation: node.Incarnation, Node: node.Name, From: m.config.Name}
		if buf, err := encode(suspectMsg, &s); err != nil {
			m.logger.Printf("[ERR] memberlist: Failed to encode suspect message: %s", err)
		} else {
			msgs = append(msgs, buf.Bytes())

		compound := makeCompoundMessage(msgs)  // node is not alive: bundle the ping with a suspect message
		if err := m.rawSendMsgPacket(node.FullAddress(), &node.Node, compound.Bytes()); err != nil {  // send the bundled messages
			m.logger.Printf("[ERR] memberlist: Failed to send compound ping and suspect message to %s: %s", addr, err)
			if failedRemote(err) {
			} else {

	// Arrange for our self-awareness to get updated. At this point we've
	// sent the ping, so any return statement means the probe succeeded
	// which will improve our health until we get to the failure scenarios
	// at the end of this function, which will alter this delta variable
	// accordingly.
	awarenessDelta = -1

	// Wait for response or round-trip-time.
	select {
	case v := <-ackCh:
		if v.Complete == true {
			if m.config.Ping != nil {  // ping succeeded: report completion to the Ping delegate
				rtt := v.Timestamp.Sub(sent)
				m.config.Ping.NotifyPingComplete(&node.Node, rtt, v.Payload)

		// As an edge case, if we get a timeout, we need to re-enqueue it
		// here to break out of the select below.
		if v.Complete == false {
			ackCh <- v
	case <-time.After(m.config.ProbeTimeout):
		// Note that we don't scale this timeout based on awareness and
		// the health score. That's because we don't really expect waiting
		// longer to help get UDP through. Since health does extend the
		// probe interval it will give the TCP fallback more time, which
		// is more active in dealing with lost packets, and it gives more
		// time to wait for indirect acks/nacks.
		m.logger.Printf("[DEBUG] memberlist: Failed ping: %s (timeout reached)", node.Name)

	// Get some random live nodes. The direct ping failed, so pick random
	// peers to ping the target indirectly. The callback returns true for
	// nodes to exclude: ourselves, the target, and any node not alive.
	kNodes := kRandomNodes(m.config.IndirectChecks, m.nodes, func(n *nodeState) bool {
		return n.Name == m.config.Name ||
			n.Name == node.Name ||
			n.State != StateAlive

	// Attempt an indirect ping.
	expectedNacks := 0
	selfAddr, selfPort = m.getAdvertise()
	ind := indirectPingReq{
		SeqNo:      ping.SeqNo,
		Target:     node.Addr,
		Port:       node.Port,
		Node:       node.Name,
		SourceAddr: selfAddr,
		SourcePort: selfPort,
		SourceNode: m.config.Name,
	for _, peer := range kNodes {
		// We only expect nack to be sent from peers who understand
		// version 4 of the protocol.
		if ind.Nack = peer.PMax >= 4; ind.Nack {

		if err := m.encodeAndSendMsg(peer.FullAddress(), indirectPingMsg, &ind); err != nil {
			m.logger.Printf("[ERR] memberlist: Failed to send indirect ping: %s", err)
		}  // ask the randomly chosen peer to ping the target on our behalf

	// Also make an attempt to contact the node directly over TCP. This
	// helps prevent confused clients who get isolated from UDP traffic
	// but can still speak TCP (which also means they can possibly report
	// misinformation to other nodes via anti-entropy), avoiding flapping in
	// the cluster.
	// This is a little unusual because we will attempt a TCP ping to any
	// member who understands version 3 of the protocol, regardless of
	// which protocol version we are speaking. That's why we've included a
	// config option to turn this off if desired.
	fallbackCh := make(chan bool, 1)   // also try to get an ack over TCP

	disableTcpPings := m.config.DisableTcpPings ||
		(m.config.DisableTcpPingsForNode != nil && m.config.DisableTcpPingsForNode(node.Name))
	if (!disableTcpPings) && (node.PMax >= 3) {
		go func() {
			// Closing the channel ends the range loop further down even
			// when no value was sent.
			defer close(fallbackCh)
			didContact, err := m.sendPingAndWaitForAck(node.FullAddress(), ping, deadline)
			if err != nil {
				m.logger.Printf("[ERR] memberlist: Failed fallback ping: %s", err)
			} else {
				fallbackCh <- didContact
	} else {

	// Wait for the acks or timeout. Note that we don't check the fallback
	// channel here because we want to issue a warning below if that's the
	// *only* way we hear back from the peer, so we have to let this time
	// out first to allow the normal UDP-based acks to come in.
	select {
	case v := <-ackCh:
		if v.Complete == true {   // check the result

	// Finally, poll the fallback channel. The timeouts are set such that
	// the channel will have something or be closed without having to wait
	// any additional time here.
	for didContact := range fallbackCh {
		if didContact {
			m.logger.Printf("[WARN] memberlist: Was able to connect to %s but other probes failed, network may be misconfigured", node.Name)

	// Update our self-awareness based on the results of this failed probe.
	// If we don't have peers who will send nacks then we penalize for any
	// failed probe as a simple health metric. If we do have peers to nack
	// verify, then we can use that as a more sophisticated measure of self-
	// health because we assume them to be working, and they can help us
	// decide if the probed node was really dead or if it was something wrong
	// with ourselves.
	awarenessDelta = 0
	if expectedNacks > 0 {
		// len(nackCh) counts the nacks buffered so far.
		if nackCount := len(nackCh); nackCount < expectedNacks {
			awarenessDelta += (expectedNacks - nackCount)
	} else {
		awarenessDelta += 1

	// No acks received from target, suspect it as failed.
	m.logger.Printf("[INFO] memberlist: Suspect %s has failed, no acks received", node.Name)
	s := suspect{Incarnation: node.Incarnation, Node: node.Name, From: m.config.Name}
	m.suspectNode(&s) // no ack arrived: mark the node suspect


// pushPullTrigger is used to periodically trigger a push/pull until
// a stop tick arrives. We don't use triggerFunc since the push/pull
// timer is dynamically scaled based on cluster size to avoid network
// saturation
//
// NOTE(review): the pushPull call and the returns on stop are not visible in
// this excerpt.
func (m *Memberlist) pushPullTrigger(stop <-chan struct{}) {
	interval := m.config.PushPullInterval

	// Use a random stagger to avoid synchronizing
	randStagger := time.Duration(uint64(rand.Int63()) % uint64(interval))  // random initial delay
	select {
	case <-time.After(randStagger):
	case <-stop:

	// Tick using a dynamic timer
	for {
		tickTime := pushPullScale(interval, m.estNumNodes())    // interval scaled by the estimated cluster size
		select {
		case <-time.After(tickTime):  // time for the next push/pull
		case <-stop:


// pushPull is invoked periodically to randomly perform a complete state
// exchange. Used to ensure a high level of convergence, but is also
// reasonably expensive as the entire state of this node is exchanged
// with the other node.
//
// Failures are logged, not returned.
// NOTE(review): the nodeLock.RLock call and the return for the empty case are
// not visible in this excerpt.
func (m *Memberlist) pushPull() {
	// Get a random live node
	// The callback returns true for nodes to exclude: ourselves and any
	// node that is not alive.
	nodes := kRandomNodes(1, m.nodes, func(n *nodeState) bool {
		return n.Name == m.config.Name ||
			n.State != StateAlive
	m.nodeLock.RUnlock()  // one random node is chosen for a full state sync

	// If no nodes, bail
	if len(nodes) == 0 {
	node := nodes[0]

	// Attempt a push pull
	if err := m.pushPullNode(node.FullAddress(), false); err != nil { // perform the full state sync
		m.logger.Printf("[ERR] memberlist: Push/Pull with %s failed: %s", node.Name, err)

// pushPullNode does a complete state exchange with a specific node.
//
// a is the address of the peer; join is passed through to both the exchange
// and the merge. The first error from either step is returned.
func (m *Memberlist) pushPullNode(a Address, join bool) error {
	defer metrics.MeasureSince([]string{"memberlist", "pushPullNode"}, time.Now())

	// Attempt to send and receive with the node
	remote, userState, err := m.sendAndReceiveState(a, join)  // send our state and receive the remote's
	if err != nil {
		return err

	if err := m.mergeRemoteState(join, remote, userState); err != nil { // merge the received remote state, completing the full sync
		return err
	return nil


// gossip is invoked every GossipInterval period to broadcast our gossip
// messages to a few random nodes.
//
// Up to GossipNodes targets are chosen. Each target receives the pending
// broadcasts that fit into one UDP packet, sent as a single message or as a
// compound message. Send failures are logged.
// NOTE(review): the nodeLock calls, the default switch label and the early
// return on an empty broadcast list are not visible in this excerpt.
func (m *Memberlist) gossip() {
	defer metrics.MeasureSince([]string{"memberlist", "gossip"}, time.Now())

	// Get some random live, suspect, or recently dead nodes
	// The callback returns true for nodes to exclude.
	kNodes := kRandomNodes(m.config.GossipNodes, m.nodes, func(n *nodeState) bool {
		if n.Name == m.config.Name {
			return true

		switch n.State {
		case StateAlive, StateSuspect:
			return false

		case StateDead:
			// Dead nodes are excluded once they have been dead for longer
			// than GossipToTheDeadTime.
			return time.Since(n.StateChange) > m.config.GossipToTheDeadTime

			return true

	// Compute the bytes available
	bytesAvail := m.config.UDPBufferSize - compoundHeaderOverhead
	if m.config.EncryptionEnabled() {
		bytesAvail -= encryptOverhead(m.encryptionVersion())

	for _, node := range kNodes {
		// Get any pending broadcasts
		msgs := m.getBroadcasts(compoundOverhead, bytesAvail)
		if len(msgs) == 0 {

		addr := node.Address()
		if len(msgs) == 1 {
			// Send single message as is
			if err := m.rawSendMsgPacket(node.FullAddress(), &node, msgs[0]); err != nil {
				m.logger.Printf("[ERR] memberlist: Failed to send gossip to %s: %s", addr, err)
		} else {
			// Otherwise create and send a compound message
			compound := makeCompoundMessage(msgs)
			if err := m.rawSendMsgPacket(node.FullAddress(), &node, compound.Bytes()); err != nil {
				m.logger.Printf("[ERR] memberlist: Failed to send gossip to %s: %s", addr, err)
		}  // send the pending data




