Redis Cluster

8ZERO8

于 2022-02-27 21:07:08 发布

阅读量223

点赞数

分类专栏： rides 文章标签：分布式 redis

本文链接：https://blog.csdn.net/lifuxin73/article/details/122523391

版权

rides 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

代码版本redis-6.2.6

1.节点间通信内容

消息类型：

/* Cluster message type codes. Values are consecutive starting at 0, and
 * CLUSTERMSG_TYPE_COUNT is the number of types defined, not a type itself. */
#define CLUSTERMSG_TYPE_PING 0          /* Ping */
#define CLUSTERMSG_TYPE_PONG 1          /* Pong (reply to Ping) */
#define CLUSTERMSG_TYPE_MEET 2          /* Meet "let's join" message */
#define CLUSTERMSG_TYPE_FAIL 3          /* Mark node xxx as failing */
#define CLUSTERMSG_TYPE_PUBLISH 4       /* Pub/Sub Publish propagation */
#define CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST 5 /* May I failover? */
#define CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK 6     /* Yes, you have my vote */
#define CLUSTERMSG_TYPE_UPDATE 7        /* Another node slots configuration */
#define CLUSTERMSG_TYPE_MFSTART 8       /* Pause clients for manual failover */
#define CLUSTERMSG_TYPE_MODULE 9        /* Module cluster API message. */
#define CLUSTERMSG_TYPE_COUNT 10        /* Total number of message types. */

结构体：

/* One gossip entry describing a single node. An array of these is carried
 * as the payload of PING, MEET and PONG messages (the `ping` member of
 * union clusterMsgData). */
typedef struct {
    char nodename[CLUSTER_NAMELEN]; /* Node name */
    uint32_t ping_sent;            /* Time the ping was sent for this node */
    uint32_t pong_received;        /* Time the pong was received for this node */
    char ip[NET_IP_STR_LEN];  /* Node IP address */
    uint16_t port;              /* Port the node uses to talk to clients */
    uint16_t cport;             /* Port the node uses for cluster communication */
    uint16_t flags;             /* Node flags */
    uint16_t pport;             /* plaintext-port, when base port is TLS */
    uint16_t notused1;          /* Not used, going by the field name */
} clusterMsgDataGossip;
/* Message payload. This is a union: which member is meaningful depends on
 * the message type, as noted above each member. */
union clusterMsgData {
    /* PING, MEET and PONG */
    struct {
        /* Array of N clusterMsgDataGossip structures */
        clusterMsgDataGossip gossip[1];
    } ping;

    /* FAIL */
    struct {
        clusterMsgDataFail about;
    } fail;

    /* PUBLISH */
    struct {
        clusterMsgDataPublish msg;
    } publish;

    /* UPDATE */
    struct {
        clusterMsgDataUpdate nodecfg;
    } update;

    /* MODULE */
    struct {
        clusterMsgModule msg;
    } module;
};
/* A cluster bus message: a header describing the sender and the message,
 * followed by a payload (`data`) whose layout depends on `type`. */
typedef struct {
    char sig[4];        /* Signature "RCmb" (Redis Cluster message bus). */
    uint32_t totlen;    /* Message length */
    uint16_t ver;       /* Protocol version, currently set to 1. */
    uint16_t port;      /* TCP base port number. */
    uint16_t type;      /* Message type */
    uint16_t count;     /* Only used for some kind of messages. */
    uint64_t currentEpoch;  /* The epoch accordingly to the sending node. */
    uint64_t configEpoch;   /* The config epoch if it's a master, or the last
                               epoch advertised by its master if it is a
                               slave. */
    uint64_t offset;    /* Master replication offset if node is a master or
                           processed replication offset if node is a slave. */
    char sender[CLUSTER_NAMELEN]; /* Name of the sending node */
    unsigned char myslots[CLUSTER_SLOTS/8]; /* Slots the sending node is
                                               responsible for */
    char slaveof[CLUSTER_NAMELEN];
    char myip[NET_IP_STR_LEN];    /* IP of the sending node */
    char notused1[32];  /* 32 bytes reserved for future usage. */
    uint16_t pport;      /* Sender TCP plaintext port, if base port is TLS */
    uint16_t cport;      /* Sender's cluster communication port */
    uint16_t flags;      /* Sender node flags */
    unsigned char state; /* Cluster state from the POV of the sender */
    unsigned char mflags[3]; /* Message flags: CLUSTERMSG_FLAG[012]_... */
    union clusterMsgData data; /* Message payload */
} clusterMsg;

2.集群节点集群功能实现

1.节点定时发送ping消息

void clusterCron(void) 是周期性任务，每秒执行10次。每当 iteration 对10取余为0时（即每执行10次、约每秒一次），执行以下代码：

/* Excerpt from clusterCron(): the branch taken once every 10 iterations.
 * The declarations of de, min_pong_node and min_pong are outside this
 * excerpt. */
clusterCron
{

 if (!(iteration % 10)) {
        int j;

        /* Sample 5 random nodes and, among the eligible ones, keep the node
         * with the smallest (oldest) pong_received time. */
        for (j = 0; j < 5; j++) {
            de = dictGetRandomKey(server.cluster->nodes);
            clusterNode *this = dictGetVal(de);

            /* Don't ping nodes disconnected or with a ping currently active. */
            if (this->link == NULL || this->ping_sent != 0) continue;
            /* Skip ourselves and nodes flagged as still in handshake. */
            if (this->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_HANDSHAKE))
                continue;
            if (min_pong_node == NULL || min_pong > this->pong_received) {
                min_pong_node = this;
                min_pong = this->pong_received;
            }
        }
        /* Ping the selected node, if any of the sampled nodes qualified. */
        if (min_pong_node) {
            serverLog(LL_DEBUG,"Pinging node %.40s", min_pong_node->name);
            clusterSendPing(min_pong_node->link, CLUSTERMSG_TYPE_PING);
        }
    }

每秒钟当前节点会随机选取5个节点，从其中符合以上条件的节点里找出最早向当前节点回复pong的节点，并向其发送ping消息。也就是说，节点每秒钟向1个其他节点发送ping消息。

2.节点收到ping消息回复pong消息

/* Process a message received on a link. This is an excerpt: the dotted
 * lines stand for code omitted by the author, including the setup of
 * `type`, `hdr` and `sender`. */
int clusterProcessPacket(clusterLink *link) {
...........................
......................
    /* Is this a MEET or PING message? */
    if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_MEET) {
        serverLog(LL_DEBUG,"Ping packet received: %p", (void*)link->node);

        /* On a MEET, or while our own IP is still empty, and only when no
         * announce IP is configured: adopt the address reported by
         * connSockName() for this link (presumably the local address of the
         * connection - confirm) if it differs from the stored one. */
        if ((type == CLUSTERMSG_TYPE_MEET || myself->ip[0] == '\0') &&
            server.cluster_announce_ip == NULL)
        {
            char ip[NET_IP_STR_LEN];

            if (connSockName(link->conn,ip,sizeof(ip),NULL) != -1 &&
                strcmp(ip,myself->ip))
            {
                memcpy(myself->ip,ip,NET_IP_STR_LEN);
                serverLog(LL_WARNING,"IP address for this node updated to %s",
                    myself->ip);
                clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
            }
        }

        /* MEET with no known sender (sender is NULL): create a node in
         * HANDSHAKE state from the address and ports in the header, and add
         * it to the cluster. */
        if (!sender && type == CLUSTERMSG_TYPE_MEET) {
            clusterNode *node;

            node = createClusterNode(NULL,CLUSTER_NODE_HANDSHAKE);
            nodeIp2String(node->ip,link,hdr->myip);
            node->port = ntohs(hdr->port);
            node->pport = ntohs(hdr->pport);
            node->cport = ntohs(hdr->cport);
            clusterAddNode(node);
            clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
        }

        /* For a MEET with no known sender, also process the gossip section
         * of the message. */
        if (!sender && type == CLUSTERMSG_TYPE_MEET)
            clusterProcessGossipSection(hdr,link);

        /* Reply with a PONG */
        clusterSendPing(link,CLUSTERMSG_TYPE_PONG);
    }
..............................
}

回复PONG消息的时候，报文中也会有部分其他节点信息，最终每个节点都有所有节点的信息

节点1和节点2进行pingpong，最终节点1和2互换节点信息，都可得到全部节点信息

3.节点加入集群，进行meet

向节点Node1发送meet命令，将节点Node2加入集群

4.故障转移

slave检测到所属master下线后，向集群中的所有节点广播消息，请求发起故障转移。只有master节点参与投票；获得超过半数master投票的slave将成为新的主节点。

/* Promote this node to master in place of its current master: take over
 * every slot the old master owned, persist the new configuration and
 * announce the change to the rest of the cluster.
 *
 * Does nothing if this node is already a master or has no master. */
void clusterFailoverReplaceYourMaster(void) {
    clusterNode *former_master = myself->slaveof;

    if (nodeIsMaster(myself)) return;
    if (former_master == NULL) return;

    /* 1) Turn this node into a master. */
    clusterSetNodeAsMaster(myself);
    replicationUnsetMaster();

    /* 2) Claim every slot that was assigned to the old master. */
    for (int slot = 0; slot < CLUSTER_SLOTS; slot++) {
        if (!clusterNodeGetSlotBit(former_master,slot)) continue;
        clusterDelSlot(slot);
        clusterAddSlot(myself,slot);
    }

    /* 3) Update the cluster state and save the configuration. */
    clusterUpdateState();
    clusterSaveConfigOrDie(1);

    /* 4) Pong all the other nodes so that they can update their state
     *    accordingly and detect that we switched to master. */
    clusterBroadcastPong(CLUSTER_BROADCAST_ALL);

    /* 5) If a manual failover was in progress, clear its state. */
    resetManualFailover();
}

3.主从同步

1.slave节点同步

/* Make this node a replica (slave) of the master at ip:port.
 *
 * Drops the current master client (if any) and all blocked clients, records
 * the new master address, disconnects this node's own replicas, cancels any
 * replication handshake in progress, fires the module role-change events and
 * finally starts connecting to the new master.
 *
 * The order of the statements matters; see the comment before masterhost
 * is assigned. */
void replicationSetMaster(char *ip, int port) {
    /* No master host configured means this node was acting as a master. */
    int was_master = server.masterhost == NULL;

    sdsfree(server.masterhost);
    server.masterhost = NULL;
    if (server.master) {
        freeClient(server.master);
    }
    disconnectAllBlockedClients(); /* Clients blocked in master, now slave. */

    /* Setting masterhost only after the call to freeClient since it calls
     * replicationHandleMasterDisconnection which can trigger a re-connect
     * directly from within that call. */
    server.masterhost = sdsnew(ip);
    server.masterport = port;

    /* Update oom_score_adj */
    setOOMScoreAdj(-1);

    /* Disconnect all of our own replicas. */
    disconnectSlaves();
    cancelReplicationHandshake(0);
    /* Before destroying our master state, create a cached master using
     * our own parameters, to later PSYNC with the new master. */
    if (was_master) {
        /* Discard any cached master that may already exist. */
        replicationDiscardCachedMaster();
        replicationCacheMasterUsingMyself();
    }

    /* Fire the role change modules event. */
    moduleFireServerEvent(REDISMODULE_EVENT_REPLICATION_ROLE_CHANGED,
                          REDISMODULE_EVENT_REPLROLECHANGED_NOW_REPLICA,
                          NULL);

    /* Fire the master link modules event. */
    if (server.repl_state == REPL_STATE_CONNECTED)
        moduleFireServerEvent(REDISMODULE_EVENT_MASTER_LINK_CHANGE,
                              REDISMODULE_SUBEVENT_MASTER_LINK_DOWN,
                              NULL);
    /* Enter the CONNECT state and start connecting to the new master. */
    server.repl_state = REPL_STATE_CONNECT;
    serverLog(LL_NOTICE,"Connecting to MASTER %s:%d",
        server.masterhost, server.masterport);
    connectWithMaster();
}

2.master节点同步

feedReplicationBacklog():master需要同步给Slave的数据通过此函数加入缓存区

addReplyReplicationBacklog():根据slave的offset，发送相应数据

8ZERO8

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Rides Cluster

Rides Cluster中使用了Gossip协议进行集群信息同步1.节点间通信内容消息类型：#define CLUSTERMSG_TYPE_PING 0 /* Ping */#define CLUSTERMSG_TYPE_PONG 1 /* Pong (reply to Ping) */#define CLUSTERMSG_TYPE_MEET 2 /* Meet "let's join" message */#def...
复制链接

扫一扫

专栏目录