Redis Cluster

目录

代码版本redis-6.2.6

1.节点间通信内容

消息类型:

2.集群节点集群功能实现

1.节点定时发送ping消息

2.节点收到ping消息回复pong消息

3.节点加入集群,进行meet

4.故障转移

3.主从同步

         1.slave节点同步

        2.master节点同步


代码版本redis-6.2.6

1.节点间通信内容

消息类型:

/* Cluster bus message types. The receiver compares the message type against
 * these values to decide how a packet is handled. */
#define CLUSTERMSG_TYPE_PING 0          /* Ping */
#define CLUSTERMSG_TYPE_PONG 1          /* Pong (reply to Ping) */
#define CLUSTERMSG_TYPE_MEET 2          /* Meet "let's join" message */
#define CLUSTERMSG_TYPE_FAIL 3          /* Mark node xxx as failing */
#define CLUSTERMSG_TYPE_PUBLISH 4       /* Pub/Sub Publish propagation */
#define CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST 5 /* May I failover? */
#define CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK 6     /* Yes, you have my vote */
#define CLUSTERMSG_TYPE_UPDATE 7        /* Another node slots configuration */
#define CLUSTERMSG_TYPE_MFSTART 8       /* Pause clients for manual failover */
#define CLUSTERMSG_TYPE_MODULE 9        /* Module cluster API message. */
#define CLUSTERMSG_TYPE_COUNT 10        /* Total number of message types. */

结构体:

/* One gossip entry describing a node. An array of these entries forms the
 * payload of PING, MEET and PONG messages (see union clusterMsgData). */
typedef struct {
    char nodename[CLUSTER_NAMELEN]; /* Node name */
    uint32_t ping_sent;            /* Time a ping was sent to the node */
    uint32_t pong_received;        /* Time a pong was received from the node */
    char ip[NET_IP_STR_LEN];  /* Node IP */
    uint16_t port;              /* Port used between the node and clients */
    uint16_t cport;             /* Port used for communication inside the cluster */
    uint16_t flags;             /* Node flags */
    uint16_t pport;             /* plaintext-port, when base port is TLS */
    uint16_t notused1;
} clusterMsgDataGossip;
/* Union of message payloads: which member struct is used depends on the
 * concrete message type. */
union clusterMsgData {
    /* PING, MEET and PONG */
    struct {
        /* Array of N clusterMsgDataGossip structures */
        clusterMsgDataGossip gossip[1];
    } ping;

    /* FAIL */
    struct {
        clusterMsgDataFail about;
    } fail;

    /* PUBLISH */
    struct {
        clusterMsgDataPublish msg;
    } publish;

    /* UPDATE */
    struct {
        clusterMsgDataUpdate nodecfg;
    } update;

    /* MODULE */
    struct {
        clusterMsgModule msg;
    } module;
};
/* A cluster bus message: a fixed header describing the sender, followed by a
 * type-dependent payload in `data`. */
typedef struct {
    char sig[4];        /* Signature "RCmb" (Redis Cluster message bus). */
    uint32_t totlen;    /* Total length of this message */
    uint16_t ver;       /* Protocol version, currently set to 1. */
    uint16_t port;      /* TCP base port number. */
    uint16_t type;      /* Message type */
    uint16_t count;     /* Only used for some kind of messages. */
    uint64_t currentEpoch;  /* The epoch accordingly to the sending node. */
    uint64_t configEpoch;   /* The config epoch if it's a master, or the last
                               epoch advertised by its master if it is a
                               slave. */
    uint64_t offset;    /* Master replication offset if node is a master or
                           processed replication offset if node is a slave. */
    char sender[CLUSTER_NAMELEN]; /* Name of the sending node */
    unsigned char myslots[CLUSTER_SLOTS/8]; /* Slots served by the sending node
                                               (CLUSTER_SLOTS/8 bytes, i.e. one
                                               bit per slot) */
    char slaveof[CLUSTER_NAMELEN];
    char myip[NET_IP_STR_LEN];    /* IP of the sending node */
    char notused1[32];  /* 32 bytes reserved for future usage. */
    uint16_t pport;      /* Sender TCP plaintext port, if base port is TLS */
    uint16_t cport;      /* Port of the sending node (presumably its cluster
                            communication port, as with cport in
                            clusterMsgDataGossip -- confirm) */
    uint16_t flags;      /* Sender node flags */
    unsigned char state; /* Cluster state from the POV of the sender */
    unsigned char mflags[3]; /* Message flags: CLUSTERMSG_FLAG[012]_... */
    union clusterMsgData data; /* Message payload */
} clusterMsg;

2.集群节点集群功能实现

1.节点定时发送ping消息

void clusterCron(void)每秒执行10次,属于周期性任务。每当迭代计数iteration能被10整除时(即大约每秒一次),执行以下代码:

clusterCron
{

 /* Runs on every 10th iteration only. Samples a few random nodes and pings
  * the eligible one with the smallest pong_received. The variables de,
  * min_pong_node and min_pong are declared outside this excerpt. */
 if (!(iteration % 10)) {
        int j;

        /* Pick 5 random nodes and remember the one from which a pong was
         * received the longest time ago (smallest pong_received). */
        for (j = 0; j < 5; j++) {
            de = dictGetRandomKey(server.cluster->nodes);
            clusterNode *this = dictGetVal(de);

            /* Don't ping nodes disconnected or with a ping currently active. */
            if (this->link == NULL || this->ping_sent != 0) continue;
            /* Skip ourselves and nodes flagged as still in handshake. */
            if (this->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_HANDSHAKE))
                continue;
            if (min_pong_node == NULL || min_pong > this->pong_received) {
                min_pong_node = this;
                min_pong = this->pong_received;
            }
        }
        /* At most one PING is sent per pass; none if no sampled node was
         * eligible. */
        if (min_pong_node) {
            serverLog(LL_DEBUG,"Pinging node %.40s", min_pong_node->name);
            clusterSendPing(min_pong_node->link, CLUSTERMSG_TYPE_PING);
        }
    }

每秒(即clusterCron每执行10次)当前节点会随机抽取5个节点,在其中符合以上条件的节点里找出最久没有收到其pong的节点(pong_received最小),并向其发送ping消息。也就是说,这段逻辑让节点每秒最多向1个其他节点发送ping消息。

2.节点收到ping消息回复pong消息

/* Process a message received on a cluster link (excerpt: only the PING/MEET
 * handling is shown; the dotted lines mark code omitted by the article). */
int clusterProcessPacket(clusterLink *link) {
...........................
......................
    /* Is this a MEET or a PING message? */
    if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_MEET) {
        serverLog(LL_DEBUG,"Ping packet received: %p", (void*)link->node);

        /* On MEET, or while our own IP is still empty, and only when no
         * cluster_announce_ip is configured: take the address reported by
         * connSockName() for this connection (presumably the local side of
         * the socket -- confirm) and, if it differs from myself->ip, adopt
         * it and schedule a config save. */
        if ((type == CLUSTERMSG_TYPE_MEET || myself->ip[0] == '\0') &&
            server.cluster_announce_ip == NULL)
        {
            char ip[NET_IP_STR_LEN];

            if (connSockName(link->conn,ip,sizeof(ip),NULL) != -1 &&
                strcmp(ip,myself->ip))
            {
                memcpy(myself->ip,ip,NET_IP_STR_LEN);
                serverLog(LL_WARNING,"IP address for this node updated to %s",
                    myself->ip);
                clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
            }
        }

        /* MEET from a sender we do not know yet: create a node in handshake
         * state, fill its address and ports from the message header, add it
         * to the node table and schedule a config save. */
        if (!sender && type == CLUSTERMSG_TYPE_MEET) {
            clusterNode *node;

            node = createClusterNode(NULL,CLUSTER_NODE_HANDSHAKE);
            nodeIp2String(node->ip,link,hdr->myip);
            node->port = ntohs(hdr->port);
            node->pport = ntohs(hdr->pport);
            node->cport = ntohs(hdr->cport);
            clusterAddNode(node);
            clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
        }

        /* For a MEET from an unknown sender the gossip section is processed
         * here as well. */
        if (!sender && type == CLUSTERMSG_TYPE_MEET)
            clusterProcessGossipSection(hdr,link);

        /* Reply with a PONG in every case. */
        clusterSendPing(link,CLUSTERMSG_TYPE_PONG);
    }
..............................
}

回复PONG消息的时候,报文中也会有部分其他节点信息,最终每个节点都有所有节点的信息

 节点1和节点2进行pingpong,最终节点1和2互换节点信息,都可得到全部节点信息

3.节点加入集群,进行meet

  向节点Node1发送CLUSTER MEET命令(参数为Node2的ip和端口),Node1随后向Node2发送MEET消息,从而将节点Node2加入集群

4.故障转移

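        slave检测到所属master下线后,向集群中的节点广播故障转移授权请求(CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST)。只有master才参与投票(以CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK回复);slave获得超过半数master的投票后,执行下面的函数把自己提升为主节点。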

/* Promote this node to master in place of the master it currently
 * replicates. Does nothing if this node is already a master or has no
 * master. */
void clusterFailoverReplaceYourMaster(void) {
    int j;
    clusterNode *oldmaster = myself->slaveof;

    if (nodeIsMaster(myself) || oldmaster == NULL) return;

    /* 1) Turn the current node into a master. */
    clusterSetNodeAsMaster(myself);
    replicationUnsetMaster();

    /* 2) Claim every slot that was assigned to the old master. */
    for (j = 0; j < CLUSTER_SLOTS; j++) {
        if (clusterNodeGetSlotBit(oldmaster,j)) {
            clusterDelSlot(j);
            clusterAddSlot(myself,j);
        }
    }

    /* 3) Update the cluster state and save the configuration. */
    clusterUpdateState();
    clusterSaveConfigOrDie(1);

    /* 4) Broadcast a PONG to all nodes so that they update their state and
     *    notice that we switched to master. */
    clusterBroadcastPong(CLUSTER_BROADCAST_ALL);

    /* 5) Reset the manual failover state, clearing it if a manual failover
     *    was in progress. */
    resetManualFailover();
}

3.主从同步

slave从master全量同步数据

         1.slave节点同步

Slave全量同步

Slave增量同步

/* Make the current node a slave of the master at the given address.
 *
 * ip:   master host; copied into server.masterhost.
 * port: master port; stored in server.masterport.
 *
 * The statement order below is significant (see the masterhost comment). */
void replicationSetMaster(char *ip, int port) {
    /* No masterhost set means we were acting as a master until now. */
    int was_master = server.masterhost == NULL;

    sdsfree(server.masterhost);
    server.masterhost = NULL;
    if (server.master) {
        freeClient(server.master);
    }
    disconnectAllBlockedClients(); /* Clients blocked in master, now slave. */

    /* Setting masterhost only after the call to freeClient since it calls
     * replicationHandleMasterDisconnection which can trigger a re-connect
     * directly from within that call. */
    server.masterhost = sdsnew(ip);
    server.masterport = port;

    /* Update oom_score_adj */
    setOOMScoreAdj(-1);

    /* Disconnect all of our own slaves. */
    disconnectSlaves();
    cancelReplicationHandshake(0);
    /* Before destroying our master state, create a cached master using
     * our own parameters, to later PSYNC with the new master. */
    if (was_master) {
        /* Drop any cached master that may already exist. */
        replicationDiscardCachedMaster();
        replicationCacheMasterUsingMyself();
    }

    /* Fire the role change modules event. */
    moduleFireServerEvent(REDISMODULE_EVENT_REPLICATION_ROLE_CHANGED,
                          REDISMODULE_EVENT_REPLROLECHANGED_NOW_REPLICA,
                          NULL);

    /* Fire the master link modules event. */
    if (server.repl_state == REPL_STATE_CONNECTED)
        moduleFireServerEvent(REDISMODULE_EVENT_MASTER_LINK_CHANGE,
                              REDISMODULE_SUBEVENT_MASTER_LINK_DOWN,
                              NULL);
    /* Enter the CONNECT state and start connecting to the new master. */
    server.repl_state = REPL_STATE_CONNECT;
    serverLog(LL_NOTICE,"Connecting to MASTER %s:%d",
        server.masterhost, server.masterport);
    connectWithMaster();
}

        2.master节点同步

        

全量同步rdb流程
部分同步

 feedReplicationBacklog():master需要同步给Slave的数据通过此函数写入复制积压缓冲区(replication backlog)

addReplyReplicationBacklog():根据slave的offset,发送相应数据

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值