Snowflake雪花算法

最新推荐文章于 2024-06-26 08:48:38 发布

lyc_code

最新推荐文章于 2024-06-26 08:48:38 发布

阅读量749

点赞数

分类专栏：算法

本文链接：https://blog.csdn.net/linyechuan/article/details/100041743

版权

算法专栏收录该内容

1 篇文章 0 订阅

订阅专栏

参考链接：https://zhuanlan.zhihu.com/p/65095562

在应用程序中，经常需要全局唯一的ID作为数据库主键。如何生成全局唯一ID？

首先，需要确定全局唯一ID是整型还是字符串？如果是字符串，那么现有的UUID就完全满足需求，不需要额外的工作。缺点是字符串作为ID占用空间大，索引效率比整型低。

如果采用整型作为ID，那么首先排除掉32位int类型，因为范围太小，必须使用64位long型。

采用整型作为ID时，如何生成自增、全局唯一且不重复的ID？

方案一：利用数据库的自增ID，从1开始，基本可以做到连续递增。Oracle可以用SEQUENCE，MySQL可以用主键的AUTO_INCREMENT，虽然不能保证全局唯一，但每个表唯一，也基本满足需求。

数据库自增ID的缺点是数据在插入前，无法获得ID。数据在插入后，获取的ID虽然是唯一的，但一定要等到事务提交后，ID才算是有效的。有些双向引用的数据，不得不插入后再做一次更新，比较麻烦。

第二种方式是采用一个集中式ID生成器，它可以是Redis，也可以是ZooKeeper，也可以利用数据库的表记录最后分配的ID。

这种方式最大的缺点是复杂性太高，需要严重依赖第三方服务，而且代码配置繁琐。一般来说，越是复杂的方案，越不可靠，并且测试越痛苦。

第三种方式是类似Twitter的Snowflake算法，它给每台机器分配一个唯一标识，然后通过时间戳+标识+自增实现全局唯一ID。这种方式好处在于ID生成算法完全是一个无状态机，无网络调用，高效可靠。缺点是如果唯一标识有重复，会造成ID冲突。

Snowflake算法采用41bit毫秒时间戳，加上10bit机器ID，加上12bit序列号，理论上最多支持1024台机器每秒生成4096000个序列号，对于Twitter的规模来说够用了。

但是对于绝大部分普通应用程序来说，根本不需要每秒超过400万的ID，机器数量也达不到1024台，所以，我们可以改进一下，使用更短的ID生成方式：

53bit ID由32bit秒级时间戳+16bit自增+5bit机器标识组成，累积32台机器，每秒可以生成6.5万个序列号（16bit自增最多为2^16 = 65536个）

Snowflake算法采用41bit毫秒时间戳，加上10bit机器ID，加上12bit序列号，理论上最多支持1024台机器每秒生成4096000个序列号，

* 对于Twitter的规模来说够用了。

* 但是对于绝大部分普通应用程序来说，根本不需要每秒超过400万的ID，机器数量也达不到1024台，所以，我们可以改进一下，使用更短的ID生成方式：

* 53bitID由32bit秒级时间戳+16bit自增+5bit机器标识组成，累积32台机器，每秒可以生成6.5万个序列号

* 53 bits unique id:

* |--------|--------|--------|--------|--------|--------|--------|--------|

* |00000000|00011111|11111111|11111111|11111111|11111111|11111111|11111111|

* |--------|---xxxxx|xxxxxxxx|xxxxxxxx|xxxxxxxx|xxx-----|--------|--------|

* |--------|--------|--------|--------|--------|---xxxxx|xxxxxxxx|xxx-----|

* |--------|--------|--------|--------|--------|--------|--------|---xxxxx|

* Maximum ID = 11111_11111111_11111111_11111111_11111111_11111111_11111111

* Maximum TS = 11111_11111111_11111111_11111111_111

* Maximum NT = ----- -------- -------- -------- ---11111_11111111_111 = 65535

* Maximum SH = ----- -------- -------- -------- -------- -------- ---11111 = 31

* It can generate 64k unique IDs per second per IP, up to 2106-02-07T06:28:15Z (the largest second an unsigned 32-bit Unix timestamp can represent).

算法C++代码实现

uuid.h头文件

#ifndef _UUID_H_

#define _UUID_H_

#include <string>

#include <stdint.h>

using namespace std;

/// 64-bit signed integer used for timestamps, sequence counters and generated IDs.
/// A real type alias (rather than a macro) so that it obeys C++ scoping rules.
using LLong = long long;

/**
 * Generator of Snowflake-style numeric IDs.
 *
 * An ID is packed as: (seconds since a custom epoch) << 21 | (16-bit
 * per-second sequence) << 5 | (5-bit shard/machine id).
 *
 * The generator keeps mutable per-instance state (last second seen and the
 * sequence counter) and performs no locking of its own.
 */
class CUuid

{

    public:

        /// Resets the sequence state and derives the 5-bit shard id from
        /// getServerIdAsLong().
        CUuid();

        ~CUuid();

        /// Returns a number derived from the local host's IP address,
        /// or -1 when the address cannot be determined.
        LLong getServerIdAsLong();

        /// Returns the next ID for the given time (seconds since the Unix epoch).
        LLong nextId(LLong epochSecond);

        /// Packs a timestamp, a sequence number and a shard id into one ID.
        LLong generateId(LLong epochSecond, LLong next, LLong shardId);

        /// NOTE(review): declared here, but no definition is visible in this
        /// source; calling it will fail at link time unless one exists elsewhere.
        std::string GenUnid();

    private:

        /// Number of IDs handed out within the second stored in m_lastEpoch.
        LLong m_offset = 0;

        /// Most recent second for which an ID was generated.
        LLong m_lastEpoch = 0;

        /// Shard (machine) id, kept within 0..31 by the constructor.
        LLong m_SHARD_ID = 0;

};

#endif

uuid.cpp

#include "uuid.h"

#include <arpa/inet.h> /* inet_ntop */
#include <netdb.h> /* struct hostent */
#include <sys/socket.h> /* AF_INET */
#include <sys/time.h>
#include <unistd.h>/* gethostname */

#include <climits>
#include <cstring> /* memcpy */
#include <iostream> /* cout, endl */
#include <sstream>

// Mask / largest value of the 16-bit per-second sequence field (2^16 - 1).
#define MAX_NEXT 0xFFFFL

// Custom epoch in Unix seconds, subtracted from timestamps before packing.
// 946656000 corresponds to 2000-01-01T00:00:00 at UTC+8.
#define OFFSET 946656000L

/**
 * Starts with an empty sequence state and fixes the shard id for the
 * lifetime of the object.
 *
 * The shard id is the low 5 bits of getServerIdAsLong(), so it always lies
 * in 0..31 (a -1 failure result therefore maps to 31).
 */
CUuid::CUuid()
    : m_offset(0),
      m_lastEpoch(0),
      m_SHARD_ID(getServerIdAsLong() & 0x1F)
{
}

/// Nothing to release: the object only holds plain integer members.
CUuid::~CUuid() {}



/**
 * Derives a numeric server identifier from the local host's IPv4 address.
 *
 * Looks up the local host name, resolves it with gethostbyname() and returns
 * the first IPv4 address as a host-byte-order number (e.g. 192.168.1.10 ->
 * 0xC0A8010A).
 *
 * @return The IPv4 address as a non-negative number, or -1 when the host name
 *         cannot be read or does not resolve to an IPv4 address.
 *
 * NOTE: gethostbyname() returns a pointer to static storage, so this function
 * must not be called concurrently from several threads.
 */
LLong CUuid::getServerIdAsLong()
{
    char name[256];

    if (gethostname(name, sizeof(name)) != 0) {
        std::cout<<"hostname transform to ip failed"<<std::endl;
        return -1;
    }
    // gethostname() may leave the buffer unterminated when the name is truncated.
    name[sizeof(name) - 1] = '\0';

    // The lookup can fail (NULL result) or yield a non-IPv4 / empty address list;
    // all of these are reported as a failure instead of being dereferenced.
    const struct hostent* host = gethostbyname(name);
    if (nullptr == host
        || AF_INET != host->h_addrtype
        || host->h_length < static_cast<int>(sizeof(in_addr_t))
        || nullptr == host->h_addr_list
        || nullptr == host->h_addr_list[0]) {
        std::cout<<"hostname transform to ip failed"<<std::endl;
        return -1;
    }

    // h_addr_list[0] holds the raw address bytes in network byte order; copy
    // them out (the buffer has no alignment guarantee) and convert to host order.
    in_addr_t netOrderIp = 0;
    std::memcpy(&netOrderIp, host->h_addr_list[0], sizeof(netOrderIp));

    return static_cast<LLong>(ntohl(netOrderIp));
}


/**
 * Returns the next unique ID for the given point in time.
 *
 * A per-second counter (m_offset) restarts at every new second and supplies
 * the sequence part of the ID. Two situations are handled specially:
 *  - the supplied time is earlier than the last one used: the last one is
 *    reused so that already issued IDs are not produced again;
 *  - the counter wraps around the 16-bit sequence field: generation moves on
 *    to the following second and starts that second's sequence at 1.
 *
 * @param epochSecond Current time in seconds since the Unix epoch.
 * @return The ID built by generateId() from the effective second, the
 *         sequence number and this instance's shard id.
 */
LLong CUuid::nextId(LLong epochSecond)
{
    if (epochSecond < m_lastEpoch)
    {
        // Clock went backwards: stay on the most recent second already in use.
        std::cout << "clock is back: " << epochSecond
                  << " from previous: " << m_lastEpoch << std::endl;
        epochSecond = m_lastEpoch;
    }

    if (m_lastEpoch != epochSecond)
    {
        // A new second begins, so its sequence starts over.
        m_lastEpoch = epochSecond;
        m_offset = 0;
    }

    ++m_offset;
    LLong next = m_offset & MAX_NEXT;

    if (next == 0)
    {
        // The sequence field wrapped: this second is exhausted. Continue in
        // the following second, whose first sequence number is 1.
        std::cout << "maximum id reached in 1 second in epoch: " << epochSecond << std::endl;
        ++epochSecond;
        m_lastEpoch = epochSecond;
        m_offset = 1;
        next = 1;
    }

    return generateId(epochSecond, next, m_SHARD_ID);
}



/**
 * Packs the three ID components into a single number.
 *
 * Layout, from the most significant part down:
 *   (epochSecond - OFFSET) << 21 | 16-bit sequence << 5 | 5-bit shard id
 *
 * @param epochSecond Time in seconds since the Unix epoch; OFFSET is
 *                    subtracted to obtain the stored timestamp.
 * @param next        Sequence number within the second; only its low 16 bits
 *                    are used.
 * @param shardId     Shard (machine) id; only its low 5 bits are used.
 * @return The packed ID.
 */
LLong CUuid::generateId(LLong epochSecond, LLong next, LLong shardId)
{
    typedef unsigned long long ULLong;

    // Shift in unsigned arithmetic: left-shifting a negative signed value
    // (epochSecond earlier than OFFSET) is undefined before C++20.
    const ULLong seconds = static_cast<ULLong>(epochSecond - OFFSET);

    // Confine each field to its own bits so an out-of-range argument cannot
    // overwrite a neighbouring field.
    const ULLong sequence = static_cast<ULLong>(next) & MAX_NEXT;
    const ULLong shard = static_cast<ULLong>(shardId) & 0x1F;

    // 16 sequence bits + 5 shard bits = 21 bits below the timestamp.
    return static_cast<LLong>((seconds << 21) | (sequence << 5) | shard);
}



int main()

{

    struct timeval tv;

    gettimeofday(&tv, NULL);
    CUuid uuid;

    LLong lRet = uuid.nextId(tv.tv_sec);

    ostringstream os;

    os << lRet;

    string result;

    istringstream is(os.str());

    is >> result;

    cout<<result<<endl;

    return 0;

}