一.雪花算法的基本定义
1.雪花算法的原理就是生成一个64位的全局唯一id,由1bit符号位+41bit时间戳+10bit机器id+12bit序列号组成,第一位符号位不可调整,其余各部分的位数可以按需调整。
2. 41位的时间戳可以表示 (1 << 41) / (1000 * 60 * 60 * 24 * 365) = 69 年
3. 10bit可以表示 1 << 10 = 1024 台机器,一般分5bit给数据中心,5bit给机器id
4.12bit自增序列号可以表示 (1 << 12) 个ID。理论上snowflake方案的QPS约为409.6w/s.
5. 优点
(1) 毫秒数在高位,自增序列在低位,整个ID都是趋势递增的
(2)可以不依赖数据库等第三方系统,以服务的方式部署,稳定性高,生成ID的性能也非常高。
(3)可以根据自身业务特性分配bit位,灵活
6.缺点
非常依赖机器时钟,如果机器时钟回拨,会导致生成重复ID。
二.普通实现-go代码如下
package main
import (
"errors"
"fmt"
"sync"
"time"
)
// Layout constants for the generated ID. From the shifts below, the low 12
// bits hold the sequence, the next 5 bits the worker ID, the next 5 bits the
// data center ID, and everything from bit 22 upwards the elapsed time.
const (
	workerIDBits     = uint64(5) // width of the worker ID field
	dataCenterIDBits = uint64(5) // width of the data center ID field
	sequenceBits     = uint(12)  // width of the per-millisecond sequence field

	// Largest value each field can hold (all ones within the field width).
	maxWorkerID     = int64(-1) ^ (int64(-1) << workerIDBits)
	maxDataCenterID = int64(-1) ^ (int64(-1) << dataCenterIDBits)
	maxSequence     = int64(-1) ^ (int64(-1) << sequenceBits)

	// Left-shift amounts that move each field to its position in the ID.
	timeLeft = uint8(22)
	dataLeft = uint8(17)
	workLeft = uint8(12)

	// twepoch is the custom epoch (2023-08-22 UTC) in milliseconds. It is
	// subtracted from a time.Now().UnixMilli() reading, so it has to be in
	// milliseconds as well; the former value 1692704479 was a seconds value,
	// which put the effective epoch in January 1970 and used up most of the
	// timestamp range before the first ID was ever generated.
	twepoch = int64(1692704479000)
)
// Worker is a snowflake ID generator for one (data center, worker) pair.
type Worker struct {
	mu sync.Mutex

	// LastStamp is the timestamp recorded for the previously generated ID.
	LastStamp int64
	// WorkerID is the ID of this node.
	WorkerID int64
	// DataCenterID is the ID of the data center this node belongs to.
	DataCenterID int64
	// Sequence counts the IDs already generated in the current millisecond,
	// starting from 0; at most 4096 IDs fit into one millisecond.
	Sequence int64
}
// NewWorker builds a Worker for the given worker and data center IDs.
// It panics when either ID is negative or larger than its field maximum
// (maxWorkerID / maxDataCenterID).
func NewWorker(workerId, dataCenterId int64) *Worker {
	switch {
	case workerId < 0, workerId > maxWorkerID:
		panic("机器id太大")
	case dataCenterId < 0, dataCenterId > maxDataCenterID:
		panic("数据中心id太大")
	}
	// LastStamp and Sequence start at their zero values.
	return &Worker{
		WorkerID:     workerId,
		DataCenterID: dataCenterId,
	}
}
// getMilliSeconds returns the current wall-clock time as Unix milliseconds.
func (w *Worker) getMilliSeconds() int64 {
	now := time.Now()
	return now.UnixMilli()
}
// NextID returns the next ID. It holds w.mu for the whole call, so it may be
// called from several goroutines at once.
func (w *Worker) NextID() (uint64, error) {
	w.mu.Lock()
	defer w.mu.Unlock()

	id, err := w.nextID()
	return id, err
}
// nextID produces the next ID without taking the lock; NextID is the locked
// entry point. It returns an error when the clock reads earlier than the
// timestamp of the previous ID.
func (w *Worker) nextID() (uint64, error) {
	now := w.getMilliSeconds()

	switch {
	case now < w.LastStamp:
		return 0, errors.New("time is moving backwards,waiting until")
	case now == w.LastStamp:
		// Same millisecond as the previous ID: advance the sequence, wrapping
		// to 0 once it passes maxSequence.
		w.Sequence = (w.Sequence + 1) & maxSequence
		if w.Sequence == 0 {
			// Sequence space for this millisecond is used up; spin until the
			// clock reports a later millisecond.
			for now <= w.LastStamp {
				now = w.getMilliSeconds()
			}
		}
	default:
		// A later millisecond than the previous ID: start the sequence over.
		w.Sequence = 0
	}

	w.LastStamp = now

	timePart := (now - twepoch) << timeLeft
	dataCenterPart := w.DataCenterID << dataLeft
	workerPart := w.WorkerID << workLeft
	return uint64(timePart | dataCenterPart | workerPart | w.Sequence), nil
}
// wg tracks the ID-generating goroutines started by main.
var wg sync.WaitGroup

// main requests 10000 IDs concurrently from one Worker and reports either the
// first generation error, the first duplicate ID, or overall success.
func main() {
	const count = 10000

	w := NewWorker(5, 5)
	ids := make(chan uint64, count)
	errs := make(chan error, count)

	wg.Add(count)
	for i := 0; i < count; i++ {
		go func() {
			defer wg.Done()
			id, err := w.NextID()
			if err != nil {
				// Keep failures apart from real IDs: a failed call returns 0,
				// which would otherwise show up below as a bogus duplicate.
				errs <- err
				return
			}
			ids <- id
		}()
	}
	wg.Wait()
	// All senders are done, so both channels can be closed and drained.
	close(ids)
	close(errs)

	if err, failed := <-errs; failed {
		fmt.Printf("generate id failed: %v\n", err)
		return
	}

	seen := make(map[uint64]struct{}, count)
	for id := range ids {
		if _, dup := seen[id]; dup {
			fmt.Printf("repeat id %d\n", id)
			return
		}
		seen[id] = struct{}{}
	}
	fmt.Println("ALL", len(seen), "snowflake ID Get successed!")
}
三.时钟回拨问题和解决方案
因为机器本地时钟可能会因为各种原因发生不准的情况,网络中提供了NTP服务来做时间校准,做校准的时候就会发生时钟的跳跃或者回拨问题。
解决思路有以下:
1. 不依赖机器时钟驱动,自然就没有时钟回拨的问题。在初始时间戳上自增,不跟随机器时钟增加,当序列号达到最大的时候,在时间戳上+1,这样不会浪费序列号,适合流量比较大的场景,如果流量比较小,可能会出现时间断层滞后。
package main
import (
"fmt"
"sync"
"time"
)
// Field widths, field maxima and shift amounts for the clock-independent
// generator.
const (
	TestWorkIdBits     = uint64(5)  // width of the worker ID field
	TestDataCenterBits = uint64(5)  // width of the data center ID field
	TestSequenceBits   = uint64(12) // width of the sequence field

	// Largest value each field can hold (all ones within the field width).
	testWorkBitsMax       = -1 ^ (-1 << TestWorkIdBits)
	testDataCenterBitsMax = -1 ^ (-1 << TestDataCenterBits)
	testSequenceBitsMax   = -1 ^ (-1 << TestSequenceBits)

	// Testtwepech is the epoch subtracted from the logical timestamp. It is
	// only ever combined with startTimeStamp, so the two must share a unit.
	// NOTE(review): the value looks like a Unix time in seconds.
	Testtwepech = int64(1692704479)

	maxLength = 2000 // length of TestSequenceHistory

	// Left-shift amounts that move each field to its position in the ID.
	testTimeShift       = int64(22)
	testDataCenterShift = int(17)
	testWorkIdShift     = int(12)
)

// TestSequenceHistory is a package-level buffer of maxLength entries.
var TestSequenceHistory = make([]int64, maxLength)

// startTimeStamp is the logical timestamp used instead of the machine clock.
// It is package-level state shared by every TestWork.
var startTimeStamp = int64(1692766479)

// TestWork is a snowflake-style ID generator for one (data center, worker)
// pair.
type TestWork struct {
	mu           sync.RWMutex
	WorkID       int64 // ID of this node
	DataCenterID int64 // ID of the data center this node belongs to
	LastStamp    int64 // timestamp of the previous ID
	Sequence     int64 // sequence number of the previous ID
}

// NewTestWork builds a TestWork for the given worker and data center IDs.
// It panics when either ID is negative or exceeds its field maximum.
//
// The limits are this listing's own testWorkBitsMax / testDataCenterBitsMax;
// the identifiers maxWorkerID / maxDataCenterID used before are not declared
// here.
func NewTestWork(workId, dataCenterId int64) *TestWork {
	if workId < 0 || workId > testWorkBitsMax {
		panic("机器id太大")
	}
	if dataCenterId < 0 || dataCenterId > testDataCenterBitsMax {
		panic("数据中心id太大")
	}
	return &TestWork{
		WorkID:       workId,
		DataCenterID: dataCenterId,
		LastStamp:    0,
		Sequence:     0,
	}
}
// testGetMillisSecond returns the current wall-clock time as Unix
// milliseconds.
func (*TestWork) testGetMillisSecond() int64 {
	now := time.Now()
	return now.UnixMilli()
}
// TestNextID returns the next ID while holding w.mu for the whole call.
func (w *TestWork) TestNextID() (int64, error) {
	w.mu.Lock()
	defer w.mu.Unlock()

	id, err := w.testNextID()
	return id, err
}
// testNextID produces the next ID from the logical clock startTimeStamp
// rather than the machine clock: the sequence is advanced on every call and
// the logical timestamp moves forward by one each time the sequence wraps
// to 0. The returned error is always nil.
func (w *TestWork) testNextID() (int64, error) {
	seq := (w.Sequence + 1) & testSequenceBitsMax
	w.Sequence = seq
	if seq == 0 {
		// Sequence space used up: move on to the next logical tick.
		startTimeStamp++
	}

	timePart := (startTimeStamp - Testtwepech) << testTimeShift
	dataCenterPart := w.DataCenterID << testDataCenterShift
	workerPart := w.WorkID << testWorkIdShift
	return timePart | dataCenterPart | workerPart | seq, nil
}
// main requests 10000 IDs concurrently from one TestWork and reports whether
// any ID was handed out twice.
func main() {
	const total = 10000

	var group sync.WaitGroup
	gen := NewTestWork(5, 5)
	results := make(chan int64, total)

	group.Add(total)
	for n := 0; n < total; n++ {
		go func() {
			defer group.Done()
			id, _ := gen.TestNextID()
			results <- id
		}()
	}
	group.Wait()

	seen := make(map[int64]int, total)
	for n := 0; n < total; n++ {
		id := <-results
		if _, dup := seen[id]; dup {
			fmt.Println("重复")
			return
		}
		seen[id] = n
	}
	fmt.Println("ALL", len(seen), "snowflake generator success")
}
2.依赖机器时钟,如果机器时钟回拨范围比较小,比如几十毫秒,可以等待一段时间让它恢复到正常。如果流量不大,前几百毫秒或者几秒内肯定有序列号没用完,可以将前面几百毫秒的时间的最大序列号缓存起来,如果发生时钟回拨,就从缓存中获取序列号自增。
2.1 等待时间
package main
import (
"fmt"
"sync"
"time"
)
// Field widths, field maxima and shift amounts for the wait-on-rollback
// generator.
const (
	TestWorkIdBits     = uint64(5)  // width of the worker ID field
	TestDataCenterBits = uint64(5)  // width of the data center ID field
	TestSequenceBits   = uint64(12) // width of the sequence field

	// Largest value each field can hold (all ones within the field width).
	testWorkBitsMax       = -1 ^ (-1 << TestWorkIdBits)
	testDataCenterBitsMax = -1 ^ (-1 << TestDataCenterBits)
	testSequenceBitsMax   = -1 ^ (-1 << TestSequenceBits)

	// Testtwepech is the custom epoch (2023-08-22 UTC) in milliseconds. It is
	// subtracted from a time.Now().UnixMilli() reading, so it has to be in
	// milliseconds too; the former value 1692704479 was a seconds value.
	Testtwepech = int64(1692704479000)

	maxLength = 2000 // length of TestSequenceHistory

	// Left-shift amounts that move each field to its position in the ID.
	testTimeShift       = int64(22)
	testDataCenterShift = int(17)
	testWorkIdShift     = int(12)
)

// TestSequenceHistory is a package-level buffer of maxLength entries.
var TestSequenceHistory = make([]int64, maxLength)

// TestWork is a snowflake ID generator for one (data center, worker) pair.
type TestWork struct {
	mu           sync.RWMutex
	WorkID       int64 // ID of this node
	DataCenterID int64 // ID of the data center this node belongs to
	LastStamp    int64 // millisecond timestamp of the previous ID
	Sequence     int64 // sequence number used within LastStamp
}

// NewTestWork builds a TestWork for the given worker and data center IDs.
// It panics when either ID is negative or exceeds its field maximum.
//
// The limits are this listing's own testWorkBitsMax / testDataCenterBitsMax;
// the identifiers maxWorkerID / maxDataCenterID used before are not declared
// here.
func NewTestWork(workId, dataCenterId int64) *TestWork {
	if workId < 0 || workId > testWorkBitsMax {
		panic("机器id太大")
	}
	if dataCenterId < 0 || dataCenterId > testDataCenterBitsMax {
		panic("数据中心id太大")
	}
	return &TestWork{
		WorkID:       workId,
		DataCenterID: dataCenterId,
		LastStamp:    0,
		Sequence:     0,
	}
}
// testGetMillisSecond returns the current wall-clock time as Unix
// milliseconds.
func (*TestWork) testGetMillisSecond() int64 {
	now := time.Now()
	return now.UnixMilli()
}
// TestNextID returns the next ID while holding w.mu for the whole call.
func (w *TestWork) TestNextID() (int64, error) {
	w.mu.Lock()
	defer w.mu.Unlock()

	id, err := w.testNextID()
	return id, err
}
// testNextID produces the next ID without taking the lock; TestNextID is the
// locked entry point.
//
// When the clock reads earlier than LastStamp, it waits for the clock to
// catch up: at most maxWaits times, and only while the gap is no larger than
// maxRollbackMs. If that is not enough it returns a non-nil error (this
// function used to panic here even though it has an error result).
func (w *TestWork) testNextID() (int64, error) {
	const (
		maxWaits      = 2   // how many times to wait for the clock
		maxRollbackMs = 500 // largest gap, in ms, worth waiting for
	)

	timeStamp := w.testGetMillisSecond()
	for waits := 0; timeStamp < w.LastStamp; waits++ {
		behind := w.LastStamp - timeStamp
		if waits >= maxWaits || behind > maxRollbackMs {
			return 0, fmt.Errorf("生成id超时: clock is %dms behind the last timestamp", behind)
		}
		// Sleep just long enough for the clock to reach LastStamp instead of
		// a fixed 500ms, so a small rollback does not stall callers.
		time.Sleep(time.Duration(behind) * time.Millisecond)
		timeStamp = w.testGetMillisSecond()
	}

	if timeStamp == w.LastStamp {
		// Same millisecond as the previous ID: advance the sequence, wrapping
		// to 0 once it passes testSequenceBitsMax.
		w.Sequence = (w.Sequence + 1) & testSequenceBitsMax
		if w.Sequence == 0 {
			// Sequence space used up; spin until a later millisecond.
			for timeStamp <= w.LastStamp {
				timeStamp = w.testGetMillisSecond()
			}
		}
	} else {
		w.Sequence = 0
	}
	w.LastStamp = timeStamp

	id := ((timeStamp - Testtwepech) << testTimeShift) |
		(w.DataCenterID << testDataCenterShift) |
		(w.WorkID << testWorkIdShift) |
		w.Sequence
	return id, nil
}
// main requests 10000 IDs concurrently from one TestWork and reports either
// the first generation error, a duplicate ID, or overall success.
func main() {
	const count = 10000

	var wg sync.WaitGroup
	w := NewTestWork(5, 5)
	ids := make(chan int64, count)
	errs := make(chan error, count)

	wg.Add(count)
	for i := 0; i < count; i++ {
		go func() {
			defer wg.Done()
			id, err := w.TestNextID()
			if err != nil {
				// Keep failures apart from real IDs so that a failed call is
				// not mistaken for a duplicate below.
				errs <- err
				return
			}
			ids <- id
		}()
	}
	wg.Wait()
	// All senders are done, so both channels can be closed and drained.
	close(ids)
	close(errs)

	if err, failed := <-errs; failed {
		fmt.Printf("generate id failed: %v\n", err)
		return
	}

	seen := make(map[int64]struct{}, count)
	for id := range ids {
		if _, dup := seen[id]; dup {
			fmt.Println("重复")
			return
		}
		seen[id] = struct{}{}
	}
	fmt.Println("ALL", len(seen), "snowflake generator success")
}
2.2缓存
package main
import (
"fmt"
"sync"
"time"
)
// Field widths, field maxima and shift amounts for the cache-backed
// generator.
const (
	WorkIDBits     = uint64(5)  // width of the worker ID field
	DataCenterBits = uint64(5)  // width of the data center ID field
	SequenceBits   = uint64(12) // width of the sequence field

	WorkIDBitsMax     = -1 ^ (-1 << WorkIDBits)     // largest worker ID
	DataCenterBitsMax = -1 ^ (-1 << DataCenterBits) // largest data center ID
	SequenceBitsMax   = -1 ^ (-1 << SequenceBits)   // largest sequence number

	TimeShift           = int64(22) // left shift of the timestamp
	DataCenterBitsShift = int(17)   // left shift of the data center ID
	WorkIDBitsShift     = int(12)   // left shift of the worker ID

	// Twepech is the custom epoch (2023-08-22 UTC) in milliseconds. It is
	// subtracted from a time.Now().UnixMilli() reading, so it has to be in
	// milliseconds too; the former value 1692704479 was a seconds value.
	Twepech = int64(1692704479000)

	MaxLength = 2000 // largest tolerated clock rollback, in milliseconds
)

// SequenceHistory caches, per millisecond slot (timestamp % MaxLength), the
// largest sequence number handed out for that millisecond.
var SequenceHistory = make([]int64, MaxLength)

// Work is a snowflake ID generator for one (data center, worker) pair.
type Work struct {
	mu         sync.RWMutex
	LastStamp  int64 // timestamp of the last ID generated on the normal path
	Sequence   int64 // sequence number
	WorkID     int64 // ID of this node
	DataCenter int64 // ID of the data center this node belongs to
}

// NewWorkGenerator builds a Work for the given worker and data center IDs.
// It panics when either ID is negative or exceeds its field maximum.
//
// The limits are this listing's own WorkIDBitsMax / DataCenterBitsMax; the
// identifiers maxWorkerID / maxDataCenterID used before are not declared
// here.
func NewWorkGenerator(workID, dataCenter int64) *Work {
	if workID < 0 || workID > WorkIDBitsMax {
		panic("机器id太大")
	}
	if dataCenter < 0 || dataCenter > DataCenterBitsMax {
		panic("数据中心id太大")
	}
	return &Work{
		LastStamp:  0,
		Sequence:   0,
		WorkID:     workID,
		DataCenter: dataCenter,
	}
}
// getMillisSecond returns the current wall-clock time as Unix milliseconds.
func (w *Work) getMillisSecond() int64 {
	now := time.Now()
	return now.UnixMilli()
}
// generatorID returns the next ID while holding w.mu for the whole call.
//
// Normal path (clock at or after LastStamp): the sequence is advanced within
// the same millisecond, or reset to 0 for a later one, and the value is
// recorded in SequenceHistory.
//
// Rollback path (clock before LastStamp): the ID is built from the rolled-back
// timestamp, continuing from the sequence cached for that millisecond slot.
// LastStamp and w.Sequence are deliberately left untouched there so that they
// keep describing the newest millisecond; otherwise, once the clock catches up
// to LastStamp, the normal path would continue from a sequence belonging to a
// different millisecond and could hand out an ID a second time.
//
// It panics when the clock is MaxLength milliseconds or more behind
// LastStamp.
func (w *Work) generatorID() int64 {
	w.mu.Lock()
	defer w.mu.Unlock()

	// compose packs a timestamp and a sequence number into an ID.
	compose := func(ts, seq int64) int64 {
		return ((ts - Twepech) << TimeShift) |
			(w.DataCenter << DataCenterBitsShift) |
			(w.WorkID << WorkIDBitsShift) |
			seq
	}

	timeStamp := w.getMillisSecond()
	// index is the cache slot of the current millisecond.
	index := int(timeStamp % MaxLength)

	// Rollback path.
	for timeStamp < w.LastStamp {
		// Timestamps exactly MaxLength apart share a slot, so the cache can
		// only vouch for rollbacks strictly shorter than MaxLength.
		if w.LastStamp-timeStamp >= MaxLength {
			panic("时间会退范围太大,超过2000ms缓存")
		}
		seq := (SequenceHistory[index] + 1) & SequenceBitsMax
		if seq != 0 {
			SequenceHistory[index] = seq
			return compose(timeStamp, seq)
		}
		// This millisecond's sequence space is used up: read the clock again
		// and retry, or fall through once it has caught up with LastStamp.
		timeStamp = w.getMillisSecond()
		index = int(timeStamp % MaxLength)
	}

	// Normal path.
	if timeStamp == w.LastStamp {
		w.Sequence = (w.Sequence + 1) & SequenceBitsMax
		if w.Sequence == 0 {
			// Sequence space used up; spin until a later millisecond.
			for timeStamp <= w.LastStamp {
				timeStamp = w.getMillisSecond()
				index = int(timeStamp % MaxLength)
			}
		}
	} else {
		// A later millisecond than LastStamp: start the sequence over.
		w.Sequence = 0
	}

	SequenceHistory[index] = w.Sequence
	w.LastStamp = timeStamp
	return compose(timeStamp, w.Sequence)
}
// wgs tracks the ID-generating goroutines started by main.
var wgs sync.WaitGroup

// main requests 10000 IDs concurrently from one Work and reports whether any
// ID was handed out twice.
func main() {
	gen := NewWorkGenerator(5, 5)
	results := make(chan int64, 10000)
	defer close(results)

	const total = 10000
	wgs.Add(total)
	for n := 0; n < total; n++ {
		go func() {
			defer wgs.Done()
			results <- gen.generatorID()
		}()
	}
	wgs.Wait()

	seen := make(map[int64]int)
	for n := 0; n < total; n++ {
		id := <-results
		if _, dup := seen[id]; dup {
			fmt.Printf("repeat id %d\n", id)
			return
		}
		seen[id] = n
	}
	fmt.Println("ALL", len(seen), "snowflake ID Get successed!")
}