雪花算法-用go实现-附带三种解决时钟回拨的方案以及代码-CSDN博客

本文链接：https://blog.csdn.net/qq_58339096/article/details/132483739

一.雪花算法的基本定义

1. 雪花算法的原理就是生成一个64位的全局唯一id，由1bit符号位+41bit时间戳+10bit机器id+12bit序列号组成。第一位符号位不可调整，其余各段的位数可以根据业务需要调整。

2. 41位的时间戳可以表示 (1 << 41) / (1000 * 60 * 60 * 24 * 365) = 69 年

3. 10bit可以表示 1 << 10 = 1024 台机器，一般分5bit给数据中心，5bit给机器id

4. 12bit自增序列号每毫秒可以表示 (1 << 12) = 4096 个ID，理论上单机snowflake方案的QPS约为409.6w/s。

5. 优点

(1) 毫秒数在高位，自增序列在低位，整个ID都是趋势递增的。

(2) 可以不依赖数据库等第三方系统，以服务的方式部署，稳定性高，生成ID的性能也非常高。

(3)可以根据自身业务特性分配bit位，灵活

6.缺点

非常依赖机器时钟，如果机器时钟回拨，会导致生成重复ID。

二.普通实现-go代码如下

package main

import (
	"errors"
	"fmt"
	"sync"
	"time"
)

// Bit layout of a generated ID, most significant bit first:
//
//	1 sign bit | 41 timestamp bits | 5 data-center bits | 5 worker bits | 12 sequence bits
const (
	workerIDBits     = uint64(5) // worker-ID share of the 10-bit machine field
	dataCenterIDBits = uint64(5) // data-center share of the 10-bit machine field
	sequenceBits     = uint(12)  // width of the per-millisecond sequence

	// Largest value each field can hold: -1 ^ (-1 << n) == (1 << n) - 1.
	maxWorkerID     = int64(-1) ^ (int64(-1) << workerIDBits)
	maxDataCenterID = int64(-1) ^ (int64(-1) << dataCenterIDBits)
	maxSequence     = int64(-1) ^ (int64(-1) << sequenceBits)

	// Left shift applied to each field when an ID is assembled.
	timeLeft = uint8(22)
	dataLeft = uint8(17)
	workLeft = uint8(12)

	// twepoch is the custom epoch in Unix MILLISECONDS (22 Aug 2023 UTC).
	// It is subtracted from time.Now().UnixMilli(), so it must use the same
	// unit; the previous value 1692704479 was a seconds timestamp, which put
	// the epoch in January 1970 and wasted most of the 41-bit range.
	twepoch = int64(1692704479000)
)

// Worker holds the state needed to generate snowflake IDs for one
// (data center, worker) pair.
type Worker struct {
	mu           sync.Mutex
	LastStamp    int64 // millisecond timestamp used for the most recently issued ID
	WorkerID     int64 // ID of this node
	DataCenterID int64 // ID of the data center this node belongs to
	Sequence     int64 // IDs already issued in the current millisecond (starts at 0); at most 4096 per millisecond
}

// NewWorker returns a Worker for the given worker and data-center IDs with a
// zero LastStamp and Sequence. It panics when either ID is negative or larger
// than maxWorkerID / maxDataCenterID respectively.
func NewWorker(workerId, dataCenterId int64) *Worker {
	switch {
	case workerId < 0, workerId > maxWorkerID:
		panic("机器id太大")
	case dataCenterId < 0, dataCenterId > maxDataCenterID:
		panic("数据中心id太大")
	}

	worker := new(Worker)
	worker.WorkerID = workerId
	worker.DataCenterID = dataCenterId
	return worker
}

// getMilliSeconds reports the current wall-clock time as Unix milliseconds.
func (w *Worker) getMilliSeconds() int64 {
	now := time.Now()
	return now.UnixMilli()
}

// NextID returns the next ID. It holds w.mu for the whole call, so concurrent
// callers are served one at a time.
func (w *Worker) NextID() (id uint64, err error) {
	w.mu.Lock()
	defer w.mu.Unlock()

	id, err = w.nextID()
	return id, err
}

// nextID assembles the next ID from the current millisecond, the node
// identifiers and the per-millisecond sequence. NextID calls it with w.mu held.
//
// It returns an error, and no ID, when the clock reads earlier than the
// timestamp of the previously issued ID.
func (w *Worker) nextID() (uint64, error) {
	now := w.getMilliSeconds()

	switch {
	case now < w.LastStamp:
		return 0, errors.New("time is moving backwards,waiting until")
	case now > w.LastStamp:
		// A new millisecond: the sequence starts over.
		w.Sequence = 0
	default:
		// Same millisecond as the previous ID: advance the sequence.
		w.Sequence = (w.Sequence + 1) & maxSequence
		if w.Sequence == 0 {
			// The sequence wrapped, so this millisecond is exhausted;
			// spin until the clock moves past it.
			for now <= w.LastStamp {
				now = w.getMilliSeconds()
			}
		}
	}

	w.LastStamp = now

	elapsed := now - twepoch
	id := elapsed<<timeLeft |
		w.DataCenterID<<dataLeft |
		w.WorkerID<<workLeft |
		w.Sequence
	return uint64(id), nil
}

// wg tracks the generator goroutines started by main.
var wg sync.WaitGroup

// main requests 10000 IDs from a single Worker, one per goroutine, and
// reports whether any ID was handed out twice.
//
// Errors from NextID are collected instead of being discarded: previously a
// failed call pushed the zero ID into the channel, which was then reported as
// a bogus duplicate.
func main() {
	const count = 10000

	w := NewWorker(5, 5)
	ch := make(chan uint64, count)
	errs := make(chan error, count)

	wg.Add(count)
	for i := 0; i < count; i++ {
		go func() {
			defer wg.Done()
			id, err := w.NextID()
			if err != nil {
				errs <- err
				return
			}
			ch <- id
		}()
	}

	// All senders are done after Wait, so both channels can be closed and
	// drained without blocking.
	wg.Wait()
	close(ch)
	close(errs)

	if err, failed := <-errs; failed {
		fmt.Println("generate id failed:", err)
		return
	}

	seen := make(map[uint64]struct{}, count)
	for id := range ch {
		if _, dup := seen[id]; dup {
			fmt.Printf("repeat id %d\n", id)
			return
		}
		seen[id] = struct{}{}
	}

	fmt.Println("ALL", len(seen), "snowflake ID Get successed!")
}

三.时钟回拨问题和解决方案

因为机器本地时钟可能会因为各种原因出现不准的情况，网络中提供了NTP服务来做时间校准，做校准的时候就可能发生时钟的跳跃或者回拨问题。

解决思路有以下：

1. 不依赖机器时钟驱动，自然就没有时钟回拨的问题。时间戳从一个初始值开始自增，不跟随机器时钟增加；当序列号达到最大值时，才在时间戳上+1。这样不会浪费序列号，适合流量比较大的场景；如果流量比较小，ID中的时间戳可能会明显滞后于真实时间。

package main

import (
	"fmt"
	"sync"
	"time"
)

const (
	TestWorkIdBits     = uint64(5)  // width of the worker-ID field
	TestDataCenterBits = uint64(5)  // width of the data-center field
	TestSequenceBits   = uint64(12) // width of the sequence field

	// Largest value each field can hold: -1 ^ (-1 << n) == (1 << n) - 1.
	testWorkBitsMax       = -1 ^ (-1 << TestWorkIdBits)
	testDataCenterBitsMax = -1 ^ (-1 << TestDataCenterBits)
	testSequenceBitsMax   = -1 ^ (-1 << TestSequenceBits)

	// Testtwepech is subtracted from the logical timestamp before it is
	// shifted into an ID, so it must stay in the same unit as startTimeStamp.
	Testtwepech = int64(1692704479)

	maxLength = 2000 // length of TestSequenceHistory

	// Left shift applied to each field when an ID is assembled.
	testTimeShift       = int64(22)
	testDataCenterShift = int(17)
	testWorkIdShift     = int(12)
)

// TestSequenceHistory is a maxLength-slot buffer; nothing in this block reads it.
var TestSequenceHistory = make([]int64, maxLength)

// startTimeStamp is the initial value of the logical timestamp placed in IDs.
var startTimeStamp = int64(1692766479)

// TestWork holds the state of one ID generator.
type TestWork struct {
	mu           sync.RWMutex
	WorkID       int64 // ID of this node
	DataCenterID int64 // ID of the data center this node belongs to
	LastStamp    int64 // initialised to 0 by NewTestWork
	Sequence     int64 // initialised to 0 by NewTestWork
}

// NewTestWork returns a TestWork for the given worker and data-center IDs.
// It panics when either ID is negative or does not fit in its 5-bit field.
//
// The bounds are the constants declared in this program; the previous code
// referred to maxWorkerID / maxDataCenterID, which this program never declares.
func NewTestWork(workId, dataCenterId int64) *TestWork {
	if workId < 0 || workId > testWorkBitsMax {
		panic("机器id太大")
	}
	if dataCenterId < 0 || dataCenterId > testDataCenterBitsMax {
		panic("数据中心id太大")
	}
	return &TestWork{
		WorkID:       workId,
		DataCenterID: dataCenterId,
		LastStamp:    0,
		Sequence:     0,
	}
}

// testGetMillisSecond reports the current wall-clock time as Unix milliseconds.
func (*TestWork) testGetMillisSecond() int64 {
	now := time.Now()
	return now.UnixMilli()
}

// TestNextID returns the next ID while holding the write lock of w.mu.
func (w *TestWork) TestNextID() (id int64, err error) {
	w.mu.Lock()
	defer w.mu.Unlock()

	id, err = w.testNextID()
	return id, err
}

// testNextID builds the next ID from a logical clock rather than the wall
// clock: the sequence is advanced on every call, and only when it wraps back
// to zero is the package-level startTimeStamp incremented. The returned error
// is always nil.
func (w *TestWork) testNextID() (int64, error) {
	next := (w.Sequence + 1) & testSequenceBitsMax
	w.Sequence = next
	if next == 0 {
		// Sequence space used up: move the logical clock forward one tick.
		startTimeStamp++
	}

	logical := startTimeStamp - Testtwepech
	id := logical<<testTimeShift |
		w.DataCenterID<<testDataCenterShift |
		w.WorkID<<testWorkIdShift |
		next
	return id, nil
}

// main requests 10000 IDs from a single TestWork, one per goroutine, and
// reports whether any ID was handed out twice.
//
// The error result of TestNextID is checked instead of being discarded, so a
// failed call can no longer feed a zero ID into the duplicate check.
func main() {
	const count = 10000

	var wg sync.WaitGroup
	w := NewTestWork(5, 5)
	ch := make(chan int64, count)
	errs := make(chan error, count)

	wg.Add(count)
	for i := 0; i < count; i++ {
		go func() {
			defer wg.Done()
			id, err := w.TestNextID()
			if err != nil {
				errs <- err
				return
			}
			ch <- id
		}()
	}

	// All senders are done after Wait, so both channels can be closed and
	// drained without blocking.
	wg.Wait()
	close(ch)
	close(errs)

	if err, failed := <-errs; failed {
		fmt.Println("generate id failed:", err)
		return
	}

	seen := make(map[int64]struct{}, count)
	for id := range ch {
		if _, dup := seen[id]; dup {
			fmt.Println("重复")
			return
		}
		seen[id] = struct{}{}
	}
	fmt.Println("ALL", len(seen), "snowflake generator success")
}

2. 依赖机器时钟。如果时钟回拨范围比较小，比如几十毫秒，可以等待一段时间让它恢复到正常。如果流量不大，前几百毫秒或者几秒内肯定有序列号没用完，可以将前面几百毫秒内每毫秒的最大序列号缓存起来，如果发生时钟回拨，就从缓存中取出对应毫秒的序列号继续自增。

2.1 等待时间

package main


import (
	"fmt"
	"sync"
	"time"
)

const (
	TestWorkIdBits     = uint64(5)  // width of the worker-ID field
	TestDataCenterBits = uint64(5)  // width of the data-center field
	TestSequenceBits   = uint64(12) // width of the sequence field

	// Largest value each field can hold: -1 ^ (-1 << n) == (1 << n) - 1.
	testWorkBitsMax       = -1 ^ (-1 << TestWorkIdBits)
	testDataCenterBitsMax = -1 ^ (-1 << TestDataCenterBits)
	testSequenceBitsMax   = -1 ^ (-1 << TestSequenceBits)

	// Testtwepech is the custom epoch in Unix MILLISECONDS (22 Aug 2023 UTC).
	// It is subtracted from time.Now().UnixMilli(), so it must use the same
	// unit; the previous value 1692704479 was a seconds timestamp.
	Testtwepech = int64(1692704479000)

	maxLength = 2000 // length of TestSequenceHistory

	// Left shift applied to each field when an ID is assembled.
	testTimeShift       = int64(22)
	testDataCenterShift = int(17)
	testWorkIdShift     = int(12)
)

// TestSequenceHistory is a maxLength-slot buffer; nothing in this block reads it.
var TestSequenceHistory = make([]int64, maxLength)

// TestWork holds the state of one ID generator.
type TestWork struct {
	mu           sync.RWMutex
	WorkID       int64 // ID of this node
	DataCenterID int64 // ID of the data center this node belongs to
	LastStamp    int64 // initialised to 0 by NewTestWork
	Sequence     int64 // initialised to 0 by NewTestWork
}

// NewTestWork returns a TestWork for the given worker and data-center IDs.
// It panics when either ID is negative or does not fit in its 5-bit field.
//
// The bounds are the constants declared in this program; the previous code
// referred to maxWorkerID / maxDataCenterID, which this program never declares.
func NewTestWork(workId, dataCenterId int64) *TestWork {
	if workId < 0 || workId > testWorkBitsMax {
		panic("机器id太大")
	}
	if dataCenterId < 0 || dataCenterId > testDataCenterBitsMax {
		panic("数据中心id太大")
	}
	return &TestWork{
		WorkID:       workId,
		DataCenterID: dataCenterId,
		LastStamp:    0,
		Sequence:     0,
	}
}

// testGetMillisSecond reports the current wall-clock time as Unix milliseconds.
func (*TestWork) testGetMillisSecond() int64 {
	now := time.Now()
	return now.UnixMilli()
}

// TestNextID returns the next ID while holding the write lock of w.mu.
func (w *TestWork) TestNextID() (id int64, err error) {
	w.mu.Lock()
	defer w.mu.Unlock()

	id, err = w.testNextID()
	return id, err
}

// testNextID builds the next ID from the wall clock. TestNextID calls it with
// w.mu held.
//
// When the clock reads earlier than w.LastStamp by at most 500ms, it sleeps
// for exactly the missing interval and re-reads the clock, at most twice.
// A larger rollback, or one that is still present after both waits, returns
// an error and no ID (earlier versions panicked here and always slept a full
// 500ms, even for a one-millisecond rollback).
func (w *TestWork) testNextID() (int64, error) {
	const (
		maxRollbackMillis = 500 // largest rollback that is waited out
		maxWaits          = 2   // number of sleeps before giving up
	)

	timeStamp := w.testGetMillisSecond()
	for waits := 0; timeStamp < w.LastStamp; waits++ {
		behind := w.LastStamp - timeStamp
		if waits >= maxWaits || behind > maxRollbackMillis {
			return 0, fmt.Errorf("clock is %dms behind the last issued timestamp", behind)
		}
		time.Sleep(time.Duration(behind) * time.Millisecond)
		timeStamp = w.testGetMillisSecond()
	}

	if timeStamp == w.LastStamp {
		// Same millisecond as the previous ID: advance the sequence.
		w.Sequence = (w.Sequence + 1) & testSequenceBitsMax
		if w.Sequence == 0 {
			// The sequence wrapped, so this millisecond is exhausted;
			// spin until the clock moves past it.
			for timeStamp <= w.LastStamp {
				timeStamp = w.testGetMillisSecond()
			}
		}
	} else {
		// A new millisecond: the sequence starts over.
		w.Sequence = 0
	}

	w.LastStamp = timeStamp

	id := (timeStamp-Testtwepech)<<testTimeShift |
		w.DataCenterID<<testDataCenterShift |
		w.WorkID<<testWorkIdShift |
		w.Sequence
	return id, nil
}

// main requests 10000 IDs from a single TestWork, one per goroutine, and
// reports whether any ID was handed out twice.
//
// The error result of TestNextID is checked instead of being discarded, so a
// failed call can no longer feed a zero ID into the duplicate check.
func main() {
	const count = 10000

	var wg sync.WaitGroup
	w := NewTestWork(5, 5)
	ch := make(chan int64, count)
	errs := make(chan error, count)

	wg.Add(count)
	for i := 0; i < count; i++ {
		go func() {
			defer wg.Done()
			id, err := w.TestNextID()
			if err != nil {
				errs <- err
				return
			}
			ch <- id
		}()
	}

	// All senders are done after Wait, so both channels can be closed and
	// drained without blocking.
	wg.Wait()
	close(ch)
	close(errs)

	if err, failed := <-errs; failed {
		fmt.Println("generate id failed:", err)
		return
	}

	seen := make(map[int64]struct{}, count)
	for id := range ch {
		if _, dup := seen[id]; dup {
			fmt.Println("重复")
			return
		}
		seen[id] = struct{}{}
	}
	fmt.Println("ALL", len(seen), "snowflake generator success")
}

2.2缓存

package main

import (
	"fmt"
	"sync"
	"time"
)

const (
	WorkIDBits     = uint64(5)  // width of the worker-ID field
	DataCenterBits = uint64(5)  // width of the data-center field
	SequenceBits   = uint64(12) // width of the sequence field

	// Largest value each field can hold: -1 ^ (-1 << n) == (1 << n) - 1.
	WorkIDBitsMax     = -1 ^ (-1 << WorkIDBits)
	DataCenterBitsMax = -1 ^ (-1 << DataCenterBits)
	SequenceBitsMax   = -1 ^ (-1 << SequenceBits)

	TimeShift           = int64(22) // left shift of the timestamp field
	DataCenterBitsShift = int(17)   // left shift of the data-center field
	WorkIDBitsShift     = int(12)   // left shift of the worker-ID field

	// Twepech is the custom epoch in Unix MILLISECONDS (22 Aug 2023 UTC).
	// It is subtracted from time.Now().UnixMilli(), so it must use the same
	// unit; the previous value 1692704479 was a seconds timestamp.
	Twepech = int64(1692704479000)

	// MaxLength is the number of cached milliseconds, i.e. the size of the
	// window within which a clock rollback can be tolerated.
	MaxLength = 2000
)

// SequenceHistory caches, per millisecond modulo MaxLength, the highest
// sequence number issued in that millisecond.
var SequenceHistory = make([]int64, MaxLength)

// Work holds the state of one ID generator.
type Work struct {
	mu         sync.RWMutex
	LastStamp  int64 // timestamp (ms) of the last ID generated on the normal path
	Sequence   int64 // sequence number
	WorkID     int64 // ID of this node
	DataCenter int64 // ID of the data center this node belongs to
}

// NewWorkGenerator returns a Work for the given worker and data-center IDs.
// It panics when either ID is negative or does not fit in its 5-bit field.
//
// The bounds are the constants declared in this program; the previous code
// referred to maxWorkerID / maxDataCenterID, which this program never declares.
func NewWorkGenerator(workID, dataCenter int64) *Work {
	if workID < 0 || workID > WorkIDBitsMax {
		panic("机器id太大")
	}
	if dataCenter < 0 || dataCenter > DataCenterBitsMax {
		panic("数据中心id太大")
	}
	return &Work{
		LastStamp:  0,
		Sequence:   0,
		WorkID:     workID,
		DataCenter: dataCenter,
	}
}

// getMillisSecond reports the current wall-clock time as Unix milliseconds.
func (w *Work) getMillisSecond() int64 {
	now := time.Now()
	return now.UnixMilli()
}

// generatorID returns the next ID. It tolerates clock rollbacks shorter than
// MaxLength milliseconds by resuming the per-millisecond sequence stored in
// SequenceHistory, and panics when the clock is MaxLength ms or more behind
// w.LastStamp.
//
// Fixes over the earlier version:
//   - IDs issued during a rollback no longer overwrite w.Sequence, so when the
//     clock returns to w.LastStamp the counter continues from the right value
//     instead of re-issuing sequence numbers.
//   - A rollback of exactly MaxLength ms is rejected: that timestamp maps to
//     the same cache slot as w.LastStamp, so the cached value belongs to a
//     different millisecond.
//   - The rollback range is re-checked every time the clock is re-read.
//
// NOTE(review): SequenceHistory is package-level, so several Work values in
// one process share a single cache — confirm only one generator is used.
func (w *Work) generatorID() int64 {
	w.mu.Lock()
	defer w.mu.Unlock()

	timeStamp := w.getMillisSecond()

	// Clock rollback: serve the request from the cache without touching
	// w.LastStamp or w.Sequence, which keep describing the newest millisecond.
	for timeStamp < w.LastStamp {
		if w.LastStamp-timeStamp >= MaxLength {
			panic("时间会退范围太大，超过2000ms缓存")
		}
		index := int(timeStamp % MaxLength)
		next := (SequenceHistory[index] + 1) & SequenceBitsMax
		if next != 0 {
			SequenceHistory[index] = next
			return w.buildID(timeStamp, next)
		}
		// This millisecond is exhausted; re-read the clock and try again.
		timeStamp = w.getMillisSecond()
	}

	// Normal path: the clock is at or after the last issued timestamp.
	if timeStamp == w.LastStamp {
		w.Sequence = (w.Sequence + 1) & SequenceBitsMax
		if w.Sequence == 0 {
			// The sequence wrapped; spin until the next millisecond.
			for timeStamp <= w.LastStamp {
				timeStamp = w.getMillisSecond()
			}
		}
	} else {
		w.Sequence = 0
	}

	// Record the sequence so a later rollback to this millisecond can resume.
	SequenceHistory[int(timeStamp%MaxLength)] = w.Sequence
	w.LastStamp = timeStamp

	return w.buildID(timeStamp, w.Sequence)
}

// buildID packs the timestamp, data-center ID, worker ID and sequence into one ID.
func (w *Work) buildID(timeStamp, sequence int64) int64 {
	return (timeStamp-Twepech)<<TimeShift |
		w.DataCenter<<DataCenterBitsShift |
		w.WorkID<<WorkIDBitsShift |
		sequence
}

// wgs tracks the generator goroutines started by main.
var wgs sync.WaitGroup

// main requests 10000 IDs from a single Work, one per goroutine, and reports
// whether any ID was handed out twice.
func main() {
	const total = 10000

	gen := NewWorkGenerator(5, 5)
	ids := make(chan int64, total)
	defer close(ids)

	wgs.Add(total)
	for n := total; n > 0; n-- {
		go func() {
			defer wgs.Done()
			ids <- gen.generatorID()
		}()
	}
	wgs.Wait()

	// Every goroutine has delivered its ID by now; look for repeats.
	seen := make(map[int64]int)
	for pos := 0; pos < total; pos++ {
		id := <-ids
		if _, dup := seen[id]; dup {
			fmt.Printf("repeat id %d\n", id)
			return
		}
		seen[id] = pos
	}

	fmt.Println("ALL", len(seen), "snowflake ID Get successed!")
}