首先,Scala 版的雪花算法(Snowflake)本来就有 Twitter 的开源实现:
https://github.com/twitter-archive/snowflake/blob/snowflake-2010/src/main/scala/com/twitter/service/snowflake/IdWorker.scala
下面是简易的SnowflakeIdGenerator 对象代码:
/**
 * Minimal Snowflake-style 64-bit id generator.
 *
 * Bit layout of a generated id, from high bits to low bits:
 * `(timestamp - twepoch) | datacenterId (5 bits) | workerId (5 bits) | sequence (12 bits)`.
 *
 * This is a singleton: [[apply]] stores the worker and datacenter ids on the object itself,
 * so every caller in the JVM shares one configuration, one sequence counter and one
 * last-timestamp. Both [[apply]] and [[nextId]] are synchronized on the object so that
 * concurrent callers cannot interleave updates to that shared state and receive the same id.
 */
object SnowflakeIdGenerator {
  // Custom epoch in milliseconds; timestamps are stored relative to it
  // (e.g. the time the project first went live).
  private val twepoch = 1684479483972L

  private val workerIdBits = 5L     // number of bits for the worker id
  private val datacenterIdBits = 5L // number of bits for the datacenter id
  private val sequenceBits = 12L    // number of bits for the per-millisecond sequence

  // -1L ^ (-1L << n) yields a value whose lowest n bits are set: the largest n-bit number.
  private val maxWorkerId = -1L ^ (-1L << workerIdBits)
  private val maxDatacenterId = -1L ^ (-1L << datacenterIdBits)
  private val sequenceMask = -1L ^ (-1L << sequenceBits)

  private val workerIdShift = sequenceBits                                           // left shift for the worker id
  private val datacenterIdShift = sequenceBits + workerIdBits                        // left shift for the datacenter id
  private val timestampLeftShift = sequenceBits + workerIdBits + datacenterIdBits    // left shift for the timestamp

  // Mutable state; only read or written while holding this object's monitor.
  private var lastTimestamp = -1L
  private var sequence = 0L
  private var workerId = 0L
  private var datacenterId = 0L

  /**
   * Configures the singleton generator and returns it.
   *
   * @param workerId     worker id, must be within `0..maxWorkerId` (0-31)
   * @param datacenterId datacenter id, must be within `0..maxDatacenterId` (0-31)
   * @return this generator, so the call can be chained with [[nextId]]
   * @throws IllegalArgumentException if either id is out of range
   */
  def apply(workerId: Long, datacenterId: Long): SnowflakeIdGenerator.type = synchronized {
    require(workerId >= 0 && workerId <= maxWorkerId, s"worker Id can't be greater than $maxWorkerId or less than 0")
    require(datacenterId >= 0 && datacenterId <= maxDatacenterId, s"datacenter Id can't be greater than $maxDatacenterId or less than 0")
    this.workerId = workerId
    this.datacenterId = datacenterId
    this
  }

  /**
   * Generates the next id.
   *
   * If the system clock is behind the timestamp of the previously generated id, the call
   * logs a message and sleeps until the clock has caught up. If the 12-bit sequence is
   * exhausted within one millisecond, the call spins until the next millisecond.
   *
   * @return the next id for the configured worker and datacenter
   */
  def nextId(): Long = synchronized {
    var timestamp = System.currentTimeMillis()
    // Loop rather than check once: after sleeping, the clock may still be behind
    // lastTimestamp, and using such a timestamp could repeat an id already handed out.
    while (timestamp < lastTimestamp) {
      val errorMsg = s"Clock moved backwards. Refusing to generate id for ${lastTimestamp - timestamp} milliseconds"
      // Report the anomaly.
      println(errorMsg)
      // Wait for the clock to catch up with the last timestamp used.
      Thread.sleep(lastTimestamp - timestamp)
      timestamp = System.currentTimeMillis()
    }
    if (lastTimestamp == timestamp) {
      sequence = (sequence + 1) & sequenceMask
      // The masked sequence wrapped to 0: every value for this millisecond is used up.
      if (sequence == 0) {
        timestamp = tilNextMillis(lastTimestamp)
      }
    } else {
      sequence = 0L
    }
    lastTimestamp = timestamp
    ((timestamp - twepoch) << timestampLeftShift) |
      (datacenterId << datacenterIdShift) |
      (workerId << workerIdShift) |
      sequence
  }

  /** Busy-waits until the system clock is strictly greater than `lastTimestamp` and returns that time. */
  private def tilNextMillis(lastTimestamp: Long): Long = {
    var timestamp = System.currentTimeMillis()
    while (timestamp <= lastTimestamp) {
      timestamp = System.currentTimeMillis()
    }
    timestamp
  }
}
使用方式:
/** Usage example: configures the singleton generator and prints 3000 consecutive ids. */
def main(args: Array[String]): Unit = {
  // First argument is the worker id (0-31), second argument is the datacenter id (0-31).
  val generator: SnowflakeIdGenerator.type = SnowflakeIdGenerator(0, 0)
  for (_ <- 1 to 3000) {
    // Print the id unchanged. Do not right-pad it with padTo(19, '0'): that appends '0'
    // characters to the decimal string (e.g. "000" after a 16-digit id), which changes
    // the numeric value and has nothing to do with the sequence bits.
    println(generator.nextId())
  }
}
运行结果(节选。注意:这些输出是对 id 字符串用 padTo(19, '0') 右侧补零后得到的,末尾的 000 是补上去的,并不是 id 的一部分,真实 id 是去掉末尾三个 0 之后的 16 位数字):
1118022404145152000
1118022408339456000
1118022408339457000
1118022408339458000
1118022408339459000
1118022408339460000
1118022408339461000
1118022408339462000
注意:多个线程并发调用同一个生成器时,如果 nextId() 没有加同步(synchronized),就可能生成重复的 id;另外,不同进程或机器必须使用不同的 workerId、datacenterId 组合,否则同样可能重复。
例如连接spark时需要减少并行数:
// Local Spark session for the import job.
// The master is "local" rather than "local[*]" ("local[*]" runs tasks in parallel, with
// as many worker threads as the machine has CPU cores). Parallelism is deliberately kept
// off so that only one task at a time calls the id generator, since concurrent callers of
// a non-thread-safe generator can produce duplicate ids.
val spark: SparkSession = {
  val sessionBuilder = SparkSession.builder().appName("Excel Data to Database")
  sessionBuilder.master("local").getOrCreate()
}