使用Scala简易实现雪花算法

最新推荐文章于 2024-05-15 03:57:03 发布

TZfool

最新推荐文章于 2024-05-15 03:57:03 发布

阅读量240

点赞数 1

分类专栏： scala 文章标签： scala 开发语言后端大数据

本文链接：https://blog.csdn.net/TZfool/article/details/130810523

版权

scala 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

首先scala的雪花算法本来就有Twitter的源码：
https://github.com/twitter-archive/snowflake/blob/snowflake-2010/src/main/scala/com/twitter/service/snowflake/IdWorker.scala

下面是简易的SnowflakeIdGenerator 对象代码：

/**
 * Minimal Snowflake-style generator of 64-bit ids.
 *
 * Bit layout, from high to low: milliseconds elapsed since `twepoch`,
 * a 5-bit datacenter id, a 5-bit worker id and a 12-bit per-millisecond sequence.
 *
 * The object is a process-wide singleton that holds mutable state. Both `apply` and
 * `nextId` run under the object's monitor, so threads sharing the generator cannot
 * be handed the same (timestamp, sequence) pair.
 */
object SnowflakeIdGenerator {
  private val twepoch = 1684479483972L // custom epoch in ms; may be set to the project's start time
  private val workerIdBits = 5L // number of bits for the worker id
  private val datacenterIdBits = 5L // number of bits for the datacenter id
  private val maxWorkerId = -1L ^ (-1L << workerIdBits) // largest worker id (31)
  private val maxDatacenterId = -1L ^ (-1L << datacenterIdBits) // largest datacenter id (31)
  private val sequenceBits = 12L // number of bits for the sequence
  private val workerIdShift = sequenceBits // left shift applied to the worker id
  private val datacenterIdShift = sequenceBits + workerIdBits // left shift applied to the datacenter id
  private val timestampLeftShift = sequenceBits + workerIdBits + datacenterIdBits // left shift applied to the timestamp
  private val sequenceMask = -1L ^ (-1L << sequenceBits) // mask that keeps the sequence within 12 bits

  // Mutable generator state; only read or written while holding this object's monitor.
  private var lastTimestamp = -1L
  private var sequence = 0L
  private var workerId = 0L
  private var datacenterId = 0L

  /**
   * Configures the singleton generator and returns it.
   *
   * @param workerId     worker id, 0 to `maxWorkerId` inclusive
   * @param datacenterId datacenter id, 0 to `maxDatacenterId` inclusive
   * @return this generator
   * @throws IllegalArgumentException if either id is out of range
   */
  def apply(workerId: Long, datacenterId: Long): SnowflakeIdGenerator.type = this.synchronized {
    require(workerId >= 0 && workerId <= maxWorkerId, s"worker Id can't be greater than $maxWorkerId or less than 0")
    require(datacenterId >= 0 && datacenterId <= maxDatacenterId, s"datacenter Id can't be greater than $maxDatacenterId or less than 0")
    this.workerId = workerId
    this.datacenterId = datacenterId
    this
  }

  /**
   * Produces the next id.
   *
   * Ids from one generator are strictly increasing. If the wall clock is behind the
   * last issued timestamp, the call logs a message and blocks until the clock has
   * caught up. If the 12-bit sequence is exhausted within one millisecond, the call
   * spins until the next millisecond.
   *
   * @return the next 64-bit id
   */
  def nextId(): Long = this.synchronized {
    var timestamp = System.currentTimeMillis()
    if (timestamp < lastTimestamp) {
      val errorMsg = s"Clock moved backwards. Refusing to generate id for ${lastTimestamp - timestamp} milliseconds"
      // Report the anomaly.
      println(errorMsg)
      // Give the clock time to catch up with the last issued timestamp ...
      Thread.sleep(lastTimestamp - timestamp)
      // ... and make sure it really has: moving lastTimestamp backwards would
      // reset the sequence and could re-issue ids that were already handed out.
      timestamp = waitUntil(lastTimestamp)
    }

    if (lastTimestamp == timestamp) {
      sequence = (sequence + 1) & sequenceMask
      if (sequence == 0) {
        // Sequence wrapped around: this millisecond is used up.
        timestamp = tilNextMillis(lastTimestamp)
      }
    } else {
      sequence = 0L
    }

    lastTimestamp = timestamp

    ((timestamp - twepoch) << timestampLeftShift) |
      (datacenterId << datacenterIdShift) |
      (workerId << workerIdShift) |
      sequence
  }

  /** Spins until the clock reads at least `target`; returns that reading. */
  private def waitUntil(target: Long): Long = {
    var timestamp = System.currentTimeMillis()
    while (timestamp < target) {
      timestamp = System.currentTimeMillis()
    }
    timestamp
  }

  /** Spins until the clock reads strictly later than `lastTimestamp`; returns that reading. */
  private def tilNextMillis(lastTimestamp: Long): Long = {
    var timestamp = System.currentTimeMillis()
    while (timestamp <= lastTimestamp) {
      timestamp = System.currentTimeMillis()
    }
    timestamp
  }
}

使用方式：

    /** Demo entry point: prints 3000 ids produced by the shared generator. */
    def main(args: Array[String]): Unit = {
      // First argument is the worker id (0-31), second is the datacenter id (0-31).
      val generator: SnowflakeIdGenerator.type = SnowflakeIdGenerator(0, 0)
      for (_ <- 1 to 3000) {
        // padTo right-pads the decimal string with '0' up to 19 characters. The ids are
        // currently 16 digits long, so the last three zeros printed are padding only,
        // not part of the id (the sequence is the part that changes just before them).
        val id = generator.nextId().toString.padTo(19, '0')
        println(id)
      }
    }

运行结果：

1118022404145152000
1118022408339456000
1118022408339457000
1118022408339458000
1118022408339459000
1118022408339460000
1118022408339461000
1118022408339462000

注意：多个线程并发使用同一个生成器时，如果 nextId() 没有加锁同步，就会生成重复的id。

例如连接spark时需要减少并行数：

  // Spark session for the import job, assembled step by step.
  val spark: SparkSession = {
    val named = SparkSession.builder().appName("Excel Data to Database")
    // "local[*]" would run in parallel, with one worker thread per local CPU core.
    // Plain "local" keeps execution single-threaded so that the Snowflake generator
    // is never called concurrently and cannot hand out duplicate ids.
    val singleThreaded = named.master("local")
    singleThreaded.getOrCreate()
  }