/**
* Copyright (C) 2009-2014 Typesafe Inc. <http://www.typesafe.com>
*/
package akka.cluster
import akka.AkkaException
import System.{ currentTimeMillis ⇒ newTimestamp }
import java.security.MessageDigest
import java.util.concurrent.atomic.AtomicLong
import scala.collection.immutable.TreeMap
import scala.annotation.tailrec
/**
* VectorClock module with helper classes and methods.
*
* Based on code from the 'vlock' VectorClock library by Coda Hale.
*/
private[cluster] object VectorClock {

  /**
   * Identifier of a node inside a vector clock. It is a plain String holding
   * the lower-case hex encoding of the MD5 digest of the node name.
   */
  type Node = String

  object Node {

    /** Creates a Node by hashing the given name. */
    def apply(name: String): Node = hash(name)

    /** Uses an already computed hash as a Node, without hashing it again. */
    def fromHash(hash: String): Node = hash

    /** MD5 digest of the UTF-8 bytes of `name`, rendered as two hex digits per byte. */
    private def hash(name: String): String = {
      val md5 = MessageDigest.getInstance("MD5")
      md5.update(name.getBytes("UTF-8"))
      val hex = new StringBuilder
      // Mask with 0xFF so that negative bytes are formatted as unsigned values.
      md5.digest().foreach(b ⇒ hex.append("%02x".format(0xFF & b)))
      hex.toString
    }
  }

  object Timestamp {
    /** Timestamp assumed for a node that is not present in a clock. */
    final val Zero = 0L
    /** Timestamp carried by the end-of-clock sentinel entry. */
    final val EndMarker = Long.MinValue
  }

  /**
   * Outcome of comparing two vector clocks.
   * The trait is sealed, so it can only be extended within this file.
   */
  sealed trait Ordering

  // The case objects below play the role of enum values: they are passed in as
  // the requested ordering and also returned as the result of a comparison.
  case object After extends Ordering
  case object Before extends Ordering
  case object Same extends Ordering
  case object Concurrent extends Ordering

  /**
   * Marker to ensure that we do a full order comparison instead of bailing out early.
   */
  private case object FullOrder extends Ordering

  /**
   * Marker to signal that we have reached the end of a vector clock.
   * It is recognised by reference (`eq`), not by its contents.
   */
  private val cmpEndMarker = (Node("endmarker"), Timestamp.EndMarker)
}
/**
* Representation of a Vector-based clock (counting clock), inspired by Lamport logical clocks.
* {{{
* Reference:
* 1) Leslie Lamport (1978). "Time, clocks, and the ordering of events in a distributed system". Communications of the ACM 21 (7): 558-565.
* 2) Friedemann Mattern (1988). "Virtual Time and Global States of Distributed Systems". Workshop on Parallel and Distributed Algorithms: pp. 215-226
* }}}
*
* Based on code from the 'vlock' VectorClock library by Coda Hale.
*/
@SerialVersionUID(1L)
final case class VectorClock(
  versions: TreeMap[VectorClock.Node, Long] = TreeMap.empty[VectorClock.Node, Long]) {

  import VectorClock._

  /**
   * Advances the counter of `node` by one, treating a node that is not yet
   * present as being at `Timestamp.Zero`. This clock is left untouched; the
   * result is a copy carrying the updated entry.
   */
  def :+(node: Node): VectorClock = {
    val bumped = versions.getOrElse(node, Timestamp.Zero) + 1
    copy(versions = versions.updated(node, bumped))
  }

  // The symbolic operators below compare two vector clocks. Each one asks
  // compareOnlyTo for a single outcome and checks whether that outcome was reached.

  /**
   * Returns true if <code>this</code> and <code>that</code> are concurrent else false.
   */
  def <>(that: VectorClock): Boolean = comparesAs(that, Concurrent)

  /**
   * Returns true if <code>this</code> is before <code>that</code> else false.
   */
  def <(that: VectorClock): Boolean = comparesAs(that, Before)

  /**
   * Returns true if <code>this</code> is after <code>that</code> else false.
   */
  def >(that: VectorClock): Boolean = comparesAs(that, After)

  /**
   * Returns true if this VectorClock has the same history as the 'that' VectorClock else false.
   */
  def ==(that: VectorClock): Boolean = comparesAs(that, Same)

  /**
   * True when comparing this clock to `that`, looking only for `expected`,
   * produces exactly `expected`.
   */
  private def comparesAs(that: VectorClock, expected: Ordering): Boolean =
    compareOnlyTo(that, expected) eq expected

  /**
   * Vector clock comparison according to the semantics described by compareTo, with the
   * ability to stop early once the requested ordering can no longer be the result.
   *
   * The running result starts as Same and moves as follows:
   *  - from Same it can stay Same or become Before or After
   *  - from After it can stay After or become Concurrent
   *  - from Before it can stay Before or become Concurrent
   *  - Concurrent is final and is returned immediately
   *
   * Passing FullOrder disables the early exit. A request for Concurrent is also
   * run as a full comparison.
   */
  private final def compareOnlyTo(that: VectorClock, order: Ordering): Ordering =
    if ((this eq that) || (this.versions eq that.versions)) Same
    else {
      val requested = if (order eq Concurrent) FullOrder else order
      val mine = this.versions.iterator
      val theirs = that.versions.iterator

      // Next entry of the iterator, or the end marker once it is exhausted.
      def advance(entries: Iterator[(Node, Long)]): (Node, Long) =
        if (entries.hasNext) entries.next() else cmpEndMarker

      // @tailrec makes the compiler reject this function if it is not tail recursive.
      @tailrec
      def walk(left: (Node, Long), right: (Node, Long), soFar: Ordering): Ordering = {
        // The result has already left Same for something other than what was asked for.
        val decided = (requested ne FullOrder) && (soFar ne Same) && (soFar ne requested)
        val leftDone = left eq cmpEndMarker
        val rightDone = right eq cmpEndMarker

        if (decided || (leftDone && rightDone)) soFar
        // this clock is exhausted but the other is not, so this can only be Before
        else if (leftDone) { if (soFar eq After) Concurrent else Before }
        // the other clock is exhausted but this one is not, so this can only be After
        else if (rightDone) { if (soFar eq Before) Concurrent else After }
        else {
          val nodeOrder = left._1 compareTo right._1
          if (nodeOrder < 0) {
            // The node exists only in this clock, so this can only be After.
            // Keep the other side's entry and advance this side only.
            if (soFar eq Before) Concurrent else walk(advance(mine), right, After)
          } else if (nodeOrder > 0) {
            // The node exists only in the other clock, so this can only be Before.
            if (soFar eq After) Concurrent else walk(left, advance(theirs), Before)
          } else if (left._2 == right._2) {
            // Same node with the same timestamp: nothing learned, move both sides on.
            walk(advance(mine), advance(theirs), soFar)
          } else if (left._2 < right._2) {
            // Same node, smaller timestamp here: only Before is still possible.
            // If the result was After, the two clocks cannot be ordered.
            if (soFar eq After) Concurrent else walk(advance(mine), advance(theirs), Before)
          } else {
            // Same node, larger timestamp here: only After is still possible.
            // If the result was Before, the two clocks cannot be ordered.
            if (soFar eq Before) Concurrent else walk(advance(mine), advance(theirs), After)
          }
        }
      }

      walk(advance(mine), advance(theirs), Same)
    }

  /**
   * Compare two vector clocks. The outcome will be one of the following:
   * <p/>
   * {{{
   *   1. Clock 1 is SAME (==)       as Clock 2 iff for all i c1(i) == c2(i)
   *   2. Clock 1 is BEFORE (<)      Clock 2 iff for all i c1(i) <= c2(i) and there exist a j such that c1(j) < c2(j)
   *   3. Clock 1 is AFTER (>)       Clock 2 iff for all i c1(i) >= c2(i) and there exist a j such that c1(j) > c2(j).
   *   4. Clock 1 is CONCURRENT (<>) to Clock 2 otherwise.
   * }}}
   */
  def compareTo(that: VectorClock): Ordering = compareOnlyTo(that, FullOrder)

  /**
   * Merges this VectorClock with another VectorClock. The result starts from
   * the entries of `that` and takes an entry of this clock wherever its
   * timestamp is strictly greater than the one already present (absent nodes
   * count as `Timestamp.Zero`).
   */
  def merge(that: VectorClock): VectorClock = {
    val combined = versions.foldLeft(that.versions) {
      case (acc, (node, time)) ⇒
        if (time > acc.getOrElse(node, Timestamp.Zero)) acc.updated(node, time) else acc
    }
    VectorClock(combined)
  }

  /** Renders the clock as `VectorClock(node -> timestamp, ...)` in key order. */
  override def toString =
    versions.iterator.map { case (n, t) ⇒ s"$n -> $t" }.mkString("VectorClock(", ", ", ")")
}
在上面的代码中定义了如下操作:
1)递增指定节点在vector clock中的版本号(节点不存在时先将其加入) (:+)
2)两个vector clock的关系是否是Concurrent (<>)
3)两个vector clock的关系是否是:前一个Before后一个 (<)
4)两个vector clock的关系是否是:前一个After后一个 (>)
5)两个vector clock是否一致。 (==)
6)合并两个vector clock
7)比较两个vector clock,返回Concurrent、Before、After、Same四者之一 (compareTo)
通过上面的操作完成了vector clock的比较和更新。
在每一个工作节点上,vector clock都是动态变化的(越来越大)。只有当工作节点B的时间戳发送到了工作节点A上(或者其他节点C发送的时间戳中带有B),节点A才会将B添加到自己的vector clock中。这样做的好处是节省空间,不用在初始的时候就为各个工作节点预留空间,同时可以做到动态扩展,方便其他工作节点动态地加入到集群中。
具体规则见注释。
为什么使用TreeMap存放节点?
为了保证在比较时节点都是按照某种顺序排列的。