Spark-SerializerManager&JavaSerializer&KryoSerializer 源码解析

abstract class Serializer

这个类是 spark 序列化的抽象类,规定了通用的方法。

// Class loader recorded by setDefaultClassLoader; None until one has been set.
@volatile protected var defaultClassLoader: Option[ClassLoader] = None
// Stores the given class loader (wrapped in Some) and returns this serializer so calls can be chained.
def setDefaultClassLoader(classLoader: ClassLoader): Serializer = {
    defaultClassLoader = Some(classLoader)
    this
  }
// Creates a new SerializerInstance; declared abstract here, so concrete serializers supply it.
def newInstance(): SerializerInstance
// Whether serialized objects may be relocated; false here unless a subclass overrides it.
// NOTE(review): the precise meaning of "relocation" is not shown in this excerpt — confirm against callers.
private[spark] def supportsRelocationOfSerializedObjects: Boolean = false

JavaSerializer

// Read from "spark.serializer.objectStreamReset" (default 100); passed to every JavaSerializerInstance.
private var counterReset = conf.getInt("spark.serializer.objectStreamReset", 100)
  // Read from "spark.serializer.extraDebugInfo" (default true); passed to every JavaSerializerInstance.
  private var extraDebugInfo = conf.getBoolean("spark.serializer.extraDebugInfo", true)

  // No-arg constructor that builds the serializer from a fresh SparkConf.
  protected def this() = this(new SparkConf())  // For deserialization only

  /**
   * Builds a new JavaSerializerInstance configured with this serializer's
   * `counterReset` and `extraDebugInfo` settings.
   *
   * The instance uses the class loader recorded in `defaultClassLoader` when one
   * is present, and the calling thread's context class loader otherwise.
   *
   * @return a freshly created serializer instance
   */
  override def newInstance(): SerializerInstance = {
    val loader = defaultClassLoader match {
      case Some(configured) => configured
      case None => Thread.currentThread.getContextClassLoader
    }
    new JavaSerializerInstance(counterReset, extraDebugInfo, loader)
  }

  // Writes this serializer's state to `out`: counterReset as an int, then extraDebugInfo as a boolean.
  // readExternal reads the two values back in the same order, so the order here must not change.
  // The body runs inside Utils.tryOrIOException (helper not shown in this excerpt).
  override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException {
    out.writeInt(counterReset)
    out.writeBoolean(extraDebugInfo)
  }

  // Restores this serializer's state from `in`: an int into counterReset, then a boolean into
  // extraDebugInfo — the same order in which writeExternal emits them.
  // The body runs inside Utils.tryOrIOException (helper not shown in this excerpt).
  override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException {
    counterReset = in.readInt()
    extraDebugInfo = in.readBoolean()
  }

KryoSerializer

abstract class SerializerInstance

序列化Instance 对象 的 抽象类。

// Serializes a single object into a ByteBuffer.
def serialize[T: ClassTag](t: T): ByteBuffer
// Deserializes a single object from a ByteBuffer.
def deserialize[T: ClassTag](bytes: ByteBuffer): T
// Deserializes a single object from a ByteBuffer, using the supplied class loader.
def deserialize[T: ClassTag](bytes: ByteBuffer, loader: ClassLoader): T
// Wraps an OutputStream in a SerializationStream for writing objects.
def serializeStream(s: OutputStream): SerializationStream
// Wraps an InputStream in a DeserializationStream for reading objects.
def deserializeStream(s: InputStream): DeserializationStream

JavaSerializerInstance

KryoSerializerInstance

abstract class SerializationStream

// Writes one object to the stream; abstract here. Returns a SerializationStream so calls can be chained.
def writeObject[T: ClassTag](t: T): SerializationStream

// Writes a key; the default implementation simply delegates to writeObject.
def writeKey[T: ClassTag](key: T): SerializationStream = writeObject(key)

// Writes a value; the default implementation simply delegates to writeObject.
def writeValue[T: ClassTag](value: T): SerializationStream = writeObject(value)
  // Flushes the stream; abstract here.
  def flush(): Unit
  // Closes the stream; abstract here.
  override def close(): Unit

/**
 * Writes every remaining element of `iter` to this stream via `writeObject`,
 * in iteration order.
 *
 * @param iter the elements to write; it is fully consumed
 * @return this stream, so further calls can be chained
 */
def writeAll[T: ClassTag](iter: Iterator[T]): SerializationStream = {
  // Tail-recursive drain: compiles to a loop, one writeObject call per element.
  @scala.annotation.tailrec
  def drain(): Unit =
    if (iter.hasNext) {
      writeObject(iter.next())
      drain()
    }

  drain()
  this
}

JavaSerializationStream

JavaSerializationStream继承了SerializationStream,其内部有2个属性:

private val objOut = new ObjectOutputStream(out) // standard Java object serialization over `out`
private var counter = 0 // number of objects written since the last objOut.reset()

方法:

/**
 * Serializes `t` to the underlying ObjectOutputStream.
 *
 * When `extraDebugInfo` is enabled, a NotSerializableException is passed through
 * SerializationDebugger.improveException before being rethrown. After each
 * successful write the counter is incremented; once it reaches `counterReset`
 * (and `counterReset` is positive) the ObjectOutputStream is reset and the
 * counter starts again from zero.
 *
 * @param t the object to write
 * @return this stream, so further calls can be chained
 */
def writeObject[T: ClassTag](t: T): SerializationStream = {
  try objOut.writeObject(t)
  catch {
    case e: NotSerializableException if extraDebugInfo =>
      throw SerializationDebugger.improveException(t, e)
  }

  counter += 1
  val resetDue = counterReset > 0 && counter >= counterReset
  if (resetDue) {
    objOut.reset()
    counter = 0
  }
  this
}

  /** Flushes the underlying ObjectOutputStream. */
  def flush(): Unit = objOut.flush()

  /** Closes the underlying ObjectOutputStream. */
  def close(): Unit = objOut.close()

KryoSerializationStream

KryoSerializationStream 继承自SerializationStream
属性和方法:

// Kryo output wrapping outStream; the unsafe variant is chosen when useUnsafe is set.
// Set to null by close(), which is how flush() and close() detect a closed stream.
private[this] var output: KryoOutput =
    if (useUnsafe) new KryoUnsafeOutput(outStream) else new KryoOutput(outStream)

  // Kryo instance borrowed from the owning serializer instance; handed back in close().
  private[this] var kryo: Kryo = serInstance.borrowKryo()

  // Writes `t` through Kryo's writeClassAndObject and returns this stream for chaining.
  override def writeObject[T: ClassTag](t: T): SerializationStream = {
    kryo.writeClassAndObject(output, t)
    this
  }

  /**
   * Flushes the Kryo output.
   *
   * @throws IOException with message "Stream is closed" if the stream has
   *                     already been closed (the output reference is null)
   */
  override def flush(): Unit = {
    val current = output
    if (current != null) current.flush()
    else throw new IOException("Stream is closed")
  }

  /**
   * Closes the Kryo output and hands the borrowed Kryo instance back.
   *
   * Does nothing when the stream is already closed (output is null). Otherwise
   * the Kryo instance is released and both references are cleared even if
   * closing the output throws.
   */
  override def close(): Unit =
    if (output != null) {
      try output.close()
      finally {
        serInstance.releaseKryo(kryo)
        kryo = null
        output = null
      }
    }

abstract class DeserializationStream

// Reads the next object from the stream; abstract here.
def readObject[T: ClassTag](): T

// Reads a key; the default implementation simply delegates to readObject.
def readKey[T: ClassTag](): T = readObject[T]()

// Reads a value; the default implementation simply delegates to readObject.
def readValue[T: ClassTag](): T = readObject[T]()
  // Closes the stream; abstract here.
  override def close(): Unit
  
/**
 * Exposes this stream as an iterator over the objects it contains.
 *
 * Each step reads one object with `readObject`. An EOFException marks the end
 * of the data: the iterator sets `finished` and returns null as a placeholder
 * for that step. Closing the iterator closes this stream.
 */
def asIterator: Iterator[Any] = new NextIterator[Any] {
  override protected def getNext(): Any =
    try readObject[Any]()
    catch {
      case _: EOFException =>
        finished = true
        null
    }

  override protected def close(): Unit = DeserializationStream.this.close()
}

/**
 * Exposes this stream as an iterator over (key, value) pairs.
 *
 * Each step reads a key with `readKey` and then a value with `readValue`. An
 * EOFException from either read marks the end of the data: the iterator sets
 * `finished` and returns null as a placeholder for that step. Closing the
 * iterator closes this stream.
 */
def asKeyValueIterator: Iterator[(Any, Any)] = new NextIterator[(Any, Any)] {
  override protected def getNext(): (Any, Any) =
    try {
      val k = readKey[Any]()
      val v = readValue[Any]()
      (k, v)
    } catch {
      case _: EOFException =>
        finished = true
        null
    }

  override protected def close(): Unit = DeserializationStream.this.close()
}

JavaDeserializationStream

JavaDeserializationStream 继承自DeserializationStream。其内部有1个属性:

// Standard Java deserialization over `in`, with class resolution overridden to go through `loader`.
private val objIn = new ObjectInputStream(in) {
    override def resolveClass(desc: ObjectStreamClass): Class[_] =
      try {
        // scalastyle:off classforname
        // Look the class up by name in `loader` without initializing it (second argument is false).
        Class.forName(desc.getName, false, loader)
        // scalastyle:on classforname
      } catch {
        case e: ClassNotFoundException =>
          // Fall back to the primitiveMappings table keyed by class name; rethrow if it has no entry.
          JavaDeserializationStream.primitiveMappings.getOrElse(desc.getName, throw e)
      }
  }

  /** Reads the next object from the underlying ObjectInputStream and casts it to `T`. */
  def readObject[T: ClassTag](): T = {
    val raw = objIn.readObject()
    raw.asInstanceOf[T]
  }

  /** Closes the underlying ObjectInputStream. */
  def close(): Unit = objIn.close()

KryoDeserializationStream

KryoDeserializationStream 继承自 DeserializationStream。


  // Kryo input wrapping inStream; the unsafe variant is chosen when useUnsafe is set.
  // Set to null by close(), which is how close() detects an already-closed stream.
  private[this] var input: KryoInput =
    if (useUnsafe) new KryoUnsafeInput(inStream) else new KryoInput(inStream)

  // Kryo instance borrowed from the owning serializer instance; handed back in close().
  private[this] var kryo: Kryo = serInstance.borrowKryo()

  /**
   * Reads the next object from the Kryo input and casts it to `T`.
   *
   * A KryoException whose message contains "buffer underflow" (compared
   * case-insensitively) is translated into an EOFException, which is what
   * DeserializationStream uses as its end-of-data signal. Any other
   * KryoException propagates unchanged.
   *
   * @throws EOFException when Kryo reports a buffer underflow
   */
  override def readObject[T: ClassTag](): T = {
    try {
      kryo.readClassAndObject(input).asInstanceOf[T]
    } catch {
      // DeserializationStream uses the EOF exception to indicate stopping condition.
      // getMessage may be null; wrapping it in Option keeps the guard from throwing a
      // NullPointerException that would hide the original KryoException.
      case e: KryoException
        if Option(e.getMessage)
          .exists(_.toLowerCase(Locale.ROOT).contains("buffer underflow")) =>
        throw new EOFException
    }
  }

  /**
   * Closes the Kryo input and hands the borrowed Kryo instance back.
   *
   * Does nothing when the stream is already closed (input is null). Otherwise
   * the Kryo instance is released and both references are cleared even if
   * closing the input throws.
   */
  override def close(): Unit =
    if (input != null) {
      // Kryo's Input automatically closes the input stream it is using.
      try input.close()
      finally {
        serInstance.releaseKryo(kryo)
        kryo = null
        input = null
      }
    }

SerializerManager

通过这个类 来管理各个 序列化和反序列化对象的 实现方法。
用来判断某个字段或者类型应该使用哪一种序列化实例来序列化。
这个版本的spark已经开始支持kryo的序列化了,所以其内部属性存在一个kryoSerializer。
属性:

private[this] val kryoSerializer = new KryoSerializer(conf) // Kryo serializer built from conf
private[this] val stringClassTag: ClassTag[String] = implicitly[ClassTag[String]] // ClassTag for String
private[this] val primitiveAndPrimitiveArrayClassTags: Set[ClassTag[_]] // ClassTags of primitive types and their array types (initializer omitted in this excerpt)
// Whether broadcast blocks are compressed ("spark.broadcast.compress", default true)
private[this] val compressBroadcast = conf.getBoolean("spark.broadcast.compress", true)
  // Whether shuffle blocks are compressed ("spark.shuffle.compress", default true)
  private[this] val compressShuffle = conf.getBoolean("spark.shuffle.compress", true)
  // Whether RDD blocks are compressed ("spark.rdd.compress", default false)
  private[this] val compressRdds = conf.getBoolean("spark.rdd.compress", false)
  // Whether temp-local (spill) blocks are compressed ("spark.shuffle.spill.compress", default true)
  private[this] val compressShuffleSpill = conf.getBoolean("spark.shuffle.spill.compress", true)
// Compression codec created from conf on first use; used by both wrapForCompression overloads
  private lazy val compressionCodec: CompressionCodec = CompressionCodec.createCodec(conf)

方法:

// Forwards the class loader to the internal Kryo serializer.
def setDefaultClassLoader(classLoader: ClassLoader): Unit = {
    kryoSerializer.setDefaultClassLoader(classLoader)
  }
// True when an encryption key is defined (encryptionKey is declared outside this excerpt).
def encryptionEnabled: Boolean = encryptionKey.isDefined
/**
 * Tells whether values of the given ClassTag may be handled by the Kryo serializer.
 *
 * @param ct the ClassTag to check
 * @return true when `ct` is one of the primitive / primitive-array ClassTags,
 *         or equals the String ClassTag
 */
def canUseKryo(ct: ClassTag[_]): Boolean = {
  val isPrimitiveLike = primitiveAndPrimitiveArrayClassTags.contains(ct)
  isPrimitiveLike || ct == stringClassTag
}
/**
 * Chooses the serializer for values of the given ClassTag.
 *
 * @param ct       ClassTag of the values to serialize
 * @param autoPick whether the Kryo serializer may be selected automatically
 * @return the Kryo serializer when `autoPick` is set and `canUseKryo(ct)` holds;
 *         the default serializer otherwise
 */
def getSerializer(ct: ClassTag[_], autoPick: Boolean): Serializer =
  if (!autoPick || !canUseKryo(ct)) defaultSerializer
  else kryoSerializer
/**
 * Chooses the serializer for key/value pairs.
 *
 * @param keyClassTag   ClassTag of the keys
 * @param valueClassTag ClassTag of the values
 * @return the Kryo serializer when both ClassTags satisfy `canUseKryo`;
 *         the default serializer otherwise
 */
def getSerializer(keyClassTag: ClassTag[_], valueClassTag: ClassTag[_]): Serializer = {
  // Key is checked first; the value is only checked when the key qualifies.
  val bothKryoFriendly = Seq(keyClassTag, valueClassTag).forall(tag => canUseKryo(tag))
  if (bothKryoFriendly) kryoSerializer else defaultSerializer
}
/**
 * Decides, from the kind of block id, whether the block's data is compressed.
 *
 * Shuffle and temp-shuffle blocks follow `compressShuffle`, broadcast blocks
 * `compressBroadcast`, RDD blocks `compressRdds`, and temp-local blocks
 * `compressShuffleSpill`. Every other kind of block is left uncompressed.
 */
private def shouldCompress(blockId: BlockId): Boolean = blockId match {
  case _: ShuffleBlockId | _: TempShuffleBlockId => compressShuffle
  case _: BroadcastBlockId => compressBroadcast
  case _: RDDBlockId => compressRdds
  case _: TempLocalBlockId => compressShuffleSpill
  case _ => false
}
// Wraps an input stream first with wrapForEncryption and then with wrapForCompression for the block.
// NOTE(review): on the read side this presumably means decrypting and decompressing;
// wrapForEncryption is not shown in this excerpt — confirm.
def wrapStream(blockId: BlockId, s: InputStream): InputStream = {
    wrapForCompression(blockId, wrapForEncryption(s))
  }
// Wraps an output stream first with wrapForEncryption and then with wrapForCompression for the block.
def wrapStream(blockId: BlockId, s: OutputStream): OutputStream = {
    wrapForCompression(blockId, wrapForEncryption(s))
  }
/**
 * Wraps an output stream with the compression codec when `shouldCompress`
 * says this block is compressed; otherwise returns the stream unchanged.
 */
def wrapForCompression(blockId: BlockId, s: OutputStream): OutputStream = {
  if (!shouldCompress(blockId)) s
  else compressionCodec.compressedOutputStream(s)
}
/**
 * Wraps an input stream with the compression codec when `shouldCompress`
 * says this block is compressed; otherwise returns the stream unchanged.
 */
def wrapForCompression(blockId: BlockId, s: InputStream): InputStream = {
  if (!shouldCompress(blockId)) s
  else compressionCodec.compressedInputStream(s)
}

/**
 * Serializes `values` into `outputStream` for the given block.
 *
 * The output is buffered, wrapped for compression according to the block id,
 * and written with the serializer chosen by `getSerializer`. Automatic Kryo
 * selection is switched off for StreamBlockId blocks. The serialization stream
 * is closed once all values have been written.
 *
 * @param blockId      id of the block being written
 * @param outputStream destination of the serialized bytes
 * @param values       the elements to serialize; fully consumed
 */
def dataSerializeStream[T: ClassTag](
    blockId: BlockId,
    outputStream: OutputStream,
    values: Iterator[T]): Unit = {
  val buffered = new BufferedOutputStream(outputStream)
  // Stream blocks never auto-pick Kryo; everything else may.
  val allowAutoPick = blockId match {
    case _: StreamBlockId => false
    case _ => true
  }
  val instance = getSerializer(implicitly[ClassTag[T]], allowAutoPick).newInstance()
  val sink = instance.serializeStream(wrapForCompression(blockId, buffered))
  sink.writeAll(values).close()
}
// Serializes `values` into a ChunkedByteBuffer by delegating to
// dataSerializeWithExplicitClassTag with the implicit ClassTag of T.
def dataSerialize[T: ClassTag](
      blockId: BlockId,
      values: Iterator[T]): ChunkedByteBuffer = {
    dataSerializeWithExplicitClassTag(blockId, values, implicitly[ClassTag[T]])
  }
/**
 * Serializes `values` into a ChunkedByteBuffer, with the element ClassTag
 * passed explicitly.
 *
 * Bytes are collected in a ChunkedByteBufferOutputStream created with a
 * 4 MiB (1024 * 1024 * 4) size argument and `ByteBuffer.allocate`. The output
 * is buffered, wrapped for compression according to the block id, and written
 * with the serializer chosen by `getSerializer`. Automatic Kryo selection is
 * switched off for StreamBlockId blocks.
 *
 * @param blockId  id of the block being written
 * @param values   the elements to serialize; fully consumed
 * @param classTag ClassTag used to pick the serializer
 * @return the serialized bytes
 */
def dataSerializeWithExplicitClassTag(
    blockId: BlockId,
    values: Iterator[_],
    classTag: ClassTag[_]): ChunkedByteBuffer = {
  val chunkSink = new ChunkedByteBufferOutputStream(1024 * 1024 * 4, ByteBuffer.allocate)
  val buffered = new BufferedOutputStream(chunkSink)
  // Stream blocks never auto-pick Kryo; everything else may.
  val allowAutoPick = blockId match {
    case _: StreamBlockId => false
    case _ => true
  }
  val instance = getSerializer(classTag, allowAutoPick).newInstance()
  // The stream must be closed before the collected bytes are taken from chunkSink.
  instance.serializeStream(wrapForCompression(blockId, buffered)).writeAll(values).close()
  chunkSink.toChunkedByteBuffer
}
/**
 * Deserializes the contents of `inputStream` for the given block into an iterator.
 *
 * The input is buffered, wrapped for compression according to the block id,
 * and read with the serializer chosen by `getSerializer`. Automatic Kryo
 * selection is switched off for StreamBlockId blocks. The elements are exposed
 * through the deserialization stream's `asIterator`, cast to `Iterator[T]`.
 *
 * @param blockId     id of the block being read
 * @param inputStream source of the serialized bytes
 * @param classTag    ClassTag used to pick the serializer
 * @return an iterator over the deserialized elements
 */
def dataDeserializeStream[T](
    blockId: BlockId,
    inputStream: InputStream)
    (classTag: ClassTag[T]): Iterator[T] = {
  val buffered = new BufferedInputStream(inputStream)
  // Stream blocks never auto-pick Kryo; everything else may.
  val allowAutoPick = blockId match {
    case _: StreamBlockId => false
    case _ => true
  }
  val instance = getSerializer(classTag, allowAutoPick).newInstance()
  val source = instance.deserializeStream(wrapForCompression(blockId, buffered))
  source.asIterator.asInstanceOf[Iterator[T]]
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值