call stack
The BlockManager.getDiskWriter method creates a DiskBlockObjectWriter object:
/**
 * Creates a DiskBlockObjectWriter for the given block and file.
 *
 * The "spark.shuffle.sync" setting (default false) is read from the configuration on
 * each call and forwarded to the writer's constructor.
 */
def getDiskWriter(
    blockId: BlockId,
    file: File,
    serializerInstance: SerializerInstance,
    bufferSize: Int,
    writeMetrics: ShuffleWriteMetrics): DiskBlockObjectWriter = {
  val forceSyncOnCommit = conf.getBoolean("spark.shuffle.sync", false)
  new DiskBlockObjectWriter(
    file,
    serializerManager,
    serializerInstance,
    bufferSize,
    forceSyncOnCommit,
    writeMetrics,
    blockId)
}
trait ManualCloseOutputStream
/**
 * Mixin that shields an output stream from close calls, e.g. those issued by a wrapping
 * stream.
 *
 * While mixed in, `close()` only flushes; call `manualClose()` to really close the stream
 * this trait was stacked on. Commit relies on this to close object streams without paying
 * the cost of closing and reopening the underlying file.
 */
private trait ManualCloseOutputStream extends OutputStream {
  /** Flushes buffered data instead of closing. */
  abstract override def close(): Unit = flush()

  /** Performs the real close of the stream this trait extends. */
  def manualClose(): Unit = super.close()
}
fields
// Stream stack, listed from the raw file outwards. All of these are null until the
// writer is initialized/opened and are reset to null when resources are closed.
private var fos: FileOutputStream = null
/** The file channel, used for repositioning / truncating the file. */
private var channel: FileChannel = null
private var ts: TimeTrackingOutputStream = null
private var mcs: ManualCloseOutputStream = null
private var bs: OutputStream = null
private var objOut: SerializationStream = null

// Lifecycle flags.
private var initialized: Boolean = false
private var streamOpen: Boolean = false
private var hasBeenClosed: Boolean = false

/**
 * Cursors used to represent positions in the file.
 *
 * xxxxxxxxxx|----------|-----|
 *           ^          ^     ^
 *           |          |    channel.position()
 *           |        reportedPosition
 *         committedPosition
 *
 * reportedPosition: Position at the time of the last update to the write metrics.
 * committedPosition: Offset after last committed write.
 * -----: Current writes to the underlying file.
 * xxxxx: Committed contents of the file.
 */
private var committedPosition: Long = file.length()
private var reportedPosition: Long = committedPosition

/**
 * Number of records written. Also used to decide when to report bytes written, since
 * computing that for every record is expensive.
 */
private var numRecordsWritten: Int = 0
initialize
/**
 * Opens the underlying file in append mode and builds the lower layers of the stream
 * stack: file stream -> time-tracking stream -> buffered stream guarded against close.
 */
private def initialize(): Unit = {
  fos = new FileOutputStream(file, true)
  channel = fos.getChannel
  ts = new TimeTrackingOutputStream(writeMetrics, fos)
  // Anonymous mixin: a buffered stream whose close() only flushes until manualClose().
  mcs = new BufferedOutputStream(ts, bufferSize) with ManualCloseOutputStream
}
/**
 * Opens the serialization stream on top of the buffered file stream, initializing the
 * underlying file streams first if that has not happened yet.
 *
 * @return this writer
 * @throws IllegalStateException if the writer's resources have already been closed
 */
def open(): DiskBlockObjectWriter = {
  if (hasBeenClosed) {
    throw new IllegalStateException("Writer already closed. Cannot be reopened.")
  }
  if (!initialized) {
    initialize()
    initialized = true
  }

  val wrapped = serializerManager.wrapStream(blockId, mcs)
  bs = wrapped
  objOut = serializerInstance.serializeStream(wrapped)
  streamOpen = true
  this
}
closeResources
/**
 * Close and cleanup all resources.
 * Should call after committing or reverting partial writes.
 *
 * The writer's state is reset even if closing the underlying stream throws, so a failed
 * close can never leave the writer looking initialized (and reusable) while holding
 * half-closed streams. Any exception from the close still propagates to the caller.
 */
private def closeResources(): Unit = {
  if (initialized) {
    try {
      mcs.manualClose()
    } finally {
      // Only plain assignments here, so this cleanup cannot itself throw.
      channel = null
      mcs = null
      bs = null
      fos = null
      ts = null
      objOut = null
      initialized = false
      streamOpen = false
      hasBeenClosed = true
    }
  }
}
close
/**
 * Commits any remaining partial writes and closes resources.
 *
 * Does nothing if the writer was never initialized. Resources are closed even when the
 * final commit fails.
 */
override def close(): Unit = {
  if (initialized) {
    Utils.tryWithSafeFinally {
      commitAndGet()
    } {
      closeResources()
    }
  }
}
commitAndGet
/**
 * Flush the partial writes and commit them as a single atomic block.
 * A commit may write additional bytes to frame the atomic block.
 *
 * After a successful commit the uncommitted-record counter is reset, so a later revert
 * only subtracts records written since this commit from the write metrics.
 *
 * @return file segment with previous offset and length committed on this call.
 */
def commitAndGet(): FileSegment = {
  if (streamOpen) {
    // NOTE: Because Kryo doesn't flush the underlying stream we explicitly flush both the
    // serializer stream and the lower level stream.
    objOut.flush()
    bs.flush()
    objOut.close()
    streamOpen = false

    if (syncWrites) {
      // Force outstanding writes to disk and track how long it takes
      val start = System.nanoTime()
      fos.getFD.sync()
      writeMetrics.incWriteTime(System.nanoTime() - start)
    }

    val pos = channel.position()
    val fileSegment = new FileSegment(file, committedPosition, pos - committedPosition)
    committedPosition = pos
    // In certain compression codecs, more bytes are written after streams are closed
    writeMetrics.incBytesWritten(committedPosition - reportedPosition)
    reportedPosition = committedPosition
    // These records are now committed; they must not be reverted from the metrics later.
    numRecordsWritten = 0
    fileSegment
  } else {
    // Nothing was written since the last commit: report an empty segment.
    new FileSegment(file, committedPosition, 0)
  }
}
revertPartialWritesAndClose
/**
 * Reverts writes that haven't been committed yet. Callers should invoke this function
 * when there are runtime exceptions. This method will not throw, though it may be
 * unsuccessful in truncating written data.
 *
 * Closing the streams and truncating the file are handled independently: a failure while
 * closing is logged and the truncation is still attempted, so uncommitted bytes are not
 * left behind just because the close failed.
 *
 * @return the file that this DiskBlockObjectWriter wrote to.
 */
def revertPartialWritesAndClose(): File = {
  // Step 1: roll back the metrics for the uncommitted data and release the streams.
  try {
    if (initialized) {
      writeMetrics.decBytesWritten(reportedPosition - committedPosition)
      writeMetrics.decRecordsWritten(numRecordsWritten)
      streamOpen = false
      closeResources()
    }
  } catch {
    case e: Exception =>
      logError("Uncaught exception while reverting partial writes to file " + file, e)
  }

  // Step 2: discard the uncommitted bytes by truncating the file back to the last
  // committed position.
  try {
    val truncateStream = new FileOutputStream(file, true)
    try {
      truncateStream.getChannel.truncate(committedPosition)
    } finally {
      truncateStream.close()
    }
  } catch {
    case e: Exception =>
      logError("Uncaught exception while reverting partial writes to file " + file, e)
  }
  file
}
write
/**
 * Writes a key-value pair.
 *
 * Opens the serialization stream first if it is not currently open, then counts the pair
 * as one written record.
 */
def write(key: Any, value: Any): Unit = {
  if (!streamOpen) {
    open()
  }

  objOut.writeKey(key)
  objOut.writeValue(value)
  recordWritten()
}
/**
 * Writes `len` raw bytes of `kvBytes`, starting at `offs`, to the wrapped stream, opening
 * the stream first if needed. No record is counted here; see `recordWritten`.
 */
override def write(kvBytes: Array[Byte], offs: Int, len: Int): Unit = {
  if (!streamOpen) open()
  bs.write(kvBytes, offs, len)
}
/**
 * Tells the writer that one record's worth of bytes has been written through
 * OutputStream#write. Bumps the record counters and, every 16384 records, refreshes the
 * bytes-written metric.
 */
def recordWritten(): Unit = {
  numRecordsWritten += 1
  writeMetrics.incRecordsWritten(1)

  val atReportingBoundary = numRecordsWritten % 16384 == 0
  if (atReportingBoundary) updateBytesWritten()
}
/**
 * Report the number of bytes written in this writer's shuffle write metrics.
 * Note that this is only valid before the underlying streams are closed.
 *
 * Adds the bytes written since the last report (current channel position minus
 * `reportedPosition`) and then advances `reportedPosition` to the current position.
 */
private def updateBytesWritten(): Unit = {
  val currentPos = channel.position()
  writeMetrics.incBytesWritten(currentPos - reportedPosition)
  reportedPosition = currentPos
}
// For testing.
// Flushes the serialization stream and then the wrapped stream beneath it. Both are null
// until open() has been called, so this must only be used while the stream is open.
private[spark] override def flush(): Unit = {
  objOut.flush()
  bs.flush()
}
}