- WAL 日志(预写日志)
- 数据副本
- 回放 receiver 接收到的数据流
/**
 * Store a received block via the configured [[ReceivedBlockHandler]] and report it
 * to the driver-side tracker so it can be scheduled into a batch.
 *
 * @param receivedBlock  the block of received data to persist
 * @param metadataOption optional metadata to forward to the driver with the block
 * @param blockIdOption  optional explicit block id; a new one is generated when absent
 */
def pushAndReportBlock(
    receivedBlock: ReceivedBlock,
    metadataOption: Option[Any],
    blockIdOption: Option[StreamBlockId]
  ): Unit = {  // explicit result type replaces deprecated procedure syntax `def f(...) { }`
  // Fall back to a freshly generated id when the caller did not supply one.
  val blockId = blockIdOption.getOrElse(nextBlockId)
  val time = System.currentTimeMillis
  // Actual storage strategy (BlockManager-only vs. write-ahead log) is decided by the handler.
  val blockStoreResult = receivedBlockHandler.storeBlock(blockId, receivedBlock)
  logDebug(s"Pushed block $blockId in ${(System.currentTimeMillis - time)} ms")
  val numRecords = blockStoreResult.numRecords
  val blockInfo = ReceivedBlockInfo(streamId, numRecords, metadataOption, blockStoreResult)
  // Notify the driver; askWithRetry retries the RPC on transient failures.
  trackerEndpoint.askWithRetry[Boolean](AddBlock(blockInfo))
  logDebug(s"Reported block $blockId")
}
数据的存储是借助 receivedBlockHandler 完成的,它有两种实现方式:
/**
 * Handler used to persist incoming blocks, chosen once at construction time:
 * a WAL-backed handler when the receiver write-ahead log is enabled (requires a
 * checkpoint directory), otherwise a plain BlockManager-based handler.
 */
private val receivedBlockHandler: ReceivedBlockHandler = {
  if (!WriteAheadLogUtils.enableReceiverLog(env.conf)) {
    // WAL disabled: store blocks through the BlockManager only.
    new BlockManagerBasedBlockHandler(env.blockManager, receiver.storageLevel)
  } else {
    // WAL enabled: the log files live under the checkpoint directory, so it must be set.
    if (checkpointDirOption.isEmpty) {
      throw new SparkException(
        "Cannot enable receiver write-ahead log without checkpoint directory set. " +
          "Please use streamingContext.checkpoint() to set the checkpoint directory. " +
          "See documentation for more details.")
    }
    new WriteAheadLogBasedBlockHandler(env.blockManager, receiver.streamId,
      receiver.storageLevel, env.conf, hadoopConf, checkpointDirOption.get)
  }
}
/**
 * Storage level actually used while the write-ahead log is enabled: the requested
 * media flags (disk/memory/off-heap) are kept, but serialized storage is forced
 * (deserialized = false) and replication is forced to 1 — the WAL already provides
 * durability, so extra replicas are redundant. Warns when the requested level asked
 * for deserialized storage or replication > 1.
 */
private val effectiveStorageLevel = {
  val wantsDeserialized = storageLevel.deserialized
  val requestedReplication = storageLevel.replication
  if (wantsDeserialized) {
    logWarning(s"Storage level serialization ${storageLevel.deserialized} is not supported when" +
      s" write ahead log is enabled, change to serialization false")
  }
  if (requestedReplication > 1) {
    logWarning(s"Storage level replication ${storageLevel.replication} is unnecessary when " +
      s"write ahead log is enabled, change to replication 1")
  }
  // Same media flags, but serialized (false) and single-replica (1).
  StorageLevel(storageLevel.useDisk, storageLevel.useMemory, storageLevel.useOffHeap,
    false, 1)
}
/**
 * Flags controlling how a block is stored. Construction is `private`; instances are
 * obtained through the predefined constants (MEMORY_ONLY, DISK_ONLY, ...) or a factory.
 * Fields are `var` because `Externalizable` requires mutable state that
 * `readExternal` can populate after a no-arg construction.
 *
 * NOTE(review): the class body (readExternal/writeExternal, accessors such as
 * `useDisk`/`replication` used elsewhere in this file) is not visible in this excerpt.
 */
class StorageLevel private(
    private var _useDisk: Boolean,      // store on disk
    private var _useMemory: Boolean,    // store in memory
    private var _useOffHeap: Boolean,   // store off-heap
    private var _deserialized: Boolean, // keep as deserialized objects rather than serialized bytes
    private var _replication: Int = 1)  // number of replicas; defaults to a single copy
  extends Externalizable
// Predefined storage levels. Positional constructor arguments are:
//   (useDisk, useMemory, useOffHeap, deserialized[, replication]); replication defaults to 1.
// Naming convention: `_2` suffix => replication 2; `_SER` suffix => serialized (deserialized = false).
val NONE = new StorageLevel(false, false, false, false)
val DISK_ONLY = new StorageLevel(true, false, false, false)
val DISK_ONLY_2 = new StorageLevel(true, false, false, false, 2)
val MEMORY_ONLY = new StorageLevel(false, true, false, true)
val MEMORY_ONLY_2 = new StorageLevel(false, true, false, true, 2)
val MEMORY_ONLY_SER = new StorageLevel(false, true, false, false)
val MEMORY_ONLY_SER_2 = new StorageLevel(false, true, false, false, 2)
val MEMORY_AND_DISK = new StorageLevel(true, true, false, true)
val MEMORY_AND_DISK_2 = new StorageLevel(true, true, false, true, 2)
val MEMORY_AND_DISK_SER = new StorageLevel(true, true, false, false)
val MEMORY_AND_DISK_SER_2 = new StorageLevel(true, true, false, false, 2)
val OFF_HEAP = new StorageLevel(false, false, true, false)
/**
 * Store the given block into the BlockManager, dispatching on the concrete
 * [[ReceivedBlock]] subtype, then verify the put actually registered the block id.
 *
 * @param blockId id under which to store the block
 * @param block   the received data (array buffer, iterator, or byte buffer form)
 * @return a [[BlockManagerBasedStoreResult]] carrying the id and, when countable,
 *         the number of records stored
 * @throws SparkException on an unexpected block type or when the put did not succeed
 */
def storeBlock(blockId: StreamBlockId, block: ReceivedBlock): ReceivedBlockStoreResult = {
  // Filled in by the branches below; stays None when the record count is unknown.
  var numRecords = None: Option[Long]
  val putResult: Seq[(BlockId, BlockStatus)] = block match {
    case ArrayBufferBlock(arrayBuffer) =>
      // Size is known up front for a materialized buffer.
      numRecords = Some(arrayBuffer.size.toLong)
      blockManager.putIterator(blockId, arrayBuffer.iterator, storageLevel,
        tellMaster = true)
    case IteratorBlock(iterator) =>
      // Wrap the iterator so records can be counted while the BlockManager consumes it;
      // the count must be read only AFTER putIterator has drained the iterator.
      val countIterator = new CountingIterator(iterator)
      val putResult = blockManager.putIterator(blockId, countIterator, storageLevel,
        tellMaster = true)
      numRecords = countIterator.count  // presumably Option[Long] — TODO confirm CountingIterator's API
      putResult
    case ByteBufferBlock(byteBuffer) =>
      // Raw bytes: no record count available.
      blockManager.putBytes(blockId, byteBuffer, storageLevel, tellMaster = true)
    case o =>
      throw new SparkException(
        s"Could not store $blockId to block manager, unexpected block type ${o.getClass.getName}")
  }
  // putIterator/putBytes report the statuses of blocks they updated; our id must be among them.
  if (!putResult.map { _._1 }.contains(blockId)) {
    throw new SparkException(
      s"Could not store $blockId to block manager with storage level $storageLevel")
  }
  BlockManagerBasedStoreResult(blockId, numRecords)
}
对于从kafka中直接读取数据,可以通过记录数据offset的方法来进行容错。如果程序崩溃,下次启动时,从上次未处理数据的offset再次读取数据即可。