Flink源码阅读----04
源码分析
1.MemorySegment
HeapMemorySegment和HybridMemorySegment都是MemorySegment类的子类
HeapMemorySegment表示由Flink管理的一块堆内存。该内存段以字节数组作为底层存储,提供针对基本类型的随机读写(put/get)方法,以及比较和交换方法。此类专门针对堆内存实现了字节访问和字节复制调用,同时复用MemorySegment中的多字节类型访问和跨段操作。注意,通常不应手动分配内存段,而应通过MemorySegmentFactory进行分配。
HybridMemorySegment表示Flink管理的一块内存。 内存可以是堆上的,堆外直接的或堆外不安全的,此类是透明处理的。 此类专门用于堆内存的字节访问和字节复制调用,同时重用MemorySegment中的多字节类型访问和跨段操作。此类包含HeapMemorySegment的功能,但是对于单个字节的操作效率较低。注意,通常不应手动分配内存段,而应通过 MemorySegmentFactory进行分配。
1.HeapMemorySegment
// Byte array holding the data of this heap segment; free() sets it to null.
private byte[] memory;
/**
 * Creates a segment over the given array without an owner (delegates with a null owner).
 *
 * @param memory the byte array that holds the data
 */
HeapMemorySegment(byte[] memory) {
this(memory, null);
}
/**
 * Creates a segment over the given array.
 *
 * @param memory the byte array that holds the data; must not be null
 * @param owner the owner referencing the data
 * @throws NullPointerException if memory is null (raised by Objects.requireNonNull)
 */
HeapMemorySegment(byte[] memory, Object owner) {
super(Objects.requireNonNull(memory), owner);
this.memory = memory;
}
上面的memory字段是对堆内存(字节数组)的一个额外引用,这样可以借助数组内置的检查机制让非法的字节数组访问自动失败,而无需再做额外检查。
----------------------------------------------对父类方法的重写-----------------------------------------------------
// Memory segment operations
/**
 * Frees this memory segment. After this call no further operations are possible on the
 * segment; they will fail. The actual memory (heap or off-heap) is only released once this
 * segment object has been garbage collected.
 */
@Override
public void free() {
super.free();
// Drop the array reference so that later accesses through this.memory fail.
this.memory = null;
}
/**
 * Wraps a region of the backing array in a {@link ByteBuffer}.
 *
 * @param offset the offset within the memory segment
 * @param length the number of bytes to wrap as a buffer
 * @return a ByteBuffer over the requested region of the backing array
 * @throws IllegalStateException if wrapping raises a NullPointerException, which is
 *         reported as the segment having been freed
 */
@Override
public ByteBuffer wrap(int offset, int length) {
    final ByteBuffer view;
    try {
        view = ByteBuffer.wrap(this.memory, offset, length);
    } catch (NullPointerException freed) {
        throw new IllegalStateException("segment has been freed");
    }
    return view;
}
/**
 * Returns the byte array that backs this segment (the same reference, not a copy).
 *
 * @return the array currently held in {@code memory}
 */
public byte[] getArray() {
    final byte[] backing = this.memory;
    return backing;
}
// get / put accessors
/**
 * Reads one byte from the backing array.
 *
 * @param index position of the byte to read
 * @return the byte stored at {@code index}
 */
@Override
public final byte get(int index) {
    final byte[] data = this.memory;
    return data[index];
}
/**
 * Stores one byte in the backing array.
 *
 * @param index position to write to
 * @param b the byte to store
 */
@Override
public final void put(int index, byte b) {
    final byte[] data = this.memory;
    data[index] = b;
}
/**
 * Fills the whole destination array with bytes read from this segment, starting at
 * {@code index}.
 *
 * @param index position in the segment to start reading from
 * @param dst array that receives {@code dst.length} bytes
 */
@Override
public final void get(int index, byte[] dst) {
    final int count = dst.length;
    get(index, dst, 0, count);
}
/**
 * Copies the whole source array into this segment, starting at {@code index}.
 *
 * @param index position in the segment to start writing at
 * @param src array whose {@code src.length} bytes are copied
 */
@Override
public final void put(int index, byte[] src) {
    final int count = src.length;
    put(index, src, 0, count);
}
/**
 * Copies {@code length} bytes from this segment, starting at {@code index}, into
 * {@code dst} at {@code offset}.
 */
@Override
public final void get(int index, byte[] dst, int offset, int length) {
    // Bounds are validated by System.arraycopy itself, so no extra check is made here.
    final byte[] source = this.memory;
    System.arraycopy(source, index, dst, offset, length);
}
/**
 * Copies {@code length} bytes from {@code src}, starting at {@code offset}, into this
 * segment at {@code index}.
 */
@Override
public final void put(int index, byte[] src, int offset, int length) {
    // Bounds are validated by System.arraycopy itself, so no extra check is made here.
    final byte[] target = this.memory;
    System.arraycopy(src, offset, target, index, length);
}
/**
 * Reads the byte at {@code index} and interprets it as a boolean.
 *
 * @return {@code true} if the stored byte is non-zero, {@code false} if it is zero
 */
@Override
public final boolean getBoolean(int index) {
    final byte raw = this.memory[index];
    return raw != 0;
}
/**
 * Stores a boolean as a single byte: 1 for {@code true}, 0 for {@code false}.
 */
@Override
public final void putBoolean(int index, boolean value) {
    final byte encoded;
    if (value) {
        encoded = 1;
    } else {
        encoded = 0;
    }
    this.memory[index] = encoded;
}
// Bulk read and write
/**
 * Writes {@code length} bytes of the backing array, starting at {@code offset}, to the
 * given {@link DataOutput}.
 *
 * @throws IOException if the write to {@code out} fails
 */
@Override
public final void get(DataOutput out, int offset, int length) throws IOException {
    final byte[] data = this.memory;
    out.write(data, offset, length);
}
/**
 * Reads exactly {@code length} bytes from the given {@link DataInput} into the backing
 * array, starting at {@code offset}.
 *
 * @throws IOException if reading from {@code in} fails
 */
@Override
public final void put(DataInput in, int offset, int length) throws IOException {
    final byte[] data = this.memory;
    in.readFully(data, offset, length);
}
/**
 * Bulk get: copies {@code numBytes} bytes from this segment, starting at {@code offset},
 * into the target ByteBuffer. The bytes are placed into the target starting at its
 * current position. Requesting more bytes than the target has remaining results in a
 * {@link java.nio.BufferOverflowException}.
 */
@Override
public final void get(int offset, ByteBuffer target, int numBytes) {
    // The ByteBuffer performs the boundary checks.
    final byte[] data = this.memory;
    target.put(data, offset, numBytes);
}
/**
 * Bulk put: copies {@code numBytes} bytes from the source ByteBuffer into this segment,
 * starting at {@code offset}.
 *
 * @param source the ByteBuffer to copy the bytes from
 */
@Override
public final void put(int offset, ByteBuffer source, int numBytes) {
    final byte[] data = this.memory;
    source.get(data, offset, numBytes);
}
---------------------------产生堆内存段的内存段工厂。该工厂不支持分配堆外内存--------------------------------------------------
/**
 * Factory that produces heap memory segments. It does not support allocating off-heap
 * memory.
 */
public static final class HeapMemorySegmentFactory {

    /** Not public, to prevent instantiation from outside. */
    HeapMemorySegmentFactory() {}

    /**
     * Creates a new memory segment that targets the given heap memory region.
     *
     * @param memory the byte array to wrap
     * @return a segment over {@code memory}
     */
    public HeapMemorySegment wrap(byte[] memory) {
        final HeapMemorySegment segment = new HeapMemorySegment(memory);
        return segment;
    }

    /**
     * Allocates a fresh byte array of the given size (unpooled memory) and creates a new
     * memory segment that represents it.
     *
     * @param size number of bytes to allocate
     * @param owner the owner to associate with the segment
     * @return a segment over the newly allocated array
     */
    public HeapMemorySegment allocateUnpooledSegment(int size, Object owner) {
        final byte[] fresh = new byte[size];
        return new HeapMemorySegment(fresh, owner);
    }

    /**
     * Creates a memory segment that wraps the given byte array.
     *
     * @param memory the byte array to wrap
     * @param owner the owner to associate with the segment
     * @return a segment over {@code memory}
     */
    public HeapMemorySegment wrapPooledHeapMemory(byte[] memory, Object owner) {
        final HeapMemorySegment segment = new HeapMemorySegment(memory, owner);
        return segment;
    }
}
2.HybridMemorySegment
// MemorySegment operations
/**
 * Wraps a region of this segment in a ByteBuffer.
 *
 * @param offset the offset within the memory segment
 * @param length the number of bytes to wrap
 * @return a buffer over the heap array when one is present, otherwise a duplicate of
 *         offHeapBuffer limited to the requested region
 * @throws IllegalStateException if address is greater than addressLimit (segment freed)
 * @throws IndexOutOfBoundsException if the duplicate rejects the requested limit/position
 */
@Override
public ByteBuffer wrap(int offset, int length) {
    if (address > addressLimit) {
        throw new IllegalStateException("segment has been freed");
    }
    if (heapMemory != null) {
        return ByteBuffer.wrap(heapMemory, offset, length);
    }
    try {
        // The duplicate shares content with offHeapBuffer but has its own position/limit.
        final ByteBuffer view = offHeapBuffer.duplicate();
        view.limit(offset + length);
        view.position(offset);
        return view;
    } catch (IllegalArgumentException outOfRange) {
        throw new IndexOutOfBoundsException();
    }
}
//offHeapBuffer.duplicate方法创建一个共享该缓冲区内容的新字节缓冲区。新缓冲区的内容将是此缓冲区的内容。对该缓冲区内容的更改在新缓冲区中可见,反之亦然。两个缓冲区的位置,限制和标记值将是独立的。新缓冲区的容量,限制,位置和标记值将与此缓冲区的值相同。当且仅当该缓冲区是直接缓冲区时,新缓冲区才是直接缓冲区;当且仅当该缓冲区是只读缓冲区时,它才是只读缓冲区。
//get() and put() methods
/**
 * Reads the byte at the given index through UNSAFE.
 *
 * @param index position of the byte within the segment
 * @return the byte read at address + index
 * @throws IllegalStateException if address is greater than addressLimit (segment freed)
 * @throws IndexOutOfBoundsException if index is negative or address + index is not below
 *         addressLimit while the segment is not freed
 */
@Override
public final byte get(int index) {
final long pos = address + index;
// Fast path: one combined check covers the valid range.
if (index >= 0 && pos < addressLimit) {
// NOTE(review): presumably heapMemory is null for off-heap segments and pos is then an
// absolute address — confirm against the Unsafe contract.
return UNSAFE.getByte(heapMemory, pos);
}
// The range check failed: distinguish a freed segment from a bad index.
else if (address > addressLimit) {
throw new IllegalStateException("segment has been freed");
}
else {
// index is in fact invalid
throw new IndexOutOfBoundsException();
}
}
// Memory is accessed through the Unsafe handle below.
/**
 * The unsafe handle for transparent memory copies (heap / off-heap).
 */
@SuppressWarnings("restriction")
protected static final sun.misc.Unsafe UNSAFE = MemoryUtils.UNSAFE;
=================>
// The Unsafe handle, obtained once through getUnsafe().
// NOTE(review): presumably this is the MemoryUtils.UNSAFE declaration referenced above — confirm.
@SuppressWarnings({"restriction", "UseOfSunClasses"})
public static final sun.misc.Unsafe UNSAFE = getUnsafe();
//如果索引为负数,或者大于或等于内存段的大小,则抛出IndexOutOfBoundsException;而下面这个分支(address > addressLimit)表示内存段已被释放,此时抛出的是IllegalStateException
else if (address > addressLimit) {
throw new IllegalStateException("segment has been freed");
}
/**
 * Copies length bytes from this segment, starting at index, into dst at offset.
 *
 * @param index position in the segment to start reading from
 * @param dst the array the memory is copied into
 * @param offset position in dst to start writing at
 * @param length number of bytes to copy
 * @throws IndexOutOfBoundsException if offset/length do not describe a valid range of dst,
 *         or if index is negative or the requested range runs past addressLimit
 * @throws IllegalStateException if address is greater than addressLimit (segment freed)
 */
@Override
public final void get(int index, byte[] dst, int offset, int length) {
// check the byte array offset and length and the status
// OR-ing the four values yields a negative number if any one of them is negative, so a
// single comparison rejects negative offset/length, int overflow of offset + length, and
// ranges that end past dst.length.
if ((offset | length | (offset + length) | (dst.length - (offset + length))) < 0) {
throw new IndexOutOfBoundsException();
}
final long pos = address + index;
// Valid only if the whole range [pos, pos + length) ends at or before addressLimit.
if (index >= 0 && pos <= addressLimit - length) {
// NOTE(review): BYTE_ARRAY_BASE_OFFSET is presumably the Unsafe base offset of byte[]
// elements — confirm where it is defined.
final long arrayAddress = BYTE_ARRAY_BASE_OFFSET + offset;
UNSAFE.copyMemory(heapMemory, pos, dst, arrayAddress, length);
}
else if (address > addressLimit) {
throw new IllegalStateException("segment has been freed");
}
else {
// index is in fact invalid
throw new IndexOutOfBoundsException();
}
}
//如果索引为负数,或者太大而导致请求的字节数超过索引和内存段末尾之间的内存量,则抛出该异常。
if ((offset | length | (offset + length) | (dst.length - (offset + length))) < 0) {
throw new IndexOutOfBoundsException();
}
//addressLimit 最后一个可寻址字节之后一个字节的地址,即address + size
//dst 将内存复制到的那个内存
-------------------------------------------------------------------批量读写---------------------------------------------------------------------------
/**
 * Writes {@code length} bytes of this segment, starting at {@code offset}, to the given
 * DataOutput. With a heap array the bytes are written in one call; otherwise they are
 * written as big-endian longs, eight bytes at a time, followed by single bytes for the
 * remainder.
 *
 * @throws IllegalStateException if address is greater than addressLimit (segment freed)
 * @throws IOException if writing to {@code out} fails
 */
@Override
public final void get(DataOutput out, int offset, int length) throws IOException {
    if (address > addressLimit) {
        throw new IllegalStateException("segment has been freed");
    }
    if (heapMemory != null) {
        out.write(heapMemory, offset, length);
        return;
    }

    int pos = offset;
    int remaining = length;
    // Eight bytes at a time while at least eight remain.
    for (; remaining >= 8; pos += 8, remaining -= 8) {
        out.writeLong(getLongBigEndian(pos));
    }
    // Trailing bytes, one by one.
    for (; remaining > 0; pos++, remaining--) {
        out.writeByte(get(pos));
    }
}
out.writeLong(getLongBigEndian(offset));
//out.writeLong 将由8个字节组成的long值写入字节流,写入的字节流按照下面的格式写入
* (byte)(0xff & (v >> 56))
* (byte)(0xff & (v >> 48))
* (byte)(0xff & (v >> 40))
* (byte)(0xff & (v >> 32))
* (byte)(0xff & (v >> 24))
* (byte)(0xff & (v >> 16))
* (byte)(0xff & (v >> 8))
* (byte)(0xff & v)
//可以通过接口DataInput的readLong方法读取此方法写入的字节,然后该方法将返回等于v的long值。
out.writeByte(get(offset));
//out.writeByte 将参数v的八个低位写入输出流。v的24个高位被忽略。(这意味着对于整数参数,writeByte 的作用与 write 完全相同。)用此方法写入的字节可由接口DataInput的readByte方法读取,然后将返回一个byte等于v。
//以big endian字节顺序将给定的long值(64bit,8个字节)写入给定位置。此方法的速度取决于系统的本机字节顺序,并且可能比putLong(int,long)慢。
//在大多数情况下(例如内存中的临时存储或I/O和网络的序列化),知道写入值的字节顺序与读取值的字节顺序相同,putLong(int,long)方法是首选.
/**
 * Reads {@code length} bytes from the given DataInput into this segment, starting at
 * {@code offset}. With a heap array the bytes are read in one call; otherwise they are
 * read as longs stored in big-endian order, eight bytes at a time, followed by single
 * bytes for the remainder.
 *
 * @throws IllegalStateException if address is greater than addressLimit (segment freed)
 * @throws IOException if reading from {@code in} fails
 */
@Override
public final void put(DataInput in, int offset, int length) throws IOException {
    if (address > addressLimit) {
        throw new IllegalStateException("segment has been freed");
    }
    if (heapMemory != null) {
        in.readFully(heapMemory, offset, length);
        return;
    }

    int pos = offset;
    int remaining = length;
    // Eight bytes at a time while at least eight remain.
    for (; remaining >= 8; pos += 8, remaining -= 8) {
        putLongBigEndian(pos, in.readLong());
    }
    // Trailing bytes, one by one.
    for (; remaining > 0; pos++, remaining--) {
        put(pos, in.readByte());
    }
}
字节顺序由来
在写字符流时,因为单个字符(char)只占一个字节,计算机只须按一个字符一个字符写入文件即可。
但是如果是处理整型时,由于整型占4个字节,所以一个整型内部的字节存储排列的顺序直接关系到被计算机识别出来的整型值.
某种意义上也可直接理解计算机的识别顺序就是所谓的字节顺序。
可以这样理解:在Big-Endian中,地址越低的字节存放的是数据中越高位的部分。
Big-Endian 最高有效字节在整个内容的左端(低地址)。
Little-Endian 最高有效字节在整个内容的右端(高地址)。
低地址 高地址
高位字节 低位字节
----------------------------------------->
+----+----+----+----+
| 12 | 34 | 56 | 78 |
+----+----+----+----+
//BigEndian,在Unix运行
#include <stdint.h>
#include <stdio.h>

/*
 * Prints an int and then each of its four bytes in increasing address order, which
 * reveals the byte order of the machine it runs on.
 */
int main(void)
{
    int i = 0x41424344;
    char *pAddress = (char *)&i;
    int j;

    /* %x expects an unsigned int, so the address is converted explicitly; on a 64-bit
     * platform this shows only the low 32 bits of the address. */
    printf("int Address:%x Value:%x\n", (unsigned int)(uintptr_t)&i, (unsigned int)i);
    printf("-------------------------------\n");
    for (j = 0; j <= 3; j++)
    {
        printf("char Address:%x Value:%c\n", (unsigned int)(uintptr_t)pAddress, *pAddress);
        pAddress++;
    }
    return 0;
}
编译输出:
int Address:7f7f08f0 Value:41424344
-------------------------------
char Address:7f7f08f0 Value:A
char Address:7f7f08f1 Value:B
char Address:7f7f08f2 Value:C
char Address:7f7f08f3 Value:D
//LittleEndian 在win环境运行
#include <stdint.h>
#include <stdio.h>

/*
 * Prints an int and then each of its four bytes in increasing address order, which
 * reveals the byte order of the machine it runs on.
 */
int main(void)
{
    int i = 0x41424344;
    char *pAddress = (char *)&i;
    int j;

    /* %x expects an unsigned int, so the address is converted explicitly; on a 64-bit
     * platform this shows only the low 32 bits of the address. */
    printf("int Address:%x Value:%x\n", (unsigned int)(uintptr_t)&i, (unsigned int)i);
    printf("-------------------------------\n");
    for (j = 0; j <= 3; j++)
    {
        printf("char Address:%x Value:%c\n", (unsigned int)(uintptr_t)pAddress, *pAddress);
        pAddress++;
    }
    return 0;
}
编译输出:
int Address:12ff7c Value:41424344
-------------------------------
char Address:12ff7c Value:D
char Address:12ff7d Value:C
char Address:12ff7e Value:B
char Address:12ff7f Value:A
2.DataOutputView
![DataOutputView](https://s1.ax1x.com/2020/03/24/8qy8hj.png)
1.DataOutputViewStreamWrapper
将OutputStream转换为DataOutputView的实用程序类
/**
 * Skips {@code numBytes} bytes of memory by writing that many bytes from a temporary
 * buffer, in chunks of at most the buffer's length. If some program reads the skipped
 * memory, the result is undefined.
 *
 * @param numBytes number of bytes to skip; values of zero or less write nothing
 * @throws IOException if any I/O related problem prevents the view from advancing to the
 *         desired position
 */
@Override
public void skipBytesToWrite(int numBytes) throws IOException {
    // Allocate the scratch buffer on first use.
    if (tempBuffer == null) {
        tempBuffer = new byte[4096];
    }

    int remaining = numBytes;
    while (remaining > 0) {
        final int chunk = Math.min(remaining, tempBuffer.length);
        write(tempBuffer, 0, chunk);
        remaining -= chunk;
    }
}
2.SeekableDataOutputView
将DataOutputView标记为可定位(seekable)的接口。可定位的视图可以设置其写入位置。
3.DataOutputSerializer
DataOutput接口的简单高效的序列化程序。
/**
 * Returns a reference to the internal byte buffer. The buffer may be larger than the
 * serialized data; only the bytes from 0 to length() are valid. Subsequent calls
 * overwrite the buffer.
 *
 * @return the internal buffer itself, not a copy
 */
public byte[] getSharedBuffer() {
    final byte[] shared = buffer;
    return shared;
}
/**
 * Returns a copy of the buffer that has exactly the length of the data serialized so far.
 * The copy is exclusive to the caller and is not overwritten by later write calls on this
 * serializer.
 *
 * @return a new array holding the first {@code position} bytes of the buffer
 */
public byte[] getCopyOfBuffer() {
    final int validLength = position;
    return Arrays.copyOf(buffer, validLength);
}
/**
 * Writes the eight low-order bits of {@code b}; the 24 high-order bits are ignored.
 * The buffer is resized first when the current position has reached its end.
 */
@Override
public void write(int b) throws IOException {
    if (this.buffer.length <= this.position) {
        resize(1);
    }
    final byte lowBits = (byte) (b & 0xff);
    this.buffer[this.position++] = lowBits;
}
/**
 * Writes {@code len} bytes from array {@code b}, in order, starting at {@code b[off]} and
 * ending with {@code b[off + len - 1]}. Nothing is written when {@code len} is zero.
 *
 * @throws NullPointerException if {@code b} is null
 * @throws ArrayIndexOutOfBoundsException if {@code len} is negative or {@code off + len}
 *         exceeds the length of {@code b}
 */
@Override
public void write(byte[] b, int off, int len) throws IOException {
    if (len < 0) {
        throw new ArrayIndexOutOfBoundsException();
    }
    if (off > b.length - len) {
        throw new ArrayIndexOutOfBoundsException();
    }

    // Grow the buffer when fewer than len bytes are left after the current position.
    if (this.buffer.length - len < this.position) {
        resize(len);
    }

    System.arraycopy(b, off, this.buffer, this.position, len);
    this.position += len;
}
3.数据流转过程
![数据流转过程](https://s1.ax1x.com/2020/03/17/8UUCEn.png)
1.数据在operator中处理
数据在operator中进行处理后,我们经常会调用out.collect(…),这类方法将数据发送到下游,而这个方法,会将数据封装成StreamRecord,内部包含了时间戳等信息。
/** The actual value held by this record. */
private T value;
/** The timestamp of the record. */
private long timestamp;
/** Flag whether the timestamp is actually set. */
private boolean hasTimestamp;
2.数据提交给RecordWriter处理分发
RecordWriter负责将数据写入ResultPartition中去。提交给RecordWriter的过程很简单,就是以下代码,位于RecordWriterOutput类中。
/**
 * Emits a record to the main output. An instance configured with an output tag is not
 * responsible for the main output and ignores the record.
 */
@Override
public void collect(StreamRecord<OUT> record) {
    if (this.outputTag == null) {
        pushToRecordWriter(record);
    }
    // otherwise: we are not responsible for emitting to the main output.
}
/**
 * Emits a record to a side output. The record is forwarded only when this instance has an
 * output tag and that tag equals the given one; otherwise it is ignored.
 */
@Override
public <X> void collect(OutputTag<X> outputTag, StreamRecord<X> record) {
    if (this.outputTag != null && this.outputTag.equals(outputTag)) {
        pushToRecordWriter(record);
    }
    // otherwise: we are not responsible for emitting to the side-output specified by
    // this OutputTag.
}
private <X> void pushToRecordWriter(StreamRecord<X> record) {
serializationDelegate.setInstance(record);
try {
recordWriter.emit(serializationDelegate);
}
catch (Exception e) {
throw new RuntimeException(e.getMessage(), e);
}
3.RecordWriter处理数据
在recordWriter初始化的时候,默认会开启一个守护线程,定时的去flush一下通道里面的数据。
//RecordWriter
RecordWriter(ResultPartitionWriter writer, long timeout, String taskName) {
this.targetPartition = writer;
this.numberOfChannels = writer.getNumberOfSubpartitions();
this.serializer = new SpanningRecordSerializer<T>();
checkArgument(timeout >= -1);
this.flushAlways = (timeout == 0);
if (timeout == -1 || timeout == 0) {
outputFlusher = null;
} else {
String threadName = taskName == null ?
DEFAULT_OUTPUT_FLUSH_THREAD_NAME :
DEFAULT_OUTPUT_FLUSH_THREAD_NAME + " for " + taskName;
//开启一个守护线程,定时去flushAll
outputFlusher = new OutputFlusher(threadName, timeout);
outputFlusher.start();
}
}
recordWriter接收数据并将其序列化后写入channel的逻辑在emit方法中。
/**
 * Serializes the record and copies the serialized bytes to the given target channel.
 *
 * @param record the record to emit
 * @param targetChannel index of the channel to write to
 */
protected void emit(T record, int targetChannel) throws IOException, InterruptedException {
    checkErroneous();

    // Serialize the record into the serializer's intermediate buffer.
    serializer.serializeRecord(record);

    // Make sure the large intermediate serialization buffer is not held for too long.
    final boolean pruneRequested = copyFromSerializerToTargetChannel(targetChannel);
    if (pruneRequested) {
        serializer.prune();
    }
}
先看序列化的方法,它会将数据写入java.nio.ByteBuffer中去,下面的dataBuffer就是java.nio.ByteBuffer的实例。
//SpanningRecordSerializer
/**
 * Serializes the record into serializationBuffer as a 4-byte length field followed by the
 * record's bytes, then exposes the result through dataBuffer.
 *
 * @param record the record to serialize; it writes itself via record.write(...)
 * @throws IllegalStateException if CHECKED is set and dataBuffer still has remaining bytes
 */
public void serializeRecord(T record) throws IOException {
if (CHECKED) {
// Bytes of the previous record that are still pending are treated as an error.
if (dataBuffer.hasRemaining()) {
throw new IllegalStateException("Pending serialization of previous record.");
}
}
serializationBuffer.clear();
// the initial capacity of the serialization buffer should be no less than 4
// Skip 4 bytes up front; the length is written into them once it is known.
serializationBuffer.skipBytesToWrite(4);
/**
 * The record's own serialization logic runs here; whatever it writes ends up in
 * serializationBuffer.
 */
// write data and length
record.write(serializationBuffer);
// Payload length = bytes written so far minus the 4 reserved bytes.
int len = serializationBuffer.length() - 4;
// Go back to the start, write the length, then move past the payload again.
serializationBuffer.setPosition(0);
serializationBuffer.writeInt(len);
serializationBuffer.skipBytesToWrite(len);
dataBuffer = serializationBuffer.wrapAsByteBuffer();
}
emit会调用copyFromSerializerToTargetChannel方法,这里会将数据写入对应的channel中去,channel中通过BufferBuilder去接收数据,而BufferBuilder中就封装了Flink内部著名的MemorySegment。
/**
 * Copies the serialized record from the serializer into the buffer(s) of the target
 * channel, requesting further buffers while the current one fills up before the record
 * is complete.
 *
 * @param targetChannel index of the channel to copy to
 * @return true if the record ended exactly when a buffer became full, in which case the
 *         caller prunes the serializer
 */
protected boolean copyFromSerializerToTargetChannel(int targetChannel) throws IOException, InterruptedException {
// We should reset the initial position of the intermediate serialization buffer before
// copying, so the serialization results can be copied to multiple target buffers.
serializer.reset();
boolean pruneTriggered = false;
// Get the current BufferBuilder of the channel (it wraps a MemorySegment).
// NOTE(review): the original note says one is requested if none exists — confirm in getBufferBuilder(int).
BufferBuilder bufferBuilder = getBufferBuilder(targetChannel);
// Copy serialized bytes into the bufferBuilder.
SerializationResult result = serializer.copyToBufferBuilder(bufferBuilder);
/**
 * Possible results (flag mapping inferred from the SerializationResult names — confirm):
 * 1. Serialized bytes are still left over, i.e. the target buffer filled up first
 *    ---> isFullBuffer = true, isFullRecord = false: request a new buffer and keep copying.
 * 2. The record was copied completely and the target buffer still has room
 *    ---> isFullBuffer = false, isFullRecord = true: the loop is not entered.
 * 3. The record was copied completely and the target buffer is full as well
 *    ---> isFullBuffer = true, isFullRecord = true: the channel's current BufferBuilder
 *    is emptied and the loop is left.
 */
while (result.isFullBuffer()) {
// NOTE(review): the original note says this is used for output metrics — confirm in finishBufferBuilder.
finishBufferBuilder(bufferBuilder);
// If this was a full record, we are done. Not breaking out of the loop at this point
// will lead to another buffer request before breaking out (that would not be a
// problem per se, but it can lead to stalls in the pipeline).
if (result.isFullRecord()) {
pruneTriggered = true;
emptyCurrentBufferBuilder(targetChannel);
break;
}
// Obtain a new BufferBuilder for the channel; it is backed by a MemorySegment requested
// from the buffer pool.
bufferBuilder = requestNewBufferBuilder(targetChannel);
// Continue copying serialized bytes into the new bufferBuilder.
result = serializer.copyToBufferBuilder(bufferBuilder);
}
checkState(!serializer.hasSerializedData(), "All data should be written at once");
if (flushAlways) {
flushTargetPartition(targetChannel);
}
return pruneTriggered;
}
申请内存
/**
 * Requests a new BufferBuilder for the given channel, registers a consumer for it with the
 * target partition and remembers it as the channel's current builder. The channel must
 * have no current builder, or one that is already finished.
 *
 * @param targetChannel index of the channel that needs a new buffer
 * @return the newly requested BufferBuilder
 */
public BufferBuilder requestNewBufferBuilder(int targetChannel) throws IOException, InterruptedException {
    final BufferBuilder previous = bufferBuilders[targetChannel];
    checkState(previous == null || previous.isFinished());

    final BufferBuilder fresh = targetPartition.getBufferBuilder();
    targetPartition.addBufferConsumer(fresh.createBufferConsumer(), targetChannel);
    bufferBuilders[targetChannel] = fresh;
    return fresh;
}
/**
 * Checks the produce state and then requests a BufferBuilder from the buffer pool through
 * its blocking request method.
 *
 * @return the BufferBuilder handed out by the buffer pool
 */
@Override
public BufferBuilder getBufferBuilder() throws IOException, InterruptedException {
    checkInProduceState();

    final BufferBuilder builder = bufferPool.requestBufferBuilderBlocking();
    return builder;
}
/**
 * Requests a memory segment through the blocking request method and wraps it in a
 * BufferBuilder.
 */
@Override
public BufferBuilder requestBufferBuilderBlocking() throws IOException, InterruptedException {
    final MemorySegment segment = requestMemorySegmentBlocking();
    return toBufferBuilder(segment);
}
/**
 * Wraps the given memory segment in a BufferBuilder that refers back to this object.
 *
 * @param memorySegment the segment to wrap; may be null
 * @return a new BufferBuilder, or null if {@code memorySegment} is null
 */
private BufferBuilder toBufferBuilder(MemorySegment memorySegment) {
    return memorySegment == null ? null : new BufferBuilder(memorySegment, this);
}
// Local buffer pool
/**
 * Requests a memory segment, waiting on the availability future and retrying for as long
 * as no segment is returned.
 *
 * @return a non-null memory segment
 */
private MemorySegment requestMemorySegmentBlocking() throws InterruptedException, IOException {
    MemorySegment segment = requestMemorySegment();
    while (segment == null) {
        try {
            // wait until available
            isAvailable().get();
        } catch (ExecutionException e) {
            LOG.error("The available future is completed exceptionally.", e);
            ExceptionUtils.rethrow(e);
        }
        segment = requestMemorySegment();
    }
    return segment;
}
数据从数据序列化缓冲区(ByteBuffer)写入BufferBuilder中
/**
 * Appends the serialized bytes in dataBuffer to the target buffer, commits them and
 * reports how far the copy got.
 *
 * @param targetBuffer the BufferBuilder to copy into
 * @return the serialization result computed for the target buffer after the copy
 */
@Override
public SerializationResult copyToBufferBuilder(BufferBuilder targetBuffer) {
    targetBuffer.append(dataBuffer);
    targetBuffer.commit();

    final SerializationResult outcome = getSerializationResult(targetBuffer);
    return outcome;
}
/**
 * Classifies the state after copying into the target buffer.
 *
 * @param targetBuffer the BufferBuilder that was just written to
 * @return PARTIAL_RECORD_MEMORY_SEGMENT_FULL while dataBuffer still has remaining bytes;
 *         otherwise FULL_RECORD_MEMORY_SEGMENT_FULL if the target buffer is full, and
 *         FULL_RECORD if it is not
 */
private SerializationResult getSerializationResult(BufferBuilder targetBuffer) {
    // Bytes of the serialized record are still left in dataBuffer.
    if (dataBuffer.hasRemaining()) {
        return SerializationResult.PARTIAL_RECORD_MEMORY_SEGMENT_FULL;
    }
    // Nothing is left in dataBuffer; the only question is whether the target buffer is full too.
    if (targetBuffer.isFull()) {
        return SerializationResult.FULL_RECORD_MEMORY_SEGMENT_FULL;
    }
    return SerializationResult.FULL_RECORD;
}
flush操作会触发ResultPartition的flush方法,进而触发ResultSubpartition的flush方法。ResultSubpartition有两个实现类:BoundedBlockingSubpartition对应有界数据集,而PipelinedSubpartition对应有界和无界数据集。
// Original note: for PipelinedSubpartition, the data is finally written out through Netty.
// This callback only forwards the notification to requestQueue, passing this object as
// the reader that has data.
// NOTE(review): the enclosing class is not visible in this excerpt — confirm where it is declared.
@Override
public void notifyDataAvailable() {
requestQueue.notifyReaderNonEmpty(this);
}
/**
 * Schedules a task on the context's executor that fires the given reader as a user event
 * into the pipeline.
 *
 * @param reader the reader that has data available
 */
void notifyReaderNonEmpty(final NetworkSequenceViewReader reader) {
    // The notification may come from the same thread. For the initial writes it can arrive
    // before the reader has set its reference to the view, because the queue is created
    // and the first notification is sent within the same method call. Separating the
    // creation of the view from allowing notifications would resolve this.

    // TODO This could potentially have a bad performance impact as in the
    // worst case (network consumes faster than the producer) each buffer
    // will trigger a separate event loop task being scheduled.
    final Runnable fireReaderEvent = () -> ctx.pipeline().fireUserEventTriggered(reader);
    ctx.executor().execute(fireReaderEvent);
}