netty源码浅析-池化内存直接申请

胖柯G

已于 2023-07-30 15:45:46 修改

阅读量172

点赞数

分类专栏： netty 文章标签： java

于 2023-07-30 15:36:22 首次发布

本文链接：https://blog.csdn.net/GeekerJava/article/details/132007690

版权

netty 专栏收录该内容

10 篇文章

订阅专栏

池化内存直接申请

池化内存申请是我们的重点我们以下面方法为入口分析，池化内存的分配过程

// Entry point of the walkthrough: take the shared pooled allocator and request a 100-byte buffer.
PooledByteBufAllocator pooledByteBufAllocator = PooledByteBufAllocator.DEFAULT;
pooledByteBufAllocator.buffer(100);
 // DEFAULT is constructed with preferDirect = PlatformDependent.directBufferPreferred().
 public static final PooledByteBufAllocator DEFAULT =
            new PooledByteBufAllocator(PlatformDependent.directBufferPreferred());

我们逐步跟踪进入PooledByteBufAllocator的构造函数中

/**
 * Creates the pooled allocator: records the cache sizes, validates the arguments and builds the
 * heap and direct arena arrays together with their read-only metric lists.
 */
public PooledByteBufAllocator(boolean preferDirect, int nHeapArena, int nDirectArena, int pageSize, int maxOrder,
                                  int tinyCacheSize, int smallCacheSize, int normalCacheSize,
                                  boolean useCacheForAllThreads, int directMemoryCacheAlignment) {
        super(preferDirect);
        threadCache = new PoolThreadLocalCache(useCacheForAllThreads);// thread-local holder; threadCache.get() later yields the calling thread's PoolThreadCache
        this.tinyCacheSize = tinyCacheSize;// 512 in this walkthrough
        this.smallCacheSize = smallCacheSize;// 256 in this walkthrough
        this.normalCacheSize = normalCacheSize;// 64 in this walkthrough
        chunkSize = validateAndCalculateChunkSize(pageSize, maxOrder);// chunk size derived from pageSize and maxOrder (16M for 8K pages and maxOrder 11 in this walkthrough)

        checkPositiveOrZero(nHeapArena, "nHeapArena");// must be >= 0; zero means no heap arenas (see the else branch below)
        checkPositiveOrZero(nDirectArena, "nDirectArena");// must be >= 0; zero means no direct arenas (see the else branch below)

        checkPositiveOrZero(directMemoryCacheAlignment, "directMemoryCacheAlignment");// must be >= 0; 0 in this walkthrough
        if (directMemoryCacheAlignment > 0 && !isDirectMemoryCacheAlignmentSupported()) {
            throw new IllegalArgumentException("directMemoryCacheAlignment is not supported");
        }
        // x & -x isolates the lowest set bit, so this check only passes for 0 or a power of two
        if ((directMemoryCacheAlignment & -directMemoryCacheAlignment) != directMemoryCacheAlignment) {
            throw new IllegalArgumentException("directMemoryCacheAlignment: "
                    + directMemoryCacheAlignment + " (expected: power of two)");
        }

        int pageShifts = validateAndCalculatePageShifts(pageSize);// 13 for pageSize 8192 (2^13) in this walkthrough

        if (nHeapArena > 0) {
            heapArenas = newArenaArray(nHeapArena);// arena array sized by nHeapArena
            List<PoolArenaMetric> metrics = new ArrayList<PoolArenaMetric>(heapArenas.length);
            // fill every slot of the array with its own HeapArena and expose it as a metric
            for (int i = 0; i < heapArenas.length; i ++) {
                PoolArena.HeapArena arena = new PoolArena.HeapArena(this,
                        pageSize, maxOrder, pageShifts, chunkSize,
                        directMemoryCacheAlignment);
                heapArenas[i] = arena;
                metrics.add(arena);
            }
            heapArenaMetrics = Collections.unmodifiableList(metrics);
        } else {
            heapArenas = null;
            heapArenaMetrics = Collections.emptyList();
        }

        // same construction as above, for the direct-memory arenas
        if (nDirectArena > 0) {
            directArenas = newArenaArray(nDirectArena);
            List<PoolArenaMetric> metrics = new ArrayList<PoolArenaMetric>(directArenas.length);
            for (int i = 0; i < directArenas.length; i ++) {
                PoolArena.DirectArena arena = new PoolArena.DirectArena(
                        this, pageSize, maxOrder, pageShifts, chunkSize, directMemoryCacheAlignment);
                directArenas[i] = arena;
                metrics.add(arena);
            }
            directArenaMetrics = Collections.unmodifiableList(metrics);
        } else {
            directArenas = null;
            directArenaMetrics = Collections.emptyList();
        }
        metric = new PooledByteBufAllocatorMetric(this);
    }

我们将上面的构造函数方法分开来看首先是对一些值进行赋值然后对一些值进行校验


        // Excerpt of the constructor: build the heap arenas only when nHeapArena is positive.
        if (nHeapArena > 0) {
            heapArenas = newArenaArray(nHeapArena);// arena array sized by nHeapArena
            List<PoolArenaMetric> metrics = new ArrayList<PoolArenaMetric>(heapArenas.length);
            // fill every slot of the array with its own HeapArena and expose it as a metric
            for (int i = 0; i < heapArenas.length; i ++) {
                PoolArena.HeapArena arena = new PoolArena.HeapArena(this,
                        pageSize, maxOrder, pageShifts, chunkSize,
                        directMemoryCacheAlignment);
                heapArenas[i] = arena;
                metrics.add(arena);
            }
            heapArenaMetrics = Collections.unmodifiableList(metrics);
        } else {
            heapArenas = null;
            heapArenaMetrics = Collections.emptyList();
        }

这里nHeapArena默认取“CPU核心数*2”与“根据可用内存和chunk大小算出的arena数量上限”两者中较小的那个；如果取的是CPU核心数的二倍，那就和EventLoopGroup中默认的线程数量相同。这里先创建一个长度为nHeapArena的PoolArena数组，然后循环为数组的每个位置创建并初始化一个HeapArena

// HeapArena adds nothing of its own here: every argument is forwarded unchanged to the superclass constructor.
HeapArena(PooledByteBufAllocator parent, int pageSize, int maxOrder,
                int pageShifts, int chunkSize, int directMemoryCacheAlignment) {
            super(parent, pageSize, maxOrder, pageShifts, chunkSize,
                    directMemoryCacheAlignment);
        }

// Body of the PoolArena constructor (its signature is not quoted in the article).
this.parent = parent;// the owning PooledByteBufAllocator
        this.pageSize = pageSize;// 8K in this walkthrough
        this.maxOrder = maxOrder;// 11 in this walkthrough
        this.pageShifts = pageShifts;// 13 in this walkthrough
        this.chunkSize = chunkSize;// 16M in this walkthrough
        directMemoryCacheAlignment = cacheAlignment;// 0 in this walkthrough
        directMemoryCacheAlignmentMask = cacheAlignment - 1;// -1 when cacheAlignment is 0
        subpageOverflowMask = ~(pageSize - 1);// -8192 for an 8K page: the low 13 bits are all 0
        tinySubpagePools = newSubpagePoolArray(numTinySubpagePools);// array of 32 list heads (numTinySubpagePools = 512 >>> 4)
        // give every tiny slot its own list head
        for (int i = 0; i < tinySubpagePools.length; i ++) {
            tinySubpagePools[i] = newSubpagePoolHead(pageSize);
        }

        numSmallSubpagePools = pageShifts - 9;// 4 when pageShifts is 13
        smallSubpagePools = newSubpagePoolArray(numSmallSubpagePools);// array of 4 list heads
        // give every small slot its own list head
        for (int i = 0; i < smallSubpagePools.length; i ++) {
            smallSubpagePools[i] = newSubpagePoolHead(pageSize);
        }
        // Create the six PoolChunkLists (the second argument is presumably the next list, the two
        // ints the min/max usage) and then wire the prev links: qInit's prev is qInit itself and
        // q000's prev is null.
        q100 = new PoolChunkList<T>(this, null, 100, Integer.MAX_VALUE, chunkSize);
        q075 = new PoolChunkList<T>(this, q100, 75, 100, chunkSize);
        q050 = new PoolChunkList<T>(this, q075, 50, 100, chunkSize);
        q025 = new PoolChunkList<T>(this, q050, 25, 75, chunkSize);
        q000 = new PoolChunkList<T>(this, q025, 1, 50, chunkSize);
        qInit = new PoolChunkList<T>(this, q000, Integer.MIN_VALUE, 25, chunkSize);

        q100.prevList(q075);
        q075.prevList(q050);
        q050.prevList(q025);
        q025.prevList(q000);
        q000.prevList(null);
        qInit.prevList(qInit);

        // expose the six lists, ordered from qInit to q100, as an unmodifiable metric list
        List<PoolChunkListMetric> metrics = new ArrayList<PoolChunkListMetric>(6);
        metrics.add(qInit);
        metrics.add(q000);
        metrics.add(q025);
        metrics.add(q050);
        metrics.add(q075);
        metrics.add(q100);
        chunkListMetrics = Collections.unmodifiableList(metrics);
    }

上面的代码我们分开来看，首先执行一些赋值操作，然后创建了32个tiny类型的PoolSubpage和4个small类型的PoolSubpage，我们继续跟踪到PoolSubpage的内部

// give each of the 32 tiny slots its own list head
        for (int i = 0; i < tinySubpagePools.length; i ++) {
            tinySubpagePools[i] = newSubpagePoolHead(pageSize);
        }

        numSmallSubpagePools = pageShifts - 9;// 4 when pageShifts is 13
        smallSubpagePools = newSubpagePoolArray(numSmallSubpagePools);// array of 4 list heads
        // give each of the 4 small slots its own list head
        for (int i = 0; i < smallSubpagePools.length; i ++) {
            smallSubpagePools[i] = newSubpagePoolHead(pageSize);
        }

static final int numTinySubpagePools = 512 >>> 4;// 512 / 16 = 32: one slot per 16-byte step below 512

/**
 * Creates the head node of a subpage list. The head's prev and next both point at the head
 * itself, so an empty list is recognised by {@code head.next == head}.
 */
private PoolSubpage<T> newSubpagePoolHead(int pageSize) {
        PoolSubpage<T> head = new PoolSubpage<T>(pageSize);
        head.prev = head;
        head.next = head;
        return head;
    }

// Constructor used for list heads only: no chunk, no bitmap, and -1 in every index/size field.
PoolSubpage(int pageSize) {
        chunk = null;
        memoryMapIdx = -1;
        runOffset = -1;
        elemSize = -1;
        this.pageSize = pageSize;// 8K in this walkthrough
        bitmap = null;
    }

这里可以看到创建了一个长度为32的PoolSubpage数组，并对数组的每个位置进行了初始化。PoolSubpage是一个双向链表结构，首次创建时，头部节点的prev和next都指向自己。

numSmallSubpagePools = pageShifts - 9;// 4 when pageShifts is 13
smallSubpagePools = newSubpagePoolArray(numSmallSubpagePools);// array of 4 list heads
// give each of the 4 small slots its own list head
for (int i = 0; i < smallSubpagePools.length; i ++) {
    smallSubpagePools[i] = newSubpagePoolHead(pageSize);
}

可以看到然后又创建了长度为4的PoolSubpage数组，并且进行了初始化，然后形成了链表结构

// Create the six PoolChunkLists and then wire the prev links: qInit's prev is qInit itself and
// q000's prev is null.
        q100 = new PoolChunkList<T>(this, null, 100, Integer.MAX_VALUE, chunkSize);
        q075 = new PoolChunkList<T>(this, q100, 75, 100, chunkSize);
        q050 = new PoolChunkList<T>(this, q075, 50, 100, chunkSize);
        q025 = new PoolChunkList<T>(this, q050, 25, 75, chunkSize);
        q000 = new PoolChunkList<T>(this, q025, 1, 50, chunkSize);
        qInit = new PoolChunkList<T>(this, q000, Integer.MIN_VALUE, 25, chunkSize);

        q100.prevList(q075);
        q075.prevList(q050);
        q050.prevList(q025);
        q025.prevList(q000);
        q000.prevList(null);
        qInit.prevList(qInit);

然后创建了6个PoolChunkList，他们代表每个chunk的使用率是多少

对象名	最低使用率	最高使用率
qInit	Integer.MIN_VALUE	25
q000	1	50
q025	25	75
q050	50	100
q075	75	100
q100	100	Integer.MAX_VALUE

PoolChunkList是为了组织poolChunk：根据poolChunk的使用率把它移动到不同的PoolChunkList中，这样我们在下次申请内存的时候就可以直接通过PoolChunkList中已有的poolChunk进行申请。关于PoolChunk我们后面再介绍

然后将这些PoolChunkList除了qInit外，其他的组成一个双向链表，而qInit的next指向q000、prev指向自己。这里我们可以看到相邻PoolChunkList的使用率区间会有部分交集，主要是为了防止使用率在临界值附近反复升高和下降时，poolChunk在双向链表中不停地来回移动。下面我们回到内存分配方法。

/**
 * Allocates a buffer with the given initial capacity: a direct buffer when
 * {@code directByDefault} is set, otherwise a heap buffer.
 */
public ByteBuf buffer(int initialCapacity) {
        if (directByDefault) {
            return directBuffer(initialCapacity);
        }
        return heapBuffer(initialCapacity);
    }

上面的代码我们在分析byteBuf分配器的时候也有提到最后会走到

// Abstract hook: each concrete allocator decides how a direct buffer is actually created.
protected abstract ByteBuf newDirectBuffer(int initialCapacity, int maxCapacity);

抽象方法，具体的实现交给子类完成，我们继续跟踪，来到PooledByteBufAllocator#newDirectBuffer，

/**
 * Pooled implementation: allocates from the calling thread's direct arena when it has one,
 * otherwise falls back to an unpooled direct buffer. The result is wrapped by
 * {@code toLeakAwareBuffer}.
 */
protected ByteBuf newDirectBuffer(int initialCapacity, int maxCapacity) {
        // every thread has its own PoolThreadCache; it is presumably created on the first get()
        PoolThreadCache cache = threadCache.get();
        PoolArena<ByteBuffer> directArena = cache.directArena;

        final ByteBuf buf;
        if (directArena != null) {
            buf = directArena.allocate(cache, initialCapacity, maxCapacity);
        } else {
            // no direct arena: create an unpooled buffer, Unsafe-based when Unsafe is available
            buf = PlatformDependent.hasUnsafe() ?
                    UnsafeByteBufUtil.newUnsafeDirectByteBuf(this, initialCapacity, maxCapacity) :
                    new UnpooledDirectByteBuf(this, initialCapacity, maxCapacity);
        }

        return toLeakAwareBuffer(buf);
    }

这里为了减少线程竞争，会给每个线程创建一个PoolThreadCache，内存可以先从PoolThreadCache中申请。这里我们先跳过缓存，后面再分析通过缓存申请的情况，现在先分析直接分配的情况。

// Picks the pooled direct buffer flavour: the Unsafe-based one when HAS_UNSAFE is true, the plain one otherwise.
protected PooledByteBuf<ByteBuffer> newByteBuf(int maxCapacity) {
    // Unsafe is available
            if (HAS_UNSAFE) {
                return PooledUnsafeDirectByteBuf.newInstance(maxCapacity);
            } else {
                return PooledDirectByteBuf.newInstance(maxCapacity);
            }
        }

// Obtains a buffer object from RECYCLER and resets it for a new use with the given maxCapacity.
static PooledUnsafeDirectByteBuf newInstance(int maxCapacity) {
    // take an object from the recycler (presumably a new one is created when none is available)
        PooledUnsafeDirectByteBuf buf = RECYCLER.get();
    // reset the buffer state before handing it out
        buf.reuse(maxCapacity);
        return buf;
    }

// Resets a recycled buffer: new max capacity, reference count reset, both indexes set to 0, marks discarded.
final void reuse(int maxCapacity) {
        maxCapacity(maxCapacity);
        resetRefCnt();
        setIndex0(0, 0);
        discardMarks();
    }

这里我们看到会根据平台是否支持unsafe，从对象回收站中获取一个对应类型的pooledByteBuf对象，如果获取不到就创建一个新对象，然后设置一些初始值，比如将读写指针、标记指针重置为0，设置最大容量，然后我们就可以使用这个对象了。需要注意此时这个对象还没有关联任何内存。我们再次回到上面的申请方法

/**
 * Binds memory for {@code reqCapacity} bytes to {@code buf}. The request is normalized first and
 * then served by size class: tiny/small (below one page) from the thread cache, an existing
 * subpage or a chunk; normal (up to chunkSize) from the thread cache or a chunk; anything larger
 * through allocateHuge.
 */
private void allocate(PoolThreadCache cache, PooledByteBuf<T> buf, final int reqCapacity) {
        // normalized size of the request
        final int normCapacity = normalizeCapacity(reqCapacity);// 112 for the 100-byte request of this walkthrough
        if (isTinyOrSmall(normCapacity)) { // is the normalized size below one page (8K)?
            int tableIdx;
            PoolSubpage<T>[] table;
            // tiny or small?
            boolean tiny = isTiny(normCapacity);
            if (tiny) { // < 512
                // try the thread cache first; done if it can serve the request
                if (cache.allocateTiny(this, buf, reqCapacity, normCapacity)) {// reqCapacity = 100, normCapacity = 112 in this walkthrough
                    // was able to allocate out of the cache so move on
                    return;
                }
                tableIdx = tinyIdx(normCapacity);// 112 >>> 4 = 7
                table = tinySubpagePools;
            } else {
                if (cache.allocateSmall(this, buf, reqCapacity, normCapacity)) {
                    // was able to allocate out of the cache so move on
                    return;
                }
                tableIdx = smallIdx(normCapacity);
                table = smallSubpagePools;
            }
            // list head for this normalized size in the tiny or small table
            final PoolSubpage<T> head = table[tableIdx];// tinySubpagePools[7] in this walkthrough

            /**
             * Synchronize on the head. This is needed as {@link PoolChunk#allocateSubpage(int)} and
             * {@link PoolChunk#free(long)} may modify the doubly linked list as well.
             */
            synchronized (head) {
                // on the very first allocation the list holds only the head, so head.next is head
                final PoolSubpage<T> s = head.next;
                // a subpage for this size is already linked behind the head: allocate from it
                if (s != head) {
                    assert s.doNotDestroy && s.elemSize == normCapacity;
                    long handle = s.allocate();
                    assert handle >= 0;
                    s.chunk.initBufWithSubpage(buf, null, handle, reqCapacity);
                    incTinySmallAllocation(tiny);
                    return;
                }
            }
            // no usable subpage yet: allocate through the chunk lists (a new chunk is created if needed)
            synchronized (this) {
                allocateNormal(buf, reqCapacity, normCapacity);
            }

            incTinySmallAllocation(tiny);
            return;
        }
        // normal size class: from one page up to chunkSize (8K - 16M in this walkthrough)
        if (normCapacity <= chunkSize) {
            if (cache.allocateNormal(this, buf, reqCapacity, normCapacity)) {
                // was able to allocate out of the cache so move on
                return;
            }
            synchronized (this) {
                allocateNormal(buf, reqCapacity, normCapacity);
                ++allocationsNormal;
            }
        } else {
            // Huge allocations are never served via the cache so just call allocateHuge
            // huge size class: larger than chunkSize (16M in this walkthrough)
            allocateHuge(buf, reqCapacity);
        }
    }

可以看到针对申请内存大小可以分为下面几种

内存类型	范围
tiny	0-512
small	512-8K
normal	8K-16M
huge	16M以上
首先对申请值进行规范化处理

final int normCapacity = normalizeCapacity(reqCapacity);//112

/**
 * Normalizes a requested capacity. Requests of chunkSize or more are returned as-is (or aligned
 * when a cache alignment is configured); non-tiny requests are rounded up to the smallest power
 * of two that is not less than the request; tiny requests are aligned, or rounded up to the
 * next multiple of 16.
 */
int normalizeCapacity(int reqCapacity) {
        checkPositiveOrZero(reqCapacity, "reqCapacity");

        if (reqCapacity >= chunkSize) {
            return directMemoryCacheAlignment == 0 ? reqCapacity : alignCapacity(reqCapacity);
        }
        // not tiny: round up to the smallest power of two >= reqCapacity
        if (!isTiny(reqCapacity)) { // >= 512
            // Doubled

            // subtract 1, smear the highest set bit into all lower bits, then add 1
            int normalizedCapacity = reqCapacity;
            normalizedCapacity --;
            normalizedCapacity |= normalizedCapacity >>>  1;
            normalizedCapacity |= normalizedCapacity >>>  2;
            normalizedCapacity |= normalizedCapacity >>>  4;
            normalizedCapacity |= normalizedCapacity >>>  8;
            normalizedCapacity |= normalizedCapacity >>> 16;
            normalizedCapacity ++;

            // the increment overflowed to a negative value: fall back to 2^30
            if (normalizedCapacity < 0) {
                normalizedCapacity >>>= 1;
            }
            assert directMemoryCacheAlignment == 0 || (normalizedCapacity & directMemoryCacheAlignmentMask) == 0;

            return normalizedCapacity;
        }

        if (directMemoryCacheAlignment > 0) {
            return alignCapacity(reqCapacity);
        }

        // Quantum-spaced
        // tiny request that is already a multiple of 16: keep it
        if ((reqCapacity & 15) == 0) {
            return reqCapacity;
        }
        // otherwise round up to the next multiple of 16 (e.g. 100 -> 112)
        return (reqCapacity & ~15) + 16;
    }

这里如果申请的类型是tiny类型，如果申请的大小是16的倍数则直接返回，如果不是就调整为大于申请值的最小的16的倍数；如果申请的不是tiny类型且小于chunkSize，则申请的大小调整为大于等于申请值的最小的2的幂（申请值达到或超过chunkSize时除了对齐之外不做调整，直接返回）。这里我们申请的值是100，规范化后的数值为112。我们继续回到申请方法中，判断申请的内存大小是不是小于一个page的大小，如果是则会将page分割成若干个大小为规范化后的值的subPage。

范围含义	内存大小
chunk	16M
page	8K
subPage	小于8K
netty申请内存是按照chunk向操作系统进行申请，page是chunk切分后的结果，一个chunk最多可以切分为2048个page，而subPage是对page分割的，subPage最小是16B大小，也就是一个page可以切分为512个subPage，这样当申请内存不足一个page时不会导致内存浪费。我们继续回到上面的方法。

判断申请的大小是不是不足一个page，如果是就需要对page进行切分，然后判断申请的类型是tiny类型还是small类型，这里我们申请的大小是tiny类型，然后会先通过cache进行申请，这个我们在下面分析，现在分析直接申请

tableIdx = tinyIdx(normCapacity);// 112 >>> 4 = 7
// divide by 16: tiny sizes are multiples of 16, so this is the slot index in tinySubpagePools
static int tinyIdx(int normCapacity) {
        return normCapacity >>> 4;
    }

计算规范化后的申请值在数组中的下标索引，然后获取tinySubpagePools对应位置的PoolSubpage。这个tinySubpagePools就是我们在上面说的在创建poolArena的时候创建的长度为32的poolSubPage数组，我们上面分析过，当时只是为每个位置创建了一个作为头节点的poolSubpage，并且这个头节点的prev和next指向的都是自己。然后进入同步代码块

synchronized (head) {
                // on the very first allocation the list holds only the head, so head.next is head
                final PoolSubpage<T> s = head.next;
                // a subpage for this size is already linked behind the head: allocate from it
                if (s != head) {
                    assert s.doNotDestroy && s.elemSize == normCapacity;
                    long handle = s.allocate();
                    assert handle >= 0;
                    s.chunk.initBufWithSubpage(buf, null, handle, reqCapacity);
                    incTinySmallAllocation(tiny);
                    return;
                }
            }

这里同步是为了防止同时有其他线程操作head，上面说了第一次创建的时候head.next指向的就是head，这里不会进入if方法，那if方法什么时候进入呢，其实本次申请内存后这个poolSubpage就会连接到head下，在下次申请相同大小的内存时，当然也不是完全相同是规范化的值相同时，就会进入if方法，找到上次分配的chunk直接分配。我们继续往下跟踪

// no usable subpage yet: allocate through the chunk lists (a new chunk is created if needed)
            synchronized (this) {
                allocateNormal(buf, reqCapacity, normCapacity);
            }

/**
 * Tries the existing chunk lists in the order q050, q025, q000, qInit, q075; when none of them
 * can serve the request, creates a new chunk, allocates from it and adds it to qInit.
 */
private void allocateNormal(PooledByteBuf<T> buf, int reqCapacity, int normCapacity) {
        // try the existing chunk lists first; on the very first allocation they are all empty
        if (q050.allocate(buf, reqCapacity, normCapacity) || q025.allocate(buf, reqCapacity, normCapacity) ||
            q000.allocate(buf, reqCapacity, normCapacity) || qInit.allocate(buf, reqCapacity, normCapacity) ||
            q075.allocate(buf, reqCapacity, normCapacity)) {
            return;
        }

        // Add a new chunk.
        PoolChunk<T> c = newChunk(pageSize, maxOrder, pageShifts, chunkSize);
        // allocate from the freshly created chunk
        boolean success = c.allocate(buf, reqCapacity, normCapacity);
        assert success;
        qInit.add(c);
    }

首先会在poolChunkList链表中查找chunk进行分配，如果申请成功就直接返回，这里我们是第一次申请还没有创建poolChunk，所以会执行下面的方法

// Add a new chunk.
        PoolChunk<T> c = newChunk(pageSize, maxOrder, pageShifts, chunkSize);
        // allocate from the freshly created chunk
        boolean success = c.allocate(buf, reqCapacity, normCapacity);

这里会新建一个poolChunk进行内存分配，内存申请都是通过poolChunk来组织的我们来看看poolChunk的构造函数

/**
 * Creates a pooled chunk over {@code memory} and builds the two arrays (memoryMap and depthMap)
 * that describe the allocation tree in array form.
 */
PoolChunk(PoolArena<T> arena, T memory, int pageSize, int maxOrder, int pageShifts, int chunkSize, int offset) {
        unpooled = false;
        this.arena = arena;
        this.memory = memory;// backing memory of the chunk (a 16M byte[] for a heap arena in this walkthrough)
        this.pageSize = pageSize;// 8K in this walkthrough
        this.pageShifts = pageShifts;// 13 in this walkthrough
        this.maxOrder = maxOrder;// 11 in this walkthrough
        this.chunkSize = chunkSize;// 16M in this walkthrough
        this.offset = offset;// 0 in this walkthrough
        unusable = (byte) (maxOrder + 1);// 12: one more than the deepest level, used to mark a node as fully used
        log2ChunkSize = log2(chunkSize);// 24 for a 16M chunk
        subpageOverflowMask = ~(pageSize - 1);// -8192 for an 8K page: the low 13 bits are all 0
        // bytes still free in this chunk
        freeBytes = chunkSize;

        assert maxOrder < 30 : "maxOrder should be < 30, but is: " + maxOrder;
        maxSubpageAllocs = 1 << maxOrder;// 2048 leaves (pages) for maxOrder 11

        // Generate the memory map.
        memoryMap = new byte[maxSubpageAllocs << 1];// new byte[4096]
        depthMap = new byte[memoryMap.length];// new byte[4096]
        int memoryMapIndex = 1;
        // Fill both arrays level by level (depths 0..maxOrder) so that they describe a complete
        // binary tree stored in array form; index 0 is unused and every node starts with its own depth.
        for (int d = 0; d <= maxOrder; ++ d) { // move down the tree one level at a time
            int depth = 1 << d;
            for (int p = 0; p < depth; ++ p) {
                // in each level traverse left to right and set value to the depth of subtree
                memoryMap[memoryMapIndex] = (byte) d;
                depthMap[memoryMapIndex] = (byte) d;
                memoryMapIndex ++;
            }
        }

        subpages = newSubpageArray(maxSubpageAllocs);// one slot per page: new PoolSubpage[2048]
        cachedNioBuffers = new ArrayDeque<ByteBuffer>(8);
    }

首先还是一些赋值操作，并进行一些校验，然后创建了memoryMap和depthMap，这两个数组存储的其实是一个完全二叉树结构，因为完全二叉树非常适合用数组存储。memoryMap用来管理内存的申请状态，depthMap记录每个节点在树中的深度。这棵树的深度为11（深度从0到11，共12层），最底下一层就是2^11个page，节点越靠近根节点能分配的内存越大，根节点可以分配整个chunk也就是16M大小。数组内容为[0, 0, 1, 1, 2, 2, 2, 2, 3, 3,…]，索引为0的位置没有意义，这样如果这棵树的父节点下标位置为n，则它的左子节点位置就是2n，右子节点位置就是2n+1。
我们继续分析现在poolChunk已经创建好了，内存也已经申请了，下面开始分配

boolean success = c.allocate(buf, reqCapacity, normCapacity);

/**
 * Allocates {@code normCapacity} bytes from this chunk and initializes {@code buf} with the
 * result. Returns false when the chunk cannot serve the request (negative handle).
 */
boolean allocate(PooledByteBuf<T> buf, int reqCapacity, int normCapacity) {
        final long handle;
        if ((normCapacity & subpageOverflowMask) != 0) { // >= pageSize
            // at least one page (>= 8K): allocate a run of whole pages
            handle =  allocateRun(normCapacity);
        } else {
            // less than one page (< 8K): allocate from a subpage
            handle = allocateSubpage(normCapacity);
        }

        if (handle < 0) {
            return false;
        }
        ByteBuffer nioBuffer = cachedNioBuffers != null ? cachedNioBuffers.pollLast() : null;
        initBuf(buf, nioBuffer, handle, reqCapacity);
        return true;
    }

判断申请的大小是不是一个大于一个page，如果大于一个page就不用对page进行切分，如果是小于一个page就需要对page进行切分，这里我们申请的是112B，不足一个page，会走到else方法

/**
 * Reserves one free page (a leaf of the tree), turns it into a subpage with elements of
 * {@code normCapacity} bytes and allocates the first element from it. Returns the handle, or a
 * negative value when no leaf is free.
 */
private long allocateSubpage(int normCapacity) {
        // Obtain the head of the PoolSubPage pool that is owned by the PoolArena and synchronize on it.
        // This is need as we may add it back and so alter the linked-list structure.
        PoolSubpage<T> head = arena.findSubpagePoolHead(normCapacity);// tinySubpagePools[7] for normCapacity 112
        int d = maxOrder; // subpages are only be allocated from pages i.e., leaves
        synchronized (head) {
            int id = allocateNode(d);// tree index of the free leaf that was reserved
            if (id < 0) {
                return id;
            }

            final PoolSubpage<T>[] subpages = this.subpages;
            final int pageSize = this.pageSize;

            freeBytes -= pageSize;// the whole page is now accounted as used

            int subpageIdx = subpageIdx(id);// index of the page inside the subpages array
            PoolSubpage<T> subpage = subpages[subpageIdx];
            // the array was only allocated in the constructor; this slot may not hold a subpage yet
            if (subpage == null) {
                // runOffset(id) is the start offset of the page inside the chunk
                subpage = new PoolSubpage<T>(head, this, id, runOffset(id), pageSize, normCapacity);
                subpages[subpageIdx] = subpage;
            } else {
                subpage.init(head, normCapacity);
            }
            // the actual element allocation happens inside the subpage
            return subpage.allocate();
        }
    }

PoolSubpage<T> head = arena.findSubpagePoolHead(normCapacity);

/**
 * Returns the list head in tinySubpagePools or smallSubpagePools that belongs to the given
 * normalized element size (112 in this walkthrough).
 */
PoolSubpage<T> findSubpagePoolHead(int elemSize) {
        int tableIdx;
        PoolSubpage<T>[] table;
        // tiny size class
        if (isTiny(elemSize)) { // < 512
            // tiny sizes are multiples of 16, so the slot is elemSize / 16
            tableIdx = elemSize >>> 4;// 112 >>> 4 = 7
            table = tinySubpagePools;
        } else {
            // small size class
            tableIdx = 0;
            // divide by 1024, then count the shifts needed to reach 0:
            // 512 -> 0, 1024 -> 1, 2048 -> 2, 4096 -> 3
            elemSize >>>= 10;
            // derive the slot index in the small table
            while (elemSize != 0) {
                elemSize >>>= 1;
                tableIdx ++;
            }
            table = smallSubpagePools;
        }
        // hand back the list head of that slot
        return table[tableIdx];
    }

根据规范化后的申请大小获取tiny或者small类型PoolSubpage数组上的PoolSubpage。然后执行

int id = allocateNode(d)// tree index of the free node that was reserved
// Searches the tree from the root for a free node at depth d (11 in this walkthrough), marks it
// unusable and updates its ancestors. Returns the node's index, or -1 when no such node exists.
private int allocateNode(int d) {
        int id = 1;
        int initial = - (1 << d); // has last d bits = 0 and rest all = 1; -2048 for d = 11
        // start at index 1, the root; index 0 is skipped so that the children of node n
        // are 2n and 2n + 1
        byte val = value(id);// 0 for the root of a fresh chunk
        // the root's value already exceeds d: nothing at depth d can be allocated
        if (val > d) { // unusable
            return -1;
        }
        // keep descending while the current node can serve a shallower depth than d
        // or while id has not reached depth d yet
        while (val < d || (id & initial) == 0) { // id & initial == 1 << d for all ids at depth d, for < d it is 0
            id <<= 1;// go to the left child
            val = value(id);// value stored for that node
            if (val > d) {// left child cannot serve depth d: switch to its sibling
                id ^= 1;// id is even here, so this is the same as id + 1
                val = value(id);// value stored for the sibling
            }
        }
        byte value = value(id);// the node that was found
        assert value == d && (id & initial) == 1 << d : String.format("val = %d, id & initial = %d, d = %d",
                value, id & initial, d);
        setValue(id, unusable); // mark as unusable
        updateParentsAlloc(id);
        return id;
    }

这里其实就是在前面我们说的二叉树上查找可以分配内存的位置信息，如果整棵树都不能分配则直接返回-1，找到这个位置后然后在跟新这棵树，我们来看看更新方法

// Walks from the given node up to the root and sets every parent to the smaller value of its two children.
private void updateParentsAlloc(int id) {
        while (id > 1) {
            int parentId = id >>> 1;// index of the parent
            byte val1 = value(id);// value of this node
            byte val2 = value(id ^ 1);// value of its sibling
            byte val = val1 < val2 ? val1 : val2;// the smaller of the two
            setValue(parentId, val);// the parent takes the minimum of its two children
            id = parentId;// continue one level up
        }
    }

可以看到这里把父节点的值更新为当前节点与其兄弟节点两者中较小的那个值，然后不停地向上传递这种变化。这样操作之后，每个节点的值表示的就是以它为根的子树中还能分配的最浅的深度，也就是说每个节点还可以分配的最大内存就是memoryMap[index]这个深度对应的内存大小。比如一棵高度为2的树（假设chunk为16M，每个叶子节点为4M），初始数组是[0,0,1,1,2,2,2,2]，我们找到最左侧的叶子节点，也就是第一个2的位置（下标4），将其标记为12（unusable），然后向上循环修改，数组变为[0,1,2,1,12,2,2,2]。这说明根节点此时最多能分配8M，它的左子节点最多能分配4M，右子节点最多能分配8M。我们继续回到allocateSubpage方法中。

更新可用内存大小，然后从上面创建的长度为2048的poolSubpage数组中获取对应id位置的poolSubpage，如果为null，则创建一个PoolSubpage。我们先跟踪到创建这个PoolSubpage的构造方法中

// Creates a subpage for one page of the given chunk and initializes it for elements of elemSize bytes.
PoolSubpage(PoolSubpage<T> head, PoolChunk<T> chunk, int memoryMapIdx, int runOffset, int pageSize, int elemSize) {
        this.chunk = chunk;// the chunk this page belongs to
        this.memoryMapIdx = memoryMapIdx;// index of the page in the chunk's allocation tree
        this.runOffset = runOffset;// start offset of the page inside the chunk
        this.pageSize = pageSize;
        // The smallest element is 16 bytes, so a page holds at most pageSize / 16 elements and
        // needs at most pageSize / 16 / 64 longs (64 bits each) to track them:
        // 8192 / 16 / 64 = 8 longs for an 8K page.
        bitmap = new long[pageSize >>> 10]; // pageSize / 1024 = 8
        init(head, elemSize);// elemSize = 112 in this walkthrough
    }

// (Re)initializes this subpage for elements of elemSize bytes and links it behind the given list head.
void init(PoolSubpage<T> head, int elemSize) {
        doNotDestroy = true;
        this.elemSize = elemSize;// size of one element
        if (elemSize != 0) {
            maxNumElems = numAvail = pageSize / elemSize;// how many elements fit into one page
            nextAvail = 0;
            bitmapLength = maxNumElems >>> 6;// number of longs needed to track all elements
            if ((maxNumElems & 63) != 0) {// not a multiple of 64: one more long for the remainder
                bitmapLength ++;
            }
            // clear the used part of the bitmap: every bit is one element, 0 means free
            for (int i = 0; i < bitmapLength; i ++) {
                bitmap[i] = 0;
            }
        }
        // link this subpage behind the head so later requests of the same size can find it
        addToPool(head);
    }

 // Inserts this subpage directly after the head of the doubly linked list.
    private void addToPool(PoolSubpage<T> head) {
        assert prev == null && next == null;
        prev = head;
        next = head.next;
        next.prev = this;
        head.next = this;
    }

这里会赋值当前的poolSubpage是属于那个chunk，然后赋值内存分配管理数组下标位置，在这个chunk中的内存起始位置，赋值page的大小，然后创建一个长度为8的位图。然后执行init方法，计算一个page最多可以分配成多少个subpage，然后计算需要多少个long类型来表示这些subpage是不是被使用的状态，如果不足一个则最少需要一个，然后将这个位图全部初始化为0，然后将这个poolSubpage加到上面创建的tinyPoolSubpagePools，并且新的节点连接到head后面，这样如果创建同样大小的内存时就可以直接申请。继续回到allocateSubpage方法，将申请到的subpage赋值到subpages数组上，然后继续分配操作。

/**
 * Allocates one element of this subpage and returns its handle, or -1 when no element is free
 * or the subpage must not be used any more.
 */
long allocate() {
        // element size 0: return the handle for bitmap index 0 right away
        if (elemSize == 0) {
            return toHandle(0);
        }
        // no free element left, or the subpage is marked for destruction
        if (numAvail == 0 || !doNotDestroy) {
            return -1;
        }
        // Index of a free element. Its low 6 bits (0-63) select the bit inside one long and the
        // remaining high bits select the long inside the bitmap array (0-7 for 16-byte elements
        // in an 8K page). Example with 16-byte elements: the 450th element has
        // bitmapIdx = 0b111_000001 = 449, i.e. bit 1 of bitmap[7].
        final int bitmapIdx = getNextAvail();
        // divide by 64: index of the long inside the bitmap array
        int q = bitmapIdx >>> 6;
        // remainder of the division by 64: bit position inside that long
        int r = bitmapIdx & 63;
        assert (bitmap[q] >>> r & 1) == 0;
        // mark the element as used
        bitmap[q] |= 1L << r;
        // one free element less; when none is left ...
        if (-- numAvail == 0) {
            // ... the subpage is full and is unlinked from the list
            removeFromPool();
        }
        // encode the bitmap index into a handle
        return toHandle(bitmapIdx);
    }

这里首先是一些判断，然后从位图中获取可用的位置，我们跟踪到getNextAvail方法。

// Returns the index of a free element: the remembered nextAvail when it is set, otherwise the result of a bitmap scan.
private int getNextAvail() {
        int nextAvail = this.nextAvail;
        if (nextAvail >= 0) {
            // a non-negative nextAvail is a known free index (0 after init; presumably also set
            // when an element is freed); consume it and reset the field to -1
            this.nextAvail = -1;
            return nextAvail;
        }
        return findNextAvail();
    }

 // Scans the bitmap for the first long that still has a free bit; returns -1 when every long is full.
 private int findNextAvail() {
        // the bitmap array
        final long[] bitmap = this.bitmap;
        // number of longs actually in use
        final int bitmapLength = this.bitmapLength;
        // look at every long in use
        for (int i = 0; i < bitmapLength; i ++) {
            long bits = bitmap[i];
            if (~bits != 0) {// at least one bit is 0, so this long still has a free element
                return findNextAvail0(i, bits);
            }
        }
        return -1;
    }

// Finds the first 0 bit inside long number i and returns its absolute element index, or -1 when that index is out of range.
private int findNextAvail0(int i, long bits) {
        final int maxNumElems = this.maxNumElems;// number of elements in this subpage
        final int baseVal = i << 6;// index of the first element tracked by long number i (i * 64)

        for (int j = 0; j < 64; j ++) {
            // the lowest remaining bit is 0: element baseVal + j is free
            if ((bits & 1) == 0) {
                // baseVal has its low 6 bits clear, so the OR is the same as baseVal + j
                int val = baseVal | j;
                // the index must stay below the element count
                if (val < maxNumElems) {
                    return val;
                } else {
                    break;
                }
            }
            // move on to the next bit
            bits >>>= 1;
        }
        return -1;
    }

这里就是在查询位图中哪个位置可以使用。找到这个位置后，将这个bit位标记为1，也就代表这个位置被占用了，然后可用位置减一，如果可用位置变为0，则将这个subpage从上面说的tinySubpagePools对应的链表中移除

// Unlinks this subpage from its doubly linked list and clears its own prev and next links.
private void removeFromPool() {
        assert prev != null && next != null;
        prev.next = next;
        next.prev = prev;
        next = null;
        prev = null;
    }

然后将将bitmapIdx转换成handle

// Packs the bitmap index (upper 32 bits) and memoryMapIdx (lower 32 bits) into one long handle.
private long toHandle(int bitmapIdx) {
        // 0x4000000000000000L has only bit 62 set. With this marker the upper 32 bits of a
        // subpage handle are never 0, even when bitmapIdx is 0, so initBuf can use
        // bitmapIdx(handle) == 0 to recognise a handle that is not a subpage allocation.
        return 0x4000000000000000L | (long) bitmapIdx << 32 | memoryMapIdx;
    }

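这里可以看到handle是用一个long型表示的。0x4000000000000000L是只有第62位（次高位）为1、其他位全部为0的二进制数，最高位（符号位）保持为0，所以handle始终是非负数；加上这个标记位之后，即使bitmapIdx为0，handle的高32位也不为0，这样就可以和按整页分配得到的handle区分开。这里高32位表示在位图中的位置，低32位表示二叉树内存分配的位置，两者合起来就可以唯一确定这块内存的位置。然后我们回到io.netty.buffer.PoolChunk#allocate方法中，调用initBuf方法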

initBuf(buf, nioBuffer, handle, reqCapacity);

 // Initializes buf from a handle: as a run of whole pages when the handle carries no bitmap index, otherwise as a subpage element.
 void initBuf(PooledByteBuf<T> buf, ByteBuffer nioBuffer, long handle, int reqCapacity) {
        int memoryMapIdx = memoryMapIdx(handle);// presumably the lower 32 bits of the handle
        int bitmapIdx = bitmapIdx(handle);// presumably the upper 32 bits of the handle
        if (bitmapIdx == 0) {// no subpage marker: the allocation is a run of at least one page
            byte val = value(memoryMapIdx);
            assert val == unusable : String.valueOf(val);
            buf.init(this, nioBuffer, handle, runOffset(memoryMapIdx) + offset,
                    reqCapacity, runLength(memoryMapIdx), arena.parent.threadCache());
        } else {
            initBufWithSubpage(buf, nioBuffer, handle, bitmapIdx, reqCapacity);
        }
    }

// Initializes buf for an element that was allocated from a subpage of this chunk.
private void initBufWithSubpage(PooledByteBuf<T> buf, ByteBuffer nioBuffer,
                                    long handle, int bitmapIdx, int reqCapacity) {
        assert bitmapIdx != 0;

        int memoryMapIdx = memoryMapIdx(handle);// tree index of the page that holds the element

        PoolSubpage<T> subpage = subpages[subpageIdx(memoryMapIdx)];// the subpage created for that page
        assert subpage.doNotDestroy;
        assert reqCapacity <= subpage.elemSize;
		
        buf.init(
            this, nioBuffer, handle,
            runOffset(memoryMapIdx) + (bitmapIdx & 0x3FFFFFFF) * subpage.elemSize + offset,// runOffset(memoryMapIdx): offset of the page inside the chunk
                // (bitmapIdx & 0x3FFFFFFF): element number inside the page, with the marker bit cleared
                // (bitmapIdx & 0x3FFFFFFF) * subpage.elemSize: offset of the element inside the page
                // offset: 0 in this walkthrough
                reqCapacity, subpage.elemSize, arena.parent.threadCache());
    }

这里获取handle的高32位和低32位的值，然后判断bitmapIdx是否为0，为0说明分配的内存不小于一个page，否则说明申请的内存大小小于一个page。接下来的“申请”其实就是对这个pooledByteBuf进行赋值：这个byteBuf在chunk中的起始偏移位置、申请的内存大小、subpage中每个元素的大小、所属的PoolThreadCache等信息。

// Thin wrapper: forwards every argument unchanged to init0.
void init(PoolChunk<T> chunk, ByteBuffer nioBuffer,
              long handle, int offset, int length, int maxLength, PoolThreadCache cache) {
        init0(chunk, nioBuffer, handle, offset, length, maxLength, cache);
    }

/**
 * Stores the allocation's coordinates in this buffer's fields.
 * Only assigns fields; with assertions enabled it also requires a non-negative
 * handle and a non-null chunk.
 */
private void init0(PoolChunk<T> chunk, ByteBuffer nioBuffer,
                       long handle, int offset, int length, int maxLength, PoolThreadCache cache) {
        assert handle >= 0;
        assert chunk != null;

        this.chunk = chunk;// chunk this buffer belongs to
        memory = chunk.memory;// backing memory of that chunk
        tmpNioBuf = nioBuffer;
        allocator = chunk.arena.parent;// the buffer allocator that owns the arena
        this.cache = cache;// PoolThreadCache
        this.handle = handle;// allocation handle; per the article it packs the bitmap position and the tree node position
        this.offset = offset;// start offset inside the chunk; per the article, reads and writes are based on it
        this.length = length;// requested size
        this.maxLength = maxLength;// maximum usable length; per the article this matters when the buffer is expanded
    }

这样就创建一个byteBuf，这个byteBuf表示了这块连续内存的位置，我们针对这个byteBuf的操作就是根据这段内存来进行的。我们再回到io.netty.buffer.PoolArena#allocateNormal方法

// Call site quoted from PoolArena#allocateNormal: hand the chunk c to the qInit chunk list.
qInit.add(c);

/**
 * Adds {@code chunk} to this list, unless its usage has already reached
 * {@code maxUsage}, in which case it is passed on to {@code nextList} instead.
 */
void add(PoolChunk<T> chunk) {
    if (chunk.usage() >= maxUsage) {
        // Too full for this list: let the next list decide.
        nextList.add(chunk);
    } else {
        add0(chunk);
    }
}

/**
 * Links {@code chunk} in as the new head of this list's doubly linked chain
 * and records this list as the chunk's parent.
 */
void add0(PoolChunk<T> chunk) {
    chunk.parent = this;
    chunk.prev = null;
    // With an empty list head is null, so next ends up null as well.
    chunk.next = head;
    if (head != null) {
        head.prev = chunk;
    }
    head = chunk;
}

将上面创建的chunk根据使用率添加到对应的chunkList中，下次申请内存的时候就可以在chunkList双向链表中查询chunk进行内存分配了。回到io.netty.buffer.PoolArena#allocate()方法，记录申请了多少个tiny或small类型。

// Call site quoted from PoolArena#allocate: count the tiny/small allocation just performed.
incTinySmallAllocation(tiny);
/**
 * Increments the allocation counter for one size class.
 *
 * @param tiny {@code true} to bump {@code allocationsTiny}, {@code false} to bump {@code allocationsSmall}
 */
private void incTinySmallAllocation(boolean tiny) {
    final var counter = tiny ? allocationsTiny : allocationsSmall;
    counter.increment();
}

我们来看看normal类型的分配

// Fragment of PoolArena#allocate: the normalized size fits within one chunk.
if (normCapacity <= chunkSize) {
            // First attempt: serve the request from the cache.
            if (cache.allocateNormal(this, buf, reqCapacity, normCapacity)) {
                // was able to allocate out of the cache so move on
                return;
            }
            // Cache could not serve it: allocate while holding this arena's monitor and count it.
            synchronized (this) {
                allocateNormal(buf, reqCapacity, normCapacity);
                ++allocationsNormal;
            }
        }

这里同样是先从每个线程的缓存中分配，如果分配失败再直接分配。我们这里先跳过缓存分配，后面会再介绍。跟踪到allocateNormal方法，这个方法我们上面已经分析过了，会走到io.netty.buffer.PoolChunk#allocate方法中

if ((normCapacity & subpageOverflowMask) != 0) { // >= pageSize
            // the requested size is at least one page (8K per the article)
            handle =  allocateRun(normCapacity);
        }

这里申请的内存不小于一个page（normCapacity >= pageSize），所以会执行allocateRun方法，我们继续跟踪进去

/**
 * Allocates a run for a request of at least one page.
 *
 * @param normCapacity normalized capacity of the request
 * @return the id returned by {@code allocateNode}; a negative value is returned
 *         unchanged and leaves {@code freeBytes} untouched
 */
private long allocateRun(int normCapacity) {
    // Tree depth whose nodes match the requested size.
    final int depth = maxOrder - (log2(normCapacity) - pageShifts);
    final int nodeId = allocateNode(depth);
    if (nodeId >= 0) {
        // A node was found: deduct its run length from the free-byte count.
        freeBytes -= runLength(nodeId);
    }
    return nodeId;
}

然后会走到io.netty.buffer.PoolChunk#initBuf方法，这个方法我们在上面也有介绍，这里bitmapIdx==0走到if逻辑

if (bitmapIdx == 0) {// no bitmap index: per the article, the request was at least one page
            byte val = value(memoryMapIdx);
            assert val == unusable : String.valueOf(val);
            buf.init(this, nioBuffer, handle, runOffset(memoryMapIdx) + offset,
                    reqCapacity, runLength(memoryMapIdx), arena.parent.threadCache());
        }

和上面分析的过程类似，就是获取一个pooledByteBuf然后赋值开始的偏移量位置，所属的PoolThreadCache，

申请长度，最大长度等值。然后就可以使用了。

huge类型内存申请：申请大小超过chunkSize（默认16M）时属于huge类型，不会通过池化分配而是直接申请

else {
            // Huge allocations are never served via the cache so just call allocateHuge
            // huge request: per the article, larger than 16M
            allocateHuge(buf, reqCapacity);
        }

/**
 * Serves a huge request: creates an unpooled chunk, initializes {@code buf} on it,
 * and updates the huge-allocation counters.
 *
 * @param buf         the buffer to initialize
 * @param reqCapacity requested capacity, used both to create the chunk and as the buffer length
 */
private void allocateHuge(PooledByteBuf<T> buf, int reqCapacity) {
        // per the article: allocate memory of exactly the required size
        PoolChunk<T> chunk = newUnpooledChunk(reqCapacity);
        activeBytesHuge.add(chunk.chunkSize());
        // huge buffers are not pooled; initialize the buffer directly on the chunk
        buf.initUnpooled(chunk, reqCapacity);
        allocationsHuge.increment();
    }

/**
 * Initializes this buffer on an unpooled chunk.
 * Delegates to {@code init0} with no NIO buffer, handle 0, the chunk's own offset,
 * {@code length} as both length and maxLength, and no thread cache.
 */
void initUnpooled(PoolChunk<T> chunk, int length) {
        init0(chunk, null, 0, chunk.offset, length, length, null);
    }

    /**
     * Stores the allocation's coordinates in this buffer's fields.
     * Only assigns fields; with assertions enabled it also requires a non-negative
     * handle and a non-null chunk.
     */
    private void init0(PoolChunk<T> chunk, ByteBuffer nioBuffer,
                       long handle, int offset, int length, int maxLength, PoolThreadCache cache) {
        assert handle >= 0;
        assert chunk != null;

        this.chunk = chunk;// chunk this buffer belongs to
        memory = chunk.memory;// backing memory of that chunk
        tmpNioBuf = nioBuffer;
        allocator = chunk.arena.parent;// the buffer allocator that owns the arena
        this.cache = cache;// PoolThreadCache
        this.handle = handle;// allocation handle; per the article it packs the bitmap position and the tree node position
        this.offset = offset;// start offset inside the chunk; per the article, reads and writes are based on it
        this.length = length;// requested size
        this.maxLength = maxLength;// maximum usable length; per the article this matters when the buffer is expanded
    }