cocos2dx-CCSpriteBatchNode源码分析(VAO+VBO)

最新推荐文章于 2019-05-20 22:26:12 发布

CodeWorld_Flying

最新推荐文章于 2019-05-20 22:26:12 发布

阅读量2.2k

点赞数 2

分类专栏： cocos2dx-2.2.2pak-公司版本游戏游戏引擎文章标签： cocos2dx CCSprite BatchNode 游戏 VAO-VBO

本文链接：https://blog.csdn.net/beautyleaf/article/details/51839208

版权

cocos2dx-2.2.2pak-公司版本同时被 3 个专栏收录

10 篇文章 0 订阅

订阅专栏

游戏引擎

10 篇文章 0 订阅

订阅专栏

游戏

4 篇文章 0 订阅

订阅专栏

CCSpriteBatchNode简介

CCSpriteBatchNode是用来一次渲染多个精灵的，这些精灵是CCSpriteBatchNode的孩子，并且使用同一个纹理，后面将详细分析CCSpriteBatchNode的孩子为什么要使用同一个纹理。多个纹理可以打包成一个纹理，然后通过精灵帧缓存载入这个纹理文件的属性文件，创建纹理与精灵帧，后面用相应的精灵帧创建精灵，这里省去了多次读取外部文件、创建多个纹理的消耗，但是每个精灵的渲染还是单独地向gl发送绘图命令，而不是把数据一起发给gl进行处理。而使用CCSpriteBatchNode可以避免这个问题，它们的纹理可以打包在一起，后面使用CCSpriteBatchNode作为父节点，实现把多次渲染命令改为一次，降低数据传递的次数，以提高渲染效率。CCSpriteBatchNode的子节点必须可以向上转型为CCSprite类型，它内部的函数是针对CCSprite的，每加入一个CCSprite，CCSprite的四个顶点的坐标、颜色、纹理坐标都会被CCSpriteBatchNode保存，如果是用户自定义的类型，除非派生于CCSprite，也只有这几个顶点，要是包含很多子孩子，那么必然是不可以的。
下面是使用的例子：

代码如下：

CCSpriteBatchNode *batchNode = CCSpriteBatchNode::create("role-run2/AthenaAsamiya_by_SWP.png");//128x192

 for (int i=0; i<4; i++) {
     for (int j=0; j<4; j++) {
         CCSprite  *child = CCSprite::create("role-run2/AthenaAsamiya_by_SWP.png", CCRectMake(j*32, i*48, 32, 48));
         int x = random() % 640;
         int y = random() % 1136;
         child->setPosition(ccp(x, y));
         batchNode->addChild(child);
     }
 }

 this->addChild(batchNode);

下面对CCSpriteBatchNode代码进行下分析

CCSpriteBatchNode创建

// kDefaultSpriteBatchCapacity defaults to 29.
// Convenience overload: forwards to create(fileImage, capacity) with the default capacity.
static CCSpriteBatchNode* create(const char* fileImage) {
      return CCSpriteBatchNode::create(fileImage, kDefaultSpriteBatchCapacity);
  }

// Allocates a batch node, initializes it from an image file and marks it
// autorelease before returning it. The result of initWithFile() is not checked.
CCSpriteBatchNode* CCSpriteBatchNode::create(const char *fileImage, unsigned int capacity/* = kDefaultSpriteBatchCapacity*/)
{
    CCSpriteBatchNode *pNode = new CCSpriteBatchNode();

    pNode->initWithFile(fileImage, capacity);
    pNode->autorelease();
    return pNode;
}

bool CCSpriteBatchNode::initWithFile(const char* fileImage, unsigned int capacity)
{
    CCTexture2D *pTexture2D = CCTextureCache::sharedTextureCache()->addImage(fileImage);
    return initWithTexture(pTexture2D, capacity);
}

上面kDefaultSpriteBatchCapacity定义了CCSpriteBatchNode默认的顶点信息结构的大小，这个信息描述了每个精灵的四个顶点的信息，默认大小29。最后调用initWithTexture(pTexture2D, capacity)进行初始化。代码如下：

// Initializes the batch node with a texture and an initial quad capacity.
// Sets the blend function, creates and initializes the texture atlas,
// allocates the children and descendants arrays and selects the shader.
// Always returns true; the result of the atlas initWithTexture() call is not checked.
bool CCSpriteBatchNode::initWithTexture(CCTexture2D *tex, unsigned int capacity)
{
    m_blendFunc.src = CC_BLEND_SRC; // set the blend function
    m_blendFunc.dst = CC_BLEND_DST;
    m_pobTextureAtlas = new CCTextureAtlas(); // create the texture atlas

    if (0 == capacity)
    {
        capacity = kDefaultSpriteBatchCapacity; // a zero capacity falls back to the default
    }

    m_pobTextureAtlas->initWithTexture(tex, capacity); // initialize the texture atlas

    updateBlendFunc();// refresh the blend function

    // no lazy alloc in this node
    m_pChildren = new CCArray();
    m_pChildren->initWithCapacity(capacity);// children array

    m_pobDescendants = new CCArray();
    m_pobDescendants->initWithCapacity(capacity);// descendants array

    setShaderProgram(CCShaderCache::sharedShaderCache()->programForKey(kCCShader_PositionTextureColor));// select the shader program
    return true;
}

m_pobTextureAtlas->initWithTexture(tex, capacity);用来对纹理集进行初始化，传入了tex, capacity，之所以传入tex, capacity，因为CCSpriteBatchNode重写了visit、draw等函数，它把真正进行渲染的操作交由m_pobTextureAtlas进行处理了。继续看代码：

// Initializes the atlas for `capacity` quads backed by `texture`.
// Retains the texture, allocates and zeroes the CPU-side quad and index
// arrays, fills the index array and creates the GL buffer objects.
// Returns false (after freeing the arrays and releasing the texture) when an
// allocation fails for a non-zero capacity; returns true otherwise.
bool CCTextureAtlas::initWithTexture(CCTexture2D *texture, unsigned int capacity)
{
//    CCAssert(texture != NULL, "texture should not be null");
    m_uCapacity = capacity; // number of quads the arrays can hold
    m_uTotalQuads = 0; // number of quads currently in use

    // retained in property
    this->m_pTexture = texture;  // keep the texture used for rendering
    CC_SAFE_RETAIN(m_pTexture); // the atlas holds a reference, so bump the retain count

    // Re-initialization is not allowed
    CCAssert(m_pQuads == NULL && m_pIndices == NULL, ""); // both arrays must still be NULL

    // allocate memory for m_pQuads and m_pIndices
    m_pQuads = (ccV3F_C4B_T2F_Quad*)malloc( m_uCapacity * sizeof(ccV3F_C4B_T2F_Quad) ); //quad vertex of children
    m_pIndices = (GLushort *)malloc( m_uCapacity * 6 * sizeof(GLushort) ); //children count

    if( ! ( m_pQuads && m_pIndices) && m_uCapacity > 0)  // failure handle
    {
        //CCLOG("cocos2d: CCTextureAtlas: not enough memory");
        CC_SAFE_FREE(m_pQuads);
        CC_SAFE_FREE(m_pIndices);

        // release texture, should set it to null, because the destruction will
        // release it too. see cocos2d-x issue #484
        CC_SAFE_RELEASE_NULL(m_pTexture);
        return false;
    }

    memset( m_pQuads, 0, m_uCapacity * sizeof(ccV3F_C4B_T2F_Quad) ); //set zero
    memset( m_pIndices, 0, m_uCapacity * 6 * sizeof(GLushort) ); //set zero

#if CC_ENABLE_CACHE_TEXTURE_DATA
    // listen the event when app go to background
    CCNotificationCenter::sharedNotificationCenter()->addObserver(this,
                                                           callfuncO_selector(CCTextureAtlas::listenBackToForeground),
                                                           EVENT_COME_TO_FOREGROUND,
                                                           NULL);
#endif

    this->setupIndices(); // fill the index array

#if CC_TEXTURE_ATLAS_USE_VAO
    setupVBOandVAO();    // use VBO and VAO
#else    
    setupVBO();
#endif

    // mark the quad data as needing upload before the next draw
    m_bDirty = true;

    return true;
}

m_uTotalQuads描述了当前存在的子节点数，m_pQuads是个大容器，包含N个ccV3F_C4B_T2F_Quad，m_pIndices是顶点索引的容器。上面this->setupIndices();代码如下：

// Fills m_pIndices with six element indices for each of the m_uCapacity quad
// slots. Quad number q owns vertices q*4 .. q*4+3.
void CCTextureAtlas::setupIndices()
{
    if (m_uCapacity == 0)
    {
        return;
    }

    for (unsigned int quad = 0; quad < m_uCapacity; ++quad)
    {
        const unsigned int slot = quad * 6;    // first index entry of this quad
        const unsigned int vertex = quad * 4;  // first vertex of this quad

#if CC_TEXTURE_ATLAS_USE_TRIANGLE_STRIP
        // strip layout: first and last index are repeated
        m_pIndices[slot + 0] = vertex + 0;
        m_pIndices[slot + 1] = vertex + 0;
        m_pIndices[slot + 2] = vertex + 2;
        m_pIndices[slot + 3] = vertex + 1;
        m_pIndices[slot + 4] = vertex + 3;
        m_pIndices[slot + 5] = vertex + 3;
#else
        // two triangles per quad, so every quad has six indices
        /*
            0----3
            | \  |
            |  \ |
            1----2
         */
        // first triangle
        m_pIndices[slot + 0] = vertex + 0;
        m_pIndices[slot + 1] = vertex + 1;
        m_pIndices[slot + 2] = vertex + 2;

        // second triangle, inverted index order (issue #179)
        m_pIndices[slot + 3] = vertex + 3;
        m_pIndices[slot + 4] = vertex + 2;
        m_pIndices[slot + 5] = vertex + 1;
#endif
    }
}

这里要注意的是，让gl执行渲染任务前，需要进行一些数据定义，然后在渲染的时候把数据传给gl。m_pQuads中元素结构是ccV3F_C4B_T2F_Quad(4个顶点，每个顶点包含1个顶点坐标、1个颜色、1个纹理坐标)，它存储了每个精灵渲染用到的顶点信息。在渲染的时候需要告诉gl要画什么图形，三角形扇、三角形带、四边形还是什么图形。前面通过m_pQuads存储了所有的顶点信息，它们在数组中的编号是0、1、2…，显然可以通过它们对绘制的图形进行描述，这里m_pIndices正是对所有四边形的描述，每个索引对应m_pQuads中的一个顶点结构。上面 for( unsigned int i=0; i < m_uCapacity; i++)表示一共有m_uCapacity个四边形，m_pIndices[i*6+0]表示一个四边形有6个索引，每个四边形用2个三角形表示，所以有6个索引。现在用于渲染的图形由m_pIndices定义好了，后面不需要再改变，m_pQuads中的数据现在还只是被memset清零的占位数据，后面的addChild会改变它。
setupVBOandVAO();这个函数作用是设置VBO(vertex buffer object)与VAO(vertex array object)。VBO是一个在显卡中的缓存，可以存储顶点数据，避免了每次绘制图形时，由内存向gpu传送数据，默认内存数据每次渲染时需要往gpu传送，执行完一次绘制后就被清除了。显然这个挺耗时间的，尤其是数据量比较多的时候，gpu大量带宽将用于数据传输，不仅性能低，还耗电啊。所以gl提出了VBO，允许客户把数据存储在显存中。顶点数据在显存中被存储，每次绘制图形，必须传递绘制指令，描述将要绘制的图形顶点坐标存储在VBO的什么位置，颜色、纹理坐标又存在什么位置。显然这些数据应该可以缓存的，这样以后绘制就不再需要重复地传递这些信息了，直接利用缓存在显存中的数据。VAO正是解决上面问题的。setupVBOandVAO()代码如下：

#if CC_TEXTURE_ATLAS_USE_VAO
// Creates the vertex array object and the two buffer objects used by the atlas.
// m_pBuffersVBO[0] receives the quad (vertex) data and m_pBuffersVBO[1] the
// index data; the attribute layout is configured while the VAO is bound.
void CCTextureAtlas::setupVBOandVAO()
{
    // generate the VAO and bind it so the state set up below is issued while it is bound
    glGenVertexArrays(1, &m_uVAOname);
    ccGLBindVAO(m_uVAOname);

// size of one vertex record; used as the stride of every attribute below
#define kQuadSize sizeof(m_pQuads[0].bl)

    // two buffer objects: [0] for vertices, [1] for indices
    glGenBuffers(2, &m_pBuffersVBO[0]);

    // upload the whole quad array (m_uCapacity quads) into the vertex buffer
    glBindBuffer(GL_ARRAY_BUFFER, m_pBuffersVBO[0]);
    glBufferData(GL_ARRAY_BUFFER, sizeof(m_pQuads[0]) * m_uCapacity, m_pQuads, GL_DYNAMIC_DRAW);

    // vertices
    glEnableVertexAttribArray(kCCVertexAttrib_Position);
    glVertexAttribPointer(kCCVertexAttrib_Position, 3, GL_FLOAT, GL_FALSE, kQuadSize, (GLvoid*) offsetof( ccV3F_C4B_T2F, vertices));

    // colors
    glEnableVertexAttribArray(kCCVertexAttrib_Color);
    glVertexAttribPointer(kCCVertexAttrib_Color, 4, GL_UNSIGNED_BYTE, GL_TRUE, kQuadSize, (GLvoid*) offsetof( ccV3F_C4B_T2F, colors));

    // tex coords
    glEnableVertexAttribArray(kCCVertexAttrib_TexCoords);
    glVertexAttribPointer(kCCVertexAttrib_TexCoords, 2, GL_FLOAT, GL_FALSE, kQuadSize, (GLvoid*) offsetof( ccV3F_C4B_T2F, texCoords));

    // upload the index array (six indices per quad) into the element buffer
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_pBuffersVBO[1]);
    glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(m_pIndices[0]) * m_uCapacity * 6, m_pIndices, GL_STATIC_DRAW);

    // Must unbind the VAO before changing the element buffer.
    ccGLBindVAO(0);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
    glBindBuffer(GL_ARRAY_BUFFER, 0);

    CHECK_GL_ERROR_DEBUG();
}

glGenVertexArrays(1, &m_uVAOname); 是gl的生成VAO接口，m_uVAOname是返回的VAO句柄，ccGLBindVAO(m_uVAOname);调用了glBindVertexArray(vaoId);这个是gl绑定VAO接口，绑定后客户就可以使用这个VAO了。

glGenBuffers(2, &m_pBuffersVBO[0]);可以写成glGenBuffers(2, m_pBuffersVBO)，作用是生成两个VBO，一个用来存顶点信息，一个用来存储用于描述绘制图元的顶点索引信息。

glBindBuffer(GL_ARRAY_BUFFER, m_pBuffersVBO[0]);就是把VBO绑定到GL_ARRAY_BUFFER上，GL_ARRAY_BUFFER是gl中用于存储顶点信息的缓存绑定目标，glBufferData(GL_ARRAY_BUFFER, sizeof(m_pQuads[0]) * m_uCapacity, m_pQuads, GL_DYNAMIC_DRAW);往缓存GL_ARRAY_BUFFER中写入sizeof(m_pQuads[0]) * m_uCapacity字节的数据，数据来源m_pQuads。glEnableVertexAttribArray(kCCVertexAttrib_Position);开启索引为kCCVertexAttrib_Position的顶点属性数组，开启后可以往里面写东西，本来是每次绘制图元时，是把内存中数据写入到顶点属性数组，然后供着色器使用的，现在还没绘制显然这些顶点数据并不会写入到顶点属性数组中，即便写入，后面其它图元的绘制都会把这个供gl放顶点属性数组的位置给占了。所以glVertexAttribPointer(kCCVertexAttrib_Position, 3, GL_FLOAT, GL_FALSE, kQuadSize, (GLvoid*) offsetof( ccV3F_C4B_T2F, vertices));并不是直接往顶点属性数组写入数据，而是把这个描述记录在VAO中。【A Vertex Array Object (VAO) is an object which contains one or more Vertex Buffer Objects and is designed to store the information for a complete rendered object】，上面是gl组织在wiki中的描述，可以看出VAO保存了要渲染的对象信息。由于前面绑定使用了VBO，所以(GLvoid*) offsetof( ccV3F_C4B_T2F, vertices)描述的数据地址是在VBO中的偏移地址。

后面两组是对颜色与纹理坐标的设置。这里涉及到了渲染，所以代码都是跟gl相关的。在gl中需要某个对象的一般调用形式是glGen[OBJECT TYPE]，gl将生成一个对象句柄给客户。后面如果要使用这个对象，得告诉gl要使用这个对象，告诉gl的方法就是glBind[OBJECT TYPE]。设置对象数据的标准接口是gl[OBJECT TYPE]Data。然后对于一些函数，它的输入源可以来自于客户内存，也可以来自于显存，如果客户之前指定了使用显存中的缓存对象，那么就从显存中加载数据，否则就从客户指定的地址加载数据。

最后面需要对VAO、VBO解除使用，不解除使用，后面遇到其它的对这些VAO、VBO的绑定使用倒没什么关系，就怕遇到那些数据存取的地方，它们会根据之前是否使用缓存对象，来决定数据的来源，所以，这里为了避免意外必须解除使用，方法就是给绑定接口传0。有一点需要注意的是，gl中定义了一套固定的操作及标识这些函数会用到的变量的常量，GL_ARRAY_BUFFER就是其中一个，客户无法直接操作这个函数中变量，但是可以通过绑定，改变变量对应的常量关联的客户对象。大致代码如下：

struct GL_Context{
  GL_Context(){
    m_map2[GL_ARRAY_BUFFER] =  -1;//init GL_ARRAY_BUFFER
    m_map2[GL_ELEMENT_ARRAY_BUFFER] = -1;
  }
  static GL_Context* getInstance(){//...单件};
  void draw(){//...渲染}

  <GLuint, GLvoid*> m_map;
  <GLenum, GLuint> m_map2;
};

glGenBuffers(GLsizei n, GLuint* buffers){
  static GLuint index = -1;
  for(GLsizei i=0; i<n; i++){
    buffers[i] = ++index;
    GL_Context::getInstance().m_map[buffers[i]] = 0;//just set 0
  }
}

// Model of glBindBuffer: records which buffer handle is current for `target`.
void glBindBuffer(GLenum target, GLuint buffer){
  // getInstance() returns a pointer, so members are reached with ->
  GL_Context::getInstance()->m_map2[target] = buffer; // add/overwrite the target -> handle mapping
}

glBufferData(GLenum target, GLsizeiptr size, const GLvoid* data, GLenum usage){
  auto iter = GL_Context::getInstance().m_map2.find(target);
  GLuint bufferID = iter->second; //获得客户缓存句柄
  if(bufferID == -1) return;// not bind
  GLvoid* bufferAddress = new GLsizeiptr[size];
  memcpy(bufferAddress, size, 1, data);
  GL_Context::getInstance().m_map.insert(pair<GLuint, GLvoid*>(bufferID, bufferAddress));
}

// Model of GL's internal render entry point: draws using the context's current state.
void _glDraw(){
  GL_Context::getInstance()->draw();
}

上面是对gl的伪代码描述，gl有一个上下文，操作还是对它进行各种配置，最后渲染。我用c++对gl的这些函数进行了描述，它们大致的功能就是这些，可以看出绑定很重要，glBufferData是取出target对应的客户对象句柄，然后把分配的内存加到客户对象句柄的映射表中。这些函数有助于对gl的理解，这些虽然不是源码，但是可以帮助推测，当对gl理解晦涩时，自己可以尝试推理出符合要求的代码。前面的GL_ELEMENT_ARRAY_BUFFER指的是顶点索引缓存。

CCTextureAtlas::initWithTexture(CCTexture2D *texture, unsigned int capacity)的分析完毕，主要是生成后面用到的数据，以及使用VAO与VBO把顶点数据与顶点索引缓存在gpu的显存中。

CCSpriteBatchNode渲染

下面分析一下CCSpriteBatchNode是如何渲染出来的。渲染从它的visit入手，代码如下：

// Visits the batch node: sorts the children, applies this node's transform and
// draws. Children's visit() is not called from here; draw() is invoked once on
// the batch node itself.
void CCSpriteBatchNode::visit(void)
{
    CC_PROFILER_START_CATEGORY(kCCProfilerCategoryBatchSprite, "CCSpriteBatchNode - visit");
    // NOTE(review): this early return happens before the matching
    // CC_PROFILER_STOP_CATEGORY at the end of the function.
    if (! m_bVisible)
    {
        return;
    }

    kmGLPushMatrix(); // push a copy of the top matrix so other nodes' transforms are not affected

    if (m_pGrid && m_pGrid->isActive())
    {
        m_pGrid->beforeDraw();
        transformAncestors();
    }

    sortAllChildren(); // sort the children
    transform(); // compute/apply the transform matrix

    draw(); // render

    if (m_pGrid && m_pGrid->isActive())
    {
        m_pGrid->afterDraw(this);
    }

    kmGLPopMatrix(); // pop the matrix pushed above
    setOrderOfArrival(0);

    CC_PROFILER_STOP_CATEGORY(kCCProfilerCategoryBatchSprite, "CCSpriteBatchNode - visit");

}

上面直接draw了，并没递归调用孩子的visit，下面对它的sortAllChildren()以及draw()进行分析，这两个函数比较关键。sortAllChildren()代码如下：

// Re-sorts the direct children when m_bReorderChildDirty is set, then lets
// every child sort its own children and finally reassigns atlas indices via
// updateAtlasIndex(). Does nothing when the dirty flag is clear.
void CCSpriteBatchNode::sortAllChildren()
{
    if (m_bReorderChildDirty)
    {
        int i = 0,j = 0,length = m_pChildren->data->num;
        CCNode ** x = (CCNode**)m_pChildren->data->arr;
        CCNode *tempItem = NULL;

        //insertion sort
        // ascending by zOrder; equal zOrder is ordered by ascending orderOfArrival
        for(i=1; i<length; i++)
        {
            tempItem = x[i];
            j = i-1;

            //continue moving element downwards while zOrder is smaller or when zOrder is the same but orderOfArrival is smaller
            while(j>=0 && ( tempItem->getZOrder() < x[j]->getZOrder() || ( tempItem->getZOrder() == x[j]->getZOrder() && tempItem->getOrderOfArrival() < x[j]->getOrderOfArrival() ) ) )
            {
                x[j+1] = x[j];
                j--;
            }

            x[j+1] = tempItem;
        }

        //sorted now check all children
        if (m_pChildren->count() > 0)
        {
            //first sort all children recursively based on zOrder
            arrayMakeObjectsPerformSelector(m_pChildren, sortAllChildren, CCSprite*);

            // running atlas index handed to updateAtlasIndex(), which advances it
            int index=0;

            CCObject* pObj = NULL;
            //fast dispatch, give every child a new atlasIndex based on their relative zOrder (keep parent -> child relations intact)
            // and at the same time reorder descendants and the quads to the right index
            CCARRAY_FOREACH(m_pChildren, pObj)
            {
                CCSprite* pChild = (CCSprite*)pObj;
                updateAtlasIndex(pChild, &index);
            }
        }

        m_bReorderChildDirty=false;
    }
}

for(i=1; i<length; i++)部分使用插入排序，tempItem->getZOrder() < x[j]->getZOrder()表示zorder小的在前面，( tempItem->getZOrder() == x[j]->getZOrder() && tempItem->getOrderOfArrival() < x[j]->getOrderOfArrival() )表示zorder相等时，谁先加进来的在前面。最后排好序的结果就是zorder最大的排在最后面，越后被渲染，所以游戏中zorder大的会把zorder小的覆盖住。CCARRAY_FOREACH(m_pChildren, pObj)对每个孩子的顶点信息quad进行了重新位置设置，记得前面初始化的时候有一个m_pQuads，它的数据就来自于这些孩子。代码如下：

// Assigns atlas indices to `sprite` and, recursively, to its children.
//   sprite   - sprite whose subtree is being re-indexed
//   curIndex - in/out running index; incremented once for every sprite placed
// The sprite itself receives its index after the children with zOrder < 0 and
// before the children with zOrder >= 0. Whenever a sprite's index changes, the
// corresponding entries are exchanged through swap().
void CCSpriteBatchNode::updateAtlasIndex(CCSprite* sprite, int* curIndex)
{
    unsigned int count = 0;
    CCArray* pArray = sprite->getChildren();
    if (pArray != NULL)
    {
        count = pArray->count();
    }

    int oldIndex = 0;

    if( count == 0 )
    {
        // leaf sprite: it simply takes the next index
        oldIndex = sprite->getAtlasIndex(); // current position in the texture atlas
        sprite->setAtlasIndex(*curIndex); // take the running index, which grows as sprites are placed
        sprite->setOrderOfArrival(0); // reset the order of arrival to 0
        if (oldIndex != *curIndex){
            swap(oldIndex, *curIndex); // exchange the descendants and quads stored at the two indices
        }
        (*curIndex)++; // index for the next sprite
    }
    else
    {
        // true until the sprite itself has been given its index
        bool needNewIndex=true;

        if (((CCSprite*) (pArray->data->arr[0]))->getZOrder() >= 0)
        {
            //all children are in front of the parent
            oldIndex = sprite->getAtlasIndex();
            sprite->setAtlasIndex(*curIndex);
            sprite->setOrderOfArrival(0);
            if (oldIndex != *curIndex)
            {
                swap(oldIndex, *curIndex);
            }
            (*curIndex)++;

            needNewIndex = false;
        }

        CCObject* pObj = NULL;
        CCARRAY_FOREACH(pArray,pObj)
        {
            CCSprite* child = (CCSprite*)pObj;
            // first child with zOrder >= 0: place the parent before it
            if (needNewIndex && child->getZOrder() >= 0)
            {
                oldIndex = sprite->getAtlasIndex();
                sprite->setAtlasIndex(*curIndex);
                sprite->setOrderOfArrival(0);
                if (oldIndex != *curIndex) {
                    this->swap(oldIndex, *curIndex);
                }
                (*curIndex)++;
                needNewIndex = false;

            }

            // every child is re-indexed recursively, whichever branch ran above
            updateAtlasIndex(child, curIndex);
        }

        if (needNewIndex)
        {//all children have a zOrder < 0, so the parent is placed after all of them
            oldIndex=sprite->getAtlasIndex();
            sprite->setAtlasIndex(*curIndex);
            sprite->setOrderOfArrival(0);
            if (oldIndex!=*curIndex) {
                swap(oldIndex, *curIndex);
            }
            (*curIndex)++;
        }
    }
}

上面代码分两部分，一部分没有孩子，一部分有孩子。当没有孩子时，首先sprite->getAtlasIndex()获得精灵在纹理集中的索引，索引越大越后被渲染。这个索引值在CCSprite加入到父亲中时被设置，这个值并不是最终值，还需要调整，后面分析CCSpriteBatchNode是如何添加进一个节点的。上面要注意的是纹理集中索引，每个精灵单独保存了这个值，同时CCSpriteBatchNode中的数组保存了每个精灵的四边形结构，这个结构的顺序是按照纹理集索引由小到大排序的，用于在渲染时保证zorder大的在后面被渲染，所以更新了精灵的纹理集索引，还需要更新m_quads中的元素。swap(oldIndex, *curIndex)正是做这件事的，代码如下：

void CCSpriteBatchNode::swap(int oldIndex, int newIndex)
{
    CCObject** x = m_pobDescendants->data->arr; //获得所有后裔，后裔的添加代码后面贴出，m_pobDescendants的后裔按是已排序的
    ccV3F_C4B_T2F_Quad* quads = m_pobTextureAtlas->getQuads(); //获得纹理集所有精灵的四边形

    CCObject* tempItem = x[oldIndex]; //取出后裔节点
    ccV3F_C4B_T2F_Quad tempItemQuad=quads[oldIndex]; //去除后裔节点对应的quad

    //update the index of other swapped item
    ((CCSprite*) x[newIndex])->setAtlasIndex(oldIndex); //把要交换的节点的纹理索引设置成交换后的索引

    //交换两个后裔节点
    x[oldIndex]=x[newIndex];
    quads[oldIndex]=quads[newIndex];
    x[newIndex]=tempItem;
    quads[newIndex]=tempItemQuad;
}

CCSpriteBatchNode用m_pobDescendants存储了所有的后裔节点，包含孩子的孩子，所有的后裔渲染都是由CCSpriteBatchNode负责的。除此之外CCSpriteBatchNode的m_pobTextureAtlas中存储了所有后裔的四个顶点信息。上面的作用就是交换oldIndex与newIndex两个位置的后裔节点与这两个位置的四边形结构。至于m_pobDescendants是如何加入后裔孩子的需要分析addChild，后面分析。

有孩子时if (((CCSprite*) (pArray->data->arr[0]))->getZOrder() >= 0)表示第一个孩子的zorder大于等于0，由于孩子已经按z升序排好，说明所有孩子都在父亲的前面，所以先更新父亲自己：把m_pobDescendants中后裔节点、m_pobTextureAtlas中节点顶点信息交换一下，并更新自己在纹理集中的索引，随后的循环再依次递归更新每个孩子。否则是先更新z小于0的孩子，此时if (needNewIndex && child->getZOrder() >= 0)语句进不去，然后遇到z大于等于0的要把父亲先更新了，也就是执行if (needNewIndex && child->getZOrder() >= 0)的代码，执行完了后needNewIndex = false;，接着调用updateAtlasIndex(child, curIndex);把z大于等于0的更新，后面由于needNewIndex为false，也就只会执行updateAtlasIndex(child, curIndex)这句代码了。最后面的if (needNewIndex)表示孩子的z都小于0，父亲还没被更新，所以将执行if (needNewIndex)里面的语句。
所有的孩子都按照zorder的大小进行了排序，这个是在CCSpriteBatchNode::sortAllChildren()中完成的，里面有一句arrayMakeObjectsPerformSelector(m_pChildren, sortAllChildren, CCSprite*);代码如下：

// Calls the no-argument member function `func` on every non-null element of
// pArray, after casting each element to elementType. Does nothing when pArray
// is null or empty. Wrapped in do { } while(false) so the macro expands to a
// single statement.
#define arrayMakeObjectsPerformSelector(pArray, func, elementType)    \
do {                                                                  \
    if(pArray && pArray->count() > 0)                                 \
    {                                                                 \
        CCObject* child;                                              \
        CCARRAY_FOREACH(pArray, child)                                \
        {                                                             \
            elementType pNode = (elementType) child;                  \
            if(pNode)                                                 \
            {                                                         \
                pNode->func();                                        \
            }                                                         \
        }                                                             \
    }                                                                 \
}                                                                     \
while(false)

就是递归调用孩子的sortAllChildren函数，对所有孩子按z升序进行排序，这些孩子的sortAllChildren代码调用的是CCNode的，这里不贴代码了。

CCSpriteBatchNode::visit(void)中的sortAllChildren()分析完毕，这里要知道几点：
1. CCSpriteBatchNode的孩子CCSprite存储了在纹理集中的索引位置
2. CCSpriteBatchNode用m_pobDescendants存储了所有子节点包含这些节点的孩子
3. CCSpriteBatchNode用到的纹理集m_pobTextureAtlas保存了每个后裔节点(子节点以及这些节点的孩子）的quad信息，quad用于存储渲染图元信息的。
4. CCSpriteBatchNode中的孩子先被按z升序排序，排序是递归的，所以所有孩子的子节点都是排序好的了
5. CCSpriteBatchNode的孩子的最后根据z，把自己的在纹理集中的索引位置、在m_pobDescendants中的位置以及quad在m_pobTextureAtlas的成员m_pQuads中位置都排好序了。
6. m_pobDescendants中的位置是根据加入时的次序得到的，第几个加入就位于第几个，先处理父亲的，再递归处理孩子的。m_pobDescendants中的位置会保存在CCSprite的纹理集索引(atlasIndex)中。在visit方法中，将会根据z调整孩子在m_pChildren中的位置，然后遍历m_pChildren对m_pobDescendants进行调整，调整的同时也会调整其在m_pQuads中的位置。
7. m_pobDescendants存在的目的是添加、删除孩子的时候，把孩子及后裔移除掉，m_pobTextureAtlas的m_pQuads是为了按z升序渲染图元。m_pobDescendants存的是节点，m_pQuads存的是quad，纹理集中的渲染次序存在精灵中。3者缺一不可。

再分析visit中的draw()函数，它的作用就是渲染CCSpriteBatchNode，代码如下：

// Draws the whole batch: runs updateTransform() on every child, sets the
// blend function and asks the texture atlas to draw all of its quads.
// Returns immediately when the atlas holds no quads.
void CCSpriteBatchNode::draw(void)
{
    CC_PROFILER_START("CCSpriteBatchNode - draw");

    // Optimization: Fast Dispatch
    // NOTE(review): this early return happens before the matching CC_PROFILER_STOP below.
    if( m_pobTextureAtlas->getTotalQuads() == 0 )
    {
        return;
    }

    CC_NODE_DRAW_SETUP();

    arrayMakeObjectsPerformSelector(m_pChildren, updateTransform, CCSprite*); // call updateTransform() on every child

    ccGLBlendFunc( m_blendFunc.src, m_blendFunc.dst );

    // the actual rendering is delegated to the texture atlas
    m_pobTextureAtlas->drawQuads();

    CC_PROFILER_STOP("CCSpriteBatchNode - draw");
}

m_pobTextureAtlas->drawQuads()代码如下：

 void CCTextureAtlas::drawQuads()
{
    this->drawNumberOfQuads(m_uTotalQuads, 0);
}

// Draws n quads through the VAO, starting at quad `start` in the index buffer.
//   n     - number of quads to draw
//   start - first quad to draw; converted to a byte offset into the indices
// When m_bDirty is set, the CPU-side quad array is first copied into the
// vertex buffer object. If the buffer cannot be mapped the copy is skipped and
// m_bDirty stays set, so the upload is attempted again on the next draw.
void CCTextureAtlas::drawNumberOfQuads(unsigned int n, unsigned int start)
{    
    //glActiveTexture(GL_TEXTURE0 + textureUnit);
    //  glBindTexture(GL_TEXTURE_2D, textureId);
    ccGLBindTexture2D(m_pTexture->getName());

    if (m_bDirty)
    {
        glBindBuffer(GL_ARRAY_BUFFER, m_pBuffersVBO[0]);
        void *buf = glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
        // glMapBuffer returns NULL on failure; never hand that to memcpy
        if (buf != NULL)
        {
            // NOTE(review): copies (n-start) quads from the beginning of
            // m_pQuads; confirm this is intended for start > 0.
            memcpy(buf, m_pQuads, sizeof(m_pQuads[0])* (n-start));
            glUnmapBuffer(GL_ARRAY_BUFFER);
            m_bDirty = false;
        }
        glBindBuffer(GL_ARRAY_BUFFER, 0);
    }
    // bind the VAO before issuing the draw call
    ccGLBindVAO(m_uVAOname);

    // six indices per quad; the last argument is a byte offset into the element buffer
    glDrawElements(GL_TRIANGLES, (GLsizei) n*6, GL_UNSIGNED_SHORT, (GLvoid*) (start*6*sizeof(m_pIndices[0])) );

//    glBindVertexArray(0);
    CC_INCREMENT_GL_DRAWS(1);
    CHECK_GL_ERROR_DEBUG();
}

上面代码进行删减、留下了核心代码。渲染前先绑定纹理使用，之前我写的gl的一些伪代码可以看出，为什么需要绑定才能使用，gl在使用前都是获取对应的key的值，客户通过绑定设置key的值，上面glActiveTexture是绑定前必须做的，由于纹理多个有key为GL_TEXTURE_2D的单元，所以整个key形式是[key1,key2]，key1是描述纹理单元ID，key2描述纹理ID。if (m_bDirty)表示渲染的数据有变化，比如精灵位置发生了变换，或者添加、删除了精灵，必须更新显存的缓存，glMapBuffer是获得显存地址，然后把新的数据通过memcpy传了过去。之前谈到VAO与VBO，VBO保存了顶点数据，VAO保存了图元描述渲染信息，包含VBO。后面只需要绑定使用VAO，就直接可以通过glDrawElements渲染图元了，不再需要绑定使用存储顶点信息VBO，然后把VBO中的数据通过glVertexAttribPointer传给对应的gl属性位置处，通过绑定使用存储绘制图元的顶点索引VBO，把数据通过glDrawElements传给gl。VAO的作用简化了代码编写，最重要简化了数据传输，优化了性能。CCSpriteBatchNode的渲染分析好了，下面最后分析一下CCSpriteBatchNode是怎么添加一个孩子进来的。

CCSpriteBatchNode添加节点

CCSpriteBatchNode::addChild(CCNode *child, int zOrder, int tag)代码如下：

// Adds `child` to the batch node. The child must be a CCSprite that uses the
// same texture as the batch node's atlas (both checked with CCAssert). It is
// added as a regular child first and then registered through appendChild().
void CCSpriteBatchNode::addChild(CCNode *child, int zOrder, int tag)
{
    CCAssert(child != NULL, "child should not be null");
    CCAssert(dynamic_cast<CCSprite*>(child) != NULL, "CCSpriteBatchNode only supports CCSprites as children");

    CCSprite *sprite = (CCSprite*)(child);
    // the sprite's texture id has to match the atlas texture id
    CCAssert(sprite->getTexture()->getName() == m_pobTextureAtlas->getTexture()->getName(), "CCSprite is not using the same texture id");

    CCNode::addChild(child, zOrder, tag);
    appendChild(sprite);
}

CCNode::addChild(child, zOrder, tag)就是往m_pChildren加个孩子，不再分析，appendChild(pSprite)是关键，代码如下：

// Registers `sprite` with the batch node: appends it to the descendants array,
// uses its position there as its atlas index, inserts its quad into the atlas
// at that index, and then does the same recursively for the sprite's children.
void CCSpriteBatchNode::appendChild(CCSprite* sprite)
{
    // children have to be re-sorted on the next sortAllChildren()
    m_bReorderChildDirty=true;
    sprite->setBatchNode(this);
    sprite->setDirty(true);

    // atlas is full: grow it before inserting another quad
    if(m_pobTextureAtlas->getTotalQuads() == m_pobTextureAtlas->getCapacity()) {
        increaseAtlasCapacity();
    }

    ccArray *descendantsData = m_pobDescendants->data;

    ccArrayAppendObjectWithResize(descendantsData, sprite);

    // the sprite was appended last, so its index is num-1
    unsigned int index=descendantsData->num-1;

    sprite->setAtlasIndex(index);

    ccV3F_C4B_T2F_Quad quad = sprite->getQuad();
    m_pobTextureAtlas->insertQuad(&quad, index);

    // add children recursively

    CCObject* pObj = NULL;
    CCARRAY_FOREACH(sprite->getChildren(), pObj)
    {
        CCSprite* child = (CCSprite*)pObj;
        appendChild(child);
    }
}

在上面可以看到之前渲染时讲到的m_pobDescendants、m_pobTextureAtlas。下面分析sprite->setBatchNode(this)，m_pobTextureAtlas->insertQuad(&quad, index)。其它的代码sprite->setDirty(true)即表示精灵需要重新计算变换矩阵，ccArrayAppendObjectWithResize(descendantsData, sprite)就是往descendantsData添加sprite，会加在数组最后一个元素后面。sprite->setAtlasIndex(index)让sprite存储它加入的次序作为纹理的渲染次序，渲染时会调整。下面先看sprite->setBatchNode(this)的代码：

// Attaches the sprite to a batch node, or detaches it when NULL is passed.
//   pobSpriteBatchNode - batch node that will render this sprite, or NULL for
//                        self rendering; stored without retaining it
void CCSprite::setBatchNode(CCSpriteBatchNode *pobSpriteBatchNode)
{
    m_pobBatchNode = pobSpriteBatchNode; // weak reference

    // self render
    if( ! m_pobBatchNode ) {
        // no batch node: reset the atlas state and clear the dirty flags
        m_uAtlasIndex = CCSpriteIndexNotInitialized;
        setTextureAtlas(NULL);
        m_bRecursiveDirty = false;
        setDirty(false);

        // rebuild the four quad corners from the offset position and the rect size
        float x1 = m_obOffsetPosition.x;
        float y1 = m_obOffsetPosition.y;
        float x2 = x1 + m_obRect.size.width;
        float y2 = y1 + m_obRect.size.height;
        m_sQuad.bl.vertices = vertex3( x1, y1, 0 );
        m_sQuad.br.vertices = vertex3( x2, y1, 0 );
        m_sQuad.tl.vertices = vertex3( x1, y2, 0 );
        m_sQuad.tr.vertices = vertex3( x2, y2, 0 );

    } else {

        // using batch
        // start from the identity transform and share the batch node's texture atlas
        m_transformToBatch = CCAffineTransformIdentity;
        setTextureAtlas(m_pobBatchNode->getTextureAtlas()); // weak ref
    }
}

它的作用是为精灵设置所属的批节点，如果m_pobBatchNode为NULL，表示精灵使用自己的draw渲染自己，后面就是计算m_sQuad中存储的四个顶点坐标。
m_pobTextureAtlas->insertQuad(&quad, index)代码如下：

// Inserts a copy of *quad at position `index` of the quad array.
//   quad  - quad to copy into the atlas
//   index - destination slot; must be smaller than the capacity
// Quads already stored at index and beyond are shifted one slot towards the
// end. The total quad count is incremented and the atlas is marked dirty so
// the vertex buffer is refreshed before the next draw.
void CCTextureAtlas::insertQuad(ccV3F_C4B_T2F_Quad *quad, unsigned int index)
{
    CCAssert( index < m_uCapacity, "insertQuadWithTexture: Invalid index");

    m_uTotalQuads++;
    CCAssert( m_uTotalQuads <= m_uCapacity, "invalid totalQuads");

    // issue #575. index can be > totalQuads
    // Compare before subtracting: with unsigned arithmetic,
    // (m_uTotalQuads-1) - index would wrap to a huge value when index lies
    // past the last used slot, and memmove would run far out of bounds.
    const unsigned int lastIndex = m_uTotalQuads - 1;

    // last object doesn't need to be moved
    if( index < lastIndex )
    {
        const unsigned int remaining = lastIndex - index;
        // shift the quads at [index, lastIndex) up by one slot
        memmove( &m_pQuads[index+1],&m_pQuads[index], sizeof(m_pQuads[0]) * remaining );
    }

    m_pQuads[index] = *quad;

    m_bDirty = true;
}

上面代码利用memmove把index开始的(m_uTotalQuads-1) - index个元素后移一个单位，然后在index插入quad的，这样元素就保存到CCTextureAtlas的m_pQuads中了。最后m_bDirty = true强制下次渲染时更新gl显存中顶点数据。

从addChild可以看出，添加一个孩子会把这个孩子以及所有的孩子的后裔加入到m_pobDescendants中，次序是先父孩子，再递归的加入父孩子的子孩子，然后通过sprite->setAtlasIndex(index)设置sprite在纹理集中的顶点索引，最后根据索引把quad放到m_pobTextureAtlas中存储quad的对应位置上。

参考:

[Tutorial2: VAOs, VBOs, Vertex and Fragment Shaders (C / SDL)](<https://www.opengl.org/wiki/Tutorial2:_VAOs,_VBOs,_Vertex_and_Fragment_Shaders_(C_/_SDL)>)
http://www.zwqxin.com/archives/opengl/vao-and-vbo-stuff.html