 CU tree 与 x264 的 MB tree 基本一致。简单来说,它的作用是:帧与帧之间存在参考关系,如果被参考的帧拥有更高的质量,那么只需调整这一帧,就可以改善一批帧的质量。因此 CU tree 根据帧被引用的程度(也可以理解为该帧遗传给其他帧多少信息)来衡量该帧的重要性。

因为遗传信息是可以累加的,所以采用逆序遍历的方式计算 CU tree 中的遗传信息。比如 b 参考 p0,则 p0 的遗传信息对应的公式如下:

遗传信息 = (propagate_in + intra_cost * inv_qscale * fps_factor) * (1 - inter_cost / intra_cost) * dist_scale_factor







/* Estimate the total amount of influence on future quality that could be had if we
 * were to improve the reference samples used to inter predict any given CU.
 *
 * For every index i in [0, len):
 *   interCost = min(intraCosts[i], interCosts[i] & LOWRES_COST_MASK)
 *   dst[i]    = (int)((propagateIn[i] + intraCosts[i] * invQscales[i] * fps)
 *                     * (intraCosts[i] - interCost) / intraCosts[i] + 0.5)
 * with fps = *fpsFactor / 256.
 *
 * dst         - output array, one propagate cost per CU
 * propagateIn - cost already propagated into each CU
 * intraCosts  - per-CU intra cost
 * interCosts  - per-CU inter cost; only the bits selected by LOWRES_COST_MASK are used
 * invQscales  - per-CU inverse qscale factor
 * fpsFactor   - pointer to the frame-duration factor, divided by 256 before use
 * len         - number of CUs to process
 *
 * NOTE(review): an intraCosts[i] of zero makes the divisor zero; this routine does not
 * guard against it - confirm that callers never pass a zero intra cost. */
static void estimateCUPropagateCost(int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts,
                                    const int32_t* invQscales, const double* fpsFactor, int len)
{
    double fps = *fpsFactor / 256;  // range[0.01, 1.00]
    for (int i = 0; i < len; i++)
    {
        int intraCost = intraCosts[i];
        // Clamp the inter cost so that (intraCost - interCost) can never be negative.
        int interCost = X265_MIN(intraCosts[i], interCosts[i] & LOWRES_COST_MASK);
        double propagateIntra = intraCost * invQscales[i]; // Q16 x Q8.8 = Q24.8
        double propagateAmount = (double)propagateIn[i] + propagateIntra * fps; // Q16.0 + Q24.8 x Q0.x = Q25.0
        double propagateNum = (double)(intraCost - interCost); // Q32 - Q32 = Q33.0
        double propagateDenom = (double)intraCost;             // Q32
        // Scale by the fraction (1 - interCost / intraCost); +0.5 before truncation.
        dst[i] = (int)(propagateAmount * propagateNum / propagateDenom + 0.5);
    }
}


qpoffset = -5 \ast \left( 1 - qcompress \right) \ast \log_{2}\left( 1 + \frac{propagate \ast fpsFactor}{intra \ast invQscaleFactor} \right)



// Abridged excerpt: derives the per-CU CU-tree QP offset from the intra cost and the
// propagated cost. fpsFactor and weightdelta come from code omitted in this listing.
void Lookahead::cuTreeFinish(Lowres *frame, double averageDuration, int ref0Distance)
{   // surplus code omitted
    for (int cuIndex = 0; cuIndex < m_cuCount; cuIndex++)
            {   // intra cost of the CU (the information contained in the block itself)
                int intracost = (frame->intraCost[cuIndex] * frame->invQscaleFactor[cuIndex] + 128) >> 8;
                if (intracost)
                {   // propagateCost (the information passed on to subsequent frames)
                    int propagateCost = (frame->propagateCost[cuIndex] * fpsFactor + 128) >> 8;
                    // log2((intracost + propagateCost) / intracost), plus weightdelta
                    double log2_ratio = X265_LOG2(intracost + propagateCost) - X265_LOG2(intracost) + weightdelta;
                    // The CU-tree offset is the AQ offset lowered by m_cuTreeStrength * log2_ratio.
                    frame->qpCuTreeOffset[cuIndex] = frame->qpAqOffset[cuIndex] - m_cuTreeStrength * log2_ratio;







lookahead流程如下图1,与整体x265的关系如图2 (其中黄色部分):





// Abridged excerpt: records the requested slice type on the frame's lowres data.
void Lookahead::addPicture(Frame& curFrame, int sliceType)
{   // When analysisLoad is enabled and lookahead is disabled (bDisableLookahead), the picture
    // is, per the original note, added straight to the output queue and m_inputCount is
    // incremented; those statements are not visible in this excerpt.
    if (m_param->analysisLoad && m_param->bDisableLookahead)
        if (!m_filled)
            m_filled = true;
        curFrame.m_lowres.sliceType = sliceType;


checkLookaheadQueue 用于检查前向预测队列(lookahead queue)的状态。以下是对代码的解释:

// Abridged excerpt: decides whether enough frames have been queued for m_filled to be set.
void Lookahead::checkLookaheadQueue(int &frameCnt)
    /* determine if the lookahead is (over) filled enough for frames to begin to
     * be consumed by frame encoders */
    if (!m_filled)
    {   // If both bframes and lookaheadDepth are zero, zero-latency mode is in use and
        // m_filled is set to true immediately.
        if (!m_param->bframes & !m_param->lookaheadDepth)
            m_filled = true; /* zero-latency */
        else if (frameCnt >= m_param->lookaheadDepth + 2 + m_param->bframes)
            m_filled = true; /* full capacity plus mini-gop lag */

    // The body of this condition is not visible in this excerpt.
    if (m_pool && m_inputQueue.size() >= m_fullQueueSize)


这段代码是前向预测(lookahead)模块中的一部分,用于从输出队列中获取已决定的图片(decided picture)。该方法从输出队列中移除图片,并且只会在没有其他可用图片时阻塞。它只在m_filled为true时开始移除图片,而m_filled在超过前向预测深度的图片已经输入后才设置为true,因此在输出图片被取出之前,slicetypeDecide()应该已经开始运行。第一次slicetypeDecide()显然仍然需要阻塞等待,但之后的slicetypeDecide()将保持领先于编码器(因为每次从输出队列中移除一张图片,就会向输入队列中添加一张图片),并在编码器需要它们之前决定图片的切片类型。以下是对代码的解释:

// Abridged excerpt: returns the next frame from the output queue, or NULL if none is returned.
Frame* Lookahead::getDecidedPicture()
{   // Check m_filled, i.e. whether enough pictures have been queued for output to begin.
    if (m_filled)// pictures may now be taken from the output queue
    {   // Per the original note, the output lock (m_outputLock) is taken here to access the
        // output queue safely; the locking statements are not visible in this excerpt.
        Frame *out = m_outputQueue.popFront();
        if (out)
            return out;
        if (m_param->analysisLoad && m_param->bDisableLookahead)
            return NULL;

        findJob(-1); /* run slicetypeDecide() if necessary */

        // Request an output signal only if a slicetype decision is currently in progress.
        bool wait = m_outputSignalRequired = m_sliceTypeBusy;
        // The statement guarded by `wait` is not visible in this excerpt.
        if (wait)
        out = m_outputQueue.popFront();
        if (out)
        return out;
        return NULL;



// Abridged excerpt: claims the slicetype-decision job when the input queue holds at least
// m_fullQueueSize frames, no decision is already running, and the lookahead is active.
void Lookahead::findJob(int /*workerThreadID*/)
    bool doDecide;
    if (m_inputQueue.size() >= m_fullQueueSize && !m_sliceTypeBusy && m_isActive)
        doDecide = m_sliceTypeBusy = true;
        // NOTE(review): the branch keyword that should separate these two assignments
        // (presumably an `else`) is missing from this excerpt.
        doDecide = m_helpWanted = false;

    // The statement guarded by this condition is not visible in this excerpt.
    if (!doDecide)
    ProfileLookaheadTime(m_slicetypeDecideElapsedTime, m_countSlicetypeDecide);
    if (m_outputSignalRequired)
        m_outputSignalRequired = false;
    // Mark the decision as finished so that a later call can claim the job again.
    m_sliceTypeBusy = false;



oid Lookahead::slicetypeDecide()
{   //创建 PreLookaheadGroup 类的实例 pre,并传入当前 Lookahead 对象的引用
    PreLookaheadGroup pre(*this);
    //创建 Lowres 指针数组 frames 和 Frame 指针数组 list,并将它们初始化为零
    Lowres* frames[X265_LOOKAHEAD_MAX + X265_BFRAME_MAX + 4];
    Frame*  list[X265_BFRAME_MAX + 4];
    memset(frames, 0, sizeof(frames));
    memset(list, 0, sizeof(list));
    //计算最大搜索范围 maxSearch,取 m_param->lookaheadDepth 和 X265_LOOKAHEAD_MAX 中的最小值,并确保至少为 1
    int maxSearch = X265_MIN(m_param->lookaheadDepth, X265_LOOKAHEAD_MAX);   
    maxSearch = X265_MAX(1, maxSearch);

    {   // Hold m_inputLock for the duration of this scope.
        ScopedLock lock(m_inputLock);
        // Start from the first frame of the input queue; j is the shared loop counter.
        Frame *curFrame = m_inputQueue.first();
        int j;
		if (m_param->bResetZoneConfig)
		{   // Walk every zone configuration in m_param->rc.zones.
			for (int i = 0; i < m_param->rc.zonefileCount; i++)
			{   // When the current frame's m_poc equals the zone's startFrame, switch m_param to that zone's zoneParam.
				if (m_param->rc.zones[i].startFrame == curFrame->m_poc)
					m_param = m_param->rc.zones[i].zoneParam;
        // Copy up to m_param->bframes + 2 queued frames into list, advancing curFrame each time.
        for (j = 0; j < m_param->bframes + 2; j++)
            if (!curFrame) break;
            list[j] = curFrame;
            curFrame = curFrame->m_next;
        // Rewind curFrame to the head of the input queue; frames[0] is m_lastNonB.
        curFrame = m_inputQueue.first();
        frames[0] = m_lastNonB;
        // Store the lowres data of up to maxSearch queued frames in frames[1..].
        for (j = 0; j < maxSearch; j++)
            if (!curFrame) break;
            frames[j + 1] = &curFrame->m_lowres;
            // Frames whose lowres data is not initialised yet are queued in pre.m_preframes, and pre.m_jobTotal is incremented.
            if (!curFrame->m_lowresInit)
                pre.m_preframes[pre.m_jobTotal++] = curFrame;

            curFrame = curFrame->m_next;
        // Shrink maxSearch to the number of frames actually visited.
        maxSearch = j;
    // If any frames need pre-analysis (pre.m_jobTotal > 0), do the following.
    /* perform pre-analysis on frames which need it, using a bonded task group */
    if (pre.m_jobTotal)
    {   // If a thread pool exists, try to bond peer workers to the pre-analysis jobs.
        if (m_pool)
            pre.tryBondPeers(*m_pool, pre.m_jobTotal);
        // Per the original note, pre.processTasks(-1) is called here; the call is not visible in this excerpt.
    {   // Initialise endIndex, length and the m_frameVariance array.
        int j, endIndex = 0, length = X265_BFRAME_MAX + 4;
        for (j = 0; j < length; j++)
            m_frameVariance[j] = -1;
        // Store each listed frame's lowres frameVariance at index (m_poc % length).
        for (j = 0; list[j] != NULL; j++)
            m_frameVariance[list[j]->m_poc % length] = list[j]->m_lowres.frameVariance;
        // Scan m_frameVariance by index k to detect a fade-in region.
        for (int k = list[0]->m_poc % length; k <= list[j - 1]->m_poc % length; k++)
        {   // Per the original note, the loop exits when m_frameVariance[k] is -1; the statement is not visible in this excerpt.
            if (m_frameVariance[k]  == -1)
            // A fade-in is flagged when the variance at k is >= the variance of the previous slot
            // (slot length - 1 is the predecessor of slot 0).
            if((k > 0 && m_frameVariance[k] >= m_frameVariance[k - 1]) || 
                (k == 0 && m_frameVariance[k] >= m_frameVariance[length - 1]))
                m_isFadeIn = true;
                // If m_fadeCount and m_fadeStart still hold 0 and -1, derive m_fadeStart from the POCs of the listed frames.
                if (m_fadeCount == 0 && m_fadeStart == -1)
                    for(int temp = list[0]->m_poc; temp <= list[j - 1]->m_poc; temp++)
                        if (k == temp % length) {
                            m_fadeStart = temp ? temp - 1 : 0;
                // m_fadeCount becomes list[endIndex]->m_poc - m_fadeStart, or 0 if that POC is not beyond m_fadeStart.
                m_fadeCount = list[endIndex]->m_poc > m_fadeStart ? list[endIndex]->m_poc - m_fadeStart : 0;
            {   // Otherwise: if a fade-in was in progress and m_fadeCount >= fpsNum / fpsDenom,
                // the frame whose POC equals m_fadeStart + m_fadeCount gets bIsFadeEnd = true.
                if (m_isFadeIn && m_fadeCount >= m_param->fpsNum / m_param->fpsDenom)
                    for (int temp = 0; list[temp] != NULL; temp++)
                        if (list[temp]->m_poc == m_fadeStart + (int)m_fadeCount)
                            list[temp]->m_lowres.bIsFadeEnd = true;
                // Reset the fade-tracking state.
                m_isFadeIn = false;
                m_fadeCount = 0;
                m_fadeStart = -1;
            // At the last index (length - 1), reset k to -1 so that the loop increment wraps it to 0.
            if (k == length - 1)
                k = -1;
    // Run slice-type analysis only when a previous non-B frame exists and at least one
    // feature that depends on it is enabled.
    if (m_lastNonB &&
        ((m_param->bFrameAdaptive && m_param->bframes) ||
         m_param->rc.cuTree || m_param->scenecutThreshold || m_param->bHistBasedSceneCut ||
         (m_param->lookaheadDepth && m_param->rc.vbvBufferSize)))
    {   // When m_param->rc.bStatRead is false, call slicetypeAnalyse on the frames.
        if (!m_param->rc.bStatRead)
            slicetypeAnalyse(frames, false);
        // Decide whether a VBV (Video Buffering Verifier) lookahead pass is needed.
        bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
        if ((m_param->analysisLoad && m_param->scaleFactor && bIsVbv) || m_param->bliveVBV2pass)
            int numFrames;
            // Count frames up to maxSearch; per the original note the count stops at the first
            // null entry (that statement is not visible in this excerpt).
            for (numFrames = 0; numFrames < maxSearch; numFrames++)
                Lowres *fenc = frames[numFrames + 1];
                if (!fenc)
            // Call vbvLookahead with the frame list, numFrames and false.
            vbvLookahead(frames, numFrames, false);

    // Resolve the final slice type of each frame in `list`. This excerpt is abridged:
    // braces, loop exits and some statement bodies are not shown.
    int bframes, brefs;
    if (!m_param->analysisLoad || m_param->bAnalysisType == HEVC_INFO)
        bool isClosedGopRadl = m_param->radl && (m_param->keyframeMax != m_param->keyframeMin);
        for (bframes = 0, brefs = 0;; bframes++)
            Lowres& frm = list[bframes]->m_lowres;

            // A BREF request is downgraded to B when B-pyramid is disabled.
            if (frm.sliceType == X265_TYPE_BREF && !m_param->bBPyramid && brefs == m_param->bBPyramid)
                frm.sliceType = X265_TYPE_B;
                x265_log(m_param, X265_LOG_WARNING, "B-ref at frame %d incompatible with B-pyramid\n",

            /* pyramid with multiple B-refs needs a big enough dpb that the preceding P-frame stays available.
             * smaller dpb could be supported by smart enough use of mmco, but it's easier just to forbid it. */
            else if (frm.sliceType == X265_TYPE_BREF && m_param->bBPyramid && brefs &&
                m_param->maxNumReferences <= (brefs + 3))
                frm.sliceType = X265_TYPE_B;
                x265_log(m_param, X265_LOG_WARNING, "B-ref at frame %d incompatible with B-pyramid and %d reference frames\n",
                    frm.sliceType, m_param->maxNumReferences);
            // Keyframe-interval handling: keyframeMax distance reached (optionally extended by
            // gopLookahead), or the frame sits at a chunk boundary.
            if (((!m_param->bIntraRefresh || frm.frameNum == 0) && frm.frameNum - m_lastKeyframe >= m_param->keyframeMax &&
                (!m_extendGopBoundary || frm.frameNum - m_lastKeyframe >= m_param->keyframeMax + m_param->gopLookahead)) ||
                (frm.frameNum == (m_param->chunkStart - 1)) || (frm.frameNum == m_param->chunkEnd))
                if (frm.sliceType == X265_TYPE_AUTO || frm.sliceType == X265_TYPE_I)
                    frm.sliceType = m_param->bOpenGOP && m_lastKeyframe >= 0 ? X265_TYPE_I : X265_TYPE_IDR;
                bool warn = frm.sliceType != X265_TYPE_IDR;
                if (warn && m_param->bOpenGOP)
                    warn &= frm.sliceType != X265_TYPE_I;
                if (warn)
                    x265_log(m_param, X265_LOG_WARNING, "specified frame type (%d) at %d is not compatible with keyframe interval\n",
                        frm.sliceType, frm.frameNum);
                    frm.sliceType = m_param->bOpenGOP && m_lastKeyframe >= 0 ? X265_TYPE_I : X265_TYPE_IDR;
            // A frame flagged as the end of a fade becomes I (open GOP, after a first keyframe) or IDR.
            if (frm.bIsFadeEnd){
                frm.sliceType = m_param->bOpenGOP && m_lastKeyframe >= 0 ? X265_TYPE_I : X265_TYPE_IDR;
            // With zone reconfiguration, the frame at a zone start (shifted by that zone's radl
            // when the start is non-zero) is made IDR.
            if (m_param->bResetZoneConfig)
                for (int i = 0; i < m_param->rc.zonefileCount; i++)
                    int curZoneStart = m_param->rc.zones[i].startFrame;
                    curZoneStart += curZoneStart ? m_param->rc.zones[i].zoneParam->radl : 0;
                    if (curZoneStart == frm.frameNum)
                        frm.sliceType = X265_TYPE_IDR;
            if ((frm.sliceType == X265_TYPE_I && frm.frameNum - m_lastKeyframe >= m_param->keyframeMin) || (frm.frameNum == (m_param->chunkStart - 1)) || (frm.frameNum == m_param->chunkEnd))
                if (m_param->bOpenGOP)
                    m_lastKeyframe = frm.frameNum;
                    frm.bKeyframe = true;
                    frm.sliceType = X265_TYPE_IDR;
            // Closed-GOP RADL on a scenecut IDR: the next `radl` frames become B and the IDR moves after them.
            if (frm.sliceType == X265_TYPE_IDR && frm.bScenecut && isClosedGopRadl)
                for (int i = bframes; i < bframes + m_param->radl; i++)
                    list[i]->m_lowres.sliceType = X265_TYPE_B;
                list[(bframes + m_param->radl)]->m_lowres.sliceType = X265_TYPE_IDR;
            if (frm.sliceType == X265_TYPE_IDR)
                /* Closed GOP */
                m_lastKeyframe = frm.frameNum;
                frm.bKeyframe = true;
                int zoneRadl = 0;
                if (m_param->bResetZoneConfig)
                    for (int i = 0; i < m_param->rc.zonefileCount; i++)
                        int zoneStart = m_param->rc.zones[i].startFrame;
                        zoneStart += zoneStart ? m_param->rc.zones[i].zoneParam->radl : 0;
                        if (zoneStart == frm.frameNum)
                            zoneRadl = m_param->rc.zones[i].zoneParam->radl;
                            m_param->radl = 0;
                            m_param->rc.zones->zoneParam->radl = i < m_param->rc.zonefileCount - 1 ? m_param->rc.zones[i + 1].zoneParam->radl : 0;
                // Without RADL, the frame just before the IDR is forced to P.
                if (bframes > 0 && !m_param->radl && !zoneRadl)
                    list[bframes - 1]->m_lowres.sliceType = X265_TYPE_P;
            // Maximum run of B-frames reached, or no further frame: this one must be a non-B.
            if (bframes == m_param->bframes || !list[bframes + 1])
                if (IS_X265_TYPE_B(frm.sliceType))
                    x265_log(m_param, X265_LOG_WARNING, "specified frame type is not compatible with max B-frames\n");
                if (frm.sliceType == X265_TYPE_AUTO || IS_X265_TYPE_B(frm.sliceType))
                    frm.sliceType = X265_TYPE_P;
            if (frm.sliceType == X265_TYPE_BREF)
            if (frm.sliceType == X265_TYPE_AUTO)
                frm.sliceType = X265_TYPE_B;
            else if (!IS_X265_TYPE_B(frm.sliceType))
        // Second loop over the list: only keyframe bookkeeping is visible here.
        // NOTE(review): presumably the alternative branch of the condition above - confirm.
        for (bframes = 0, brefs = 0;; bframes++)
            Lowres& frm = list[bframes]->m_lowres;
            if (frm.sliceType == X265_TYPE_BREF)
            if ((IS_X265_TYPE_I(frm.sliceType) && frm.frameNum - m_lastKeyframe >= m_param->keyframeMin)
                || (frm.frameNum == (m_param->chunkStart - 1)) || (frm.frameNum == m_param->chunkEnd))
                m_lastKeyframe = frm.frameNum;
                frm.bKeyframe = true;
            if (!IS_X265_TYPE_B(frm.sliceType))

    // Path taken when bEnableTemporalSubLayers is greater than 2. This excerpt is abridged:
    // braces and some statement bodies are not shown.
    if (m_param->bEnableTemporalSubLayers > 2)
        //Split the partial mini GOP into sub mini GOPs when temporal sub layers are enabled
        if (bframes < m_param->bframes)
            int leftOver = bframes + 1;
            int8_t gopId = m_gopId - 1;
            int gopLen = x265_gop_ra_length[gopId];
            int listReset = 0;


            // Carve sub mini-GOPs out of the remaining frames while more than three are left.
            while ((gopId >= 0) && (leftOver > 3))
                // Step down to the next smaller GOP structure when this one does not fit.
                if (leftOver < gopLen)
                    gopId = gopId - 1;
                    gopLen = x265_gop_ra_length[gopId];
                    int newbFrames = listReset + gopLen - 1;
                    //Re-assign GOP
                    list[newbFrames]->m_lowres.sliceType = IS_X265_TYPE_I(list[newbFrames]->m_lowres.sliceType) ? list[newbFrames]->m_lowres.sliceType : X265_TYPE_P;
                    if (newbFrames)
                        list[newbFrames - 1]->m_lowres.bLastMiniGopBFrame = true;
                    list[newbFrames]->m_lowres.leadingBframes = newbFrames;
                    m_lastNonB = &list[newbFrames]->m_lowres;

                    /* insert a bref into the sequence */
                    if (m_param->bBPyramid && newbFrames)
                        placeBref(list, listReset, newbFrames, newbFrames + 1, &brefs);
                    if (m_param->rc.rateControlMode != X265_RC_CQP)
                        int p0, p1, b;
                        /* For zero latency tuning, calculate frame cost to be used later in RC */
                        if (!maxSearch)
                            for (int i = listReset; i <= newbFrames; i++)
                                frames[i + 1] = &list[listReset + i]->m_lowres;

                        /* estimate new non-B cost */
                        p1 = b = newbFrames + 1;
                        p0 = (IS_X265_TYPE_I(frames[newbFrames + 1]->sliceType)) ? b : listReset;

                        CostEstimateGroup estGroup(*this, frames);

                        estGroup.singleCost(p0, p1, b);

                        if (newbFrames)
                            compCostBref(frames, listReset, newbFrames, newbFrames + 1);

                    /* dequeue all frames from inputQueue that are about to be enqueued
                     * in the output queue. The order is important because Frame can
                     * only be in one list at a time */
                    int64_t pts[X265_BFRAME_MAX + 1];
                    for (int i = 0; i < gopLen; i++)
                        Frame *curFrame;
                        curFrame = m_inputQueue.popFront();
                        pts[i] = curFrame->m_pts;

                    int idx = 0;
                    /* add non-B to output queue */
                    list[newbFrames]->m_reorderedPts = pts[idx++];
                    list[newbFrames]->m_gopOffset = 0;
                    list[newbFrames]->m_gopId = gopId;
                    list[newbFrames]->m_tempLayer = x265_gop_ra[gopId][0].layer;

                    /* add B frames to output queue */
                    int i = 1, j = 1;
                    while (i < gopLen)
                        int offset = listReset + (x265_gop_ra[gopId][j].poc_offset - 1);
                        if (!list[offset] || offset == newbFrames)

                        // Assign gop offset and temporal layer of frames
                        list[offset]->m_gopOffset = j;
                        // NOTE(review): this writes list[bframes] while the neighbouring lines write list[offset] - confirm intent.
                        list[bframes]->m_gopId = gopId;
                        list[offset]->m_tempLayer = x265_gop_ra[gopId][j++].layer;

                        list[offset]->m_reorderedPts = pts[idx++];

                    // Advance past the sub mini-GOP just emitted and try the next smaller structure.
                    listReset += gopLen;
                    leftOver = leftOver - gopLen;
                    gopId -= 1;
                    gopLen = (gopId >= 0) ? x265_gop_ra_length[gopId] : 0;

            // One to three frames remain: handle them as a final short group with
            // m_gopId = -1 (abridged excerpt).
            if (leftOver > 0 && leftOver < 4)
                int64_t pts[X265_BFRAME_MAX + 1];
                int idx = 0;

                int newbFrames = listReset + leftOver - 1;
                // The last remaining frame keeps an I type if it has one, otherwise it becomes P.
                list[newbFrames]->m_lowres.sliceType = IS_X265_TYPE_I(list[newbFrames]->m_lowres.sliceType) ? list[newbFrames]->m_lowres.sliceType : X265_TYPE_P;
                if (newbFrames)
                        list[newbFrames - 1]->m_lowres.bLastMiniGopBFrame = true;
                list[newbFrames]->m_lowres.leadingBframes = newbFrames;
                m_lastNonB = &list[newbFrames]->m_lowres;

                /* insert a bref into the sequence */
                if (m_param->bBPyramid && (newbFrames- listReset) > 1)
                    placeBref(list, listReset, newbFrames, newbFrames + 1, &brefs);

                if (m_param->rc.rateControlMode != X265_RC_CQP)
                    int p0, p1, b;
                    /* For zero latency tuning, calculate frame cost to be used later in RC */
                    if (!maxSearch)
                        for (int i = listReset; i <= newbFrames; i++)
                            frames[i + 1] = &list[listReset + i]->m_lowres;

                        /* estimate new non-B cost */
                    p1 = b = newbFrames + 1;
                    p0 = (IS_X265_TYPE_I(frames[newbFrames + 1]->sliceType)) ? b : listReset;

                    CostEstimateGroup estGroup(*this, frames);

                    estGroup.singleCost(p0, p1, b);

                    if (newbFrames)
                        compCostBref(frames, listReset, newbFrames, newbFrames + 1);

                /* dequeue all frames from inputQueue that are about to be enqueued
                 * in the output queue. The order is important because Frame can
                 * only be in one list at a time */
                for (int i = 0; i < leftOver; i++)
                    Frame *curFrame;
                    curFrame = m_inputQueue.popFront();
                    pts[i] = curFrame->m_pts;

                // The non-B frame takes the first reordered PTS and temporal layer 0.
                m_lastNonB = &list[newbFrames]->m_lowres;
                list[newbFrames]->m_reorderedPts = pts[idx++];
                list[newbFrames]->m_gopOffset = 0;
                list[newbFrames]->m_gopId = -1;
                list[newbFrames]->m_tempLayer = 0;
                // B-ref frames come next, also on temporal layer 0.
                if (brefs)
                    for (int i = listReset; i < newbFrames; i++)
                        if (list[i]->m_lowres.sliceType == X265_TYPE_BREF)
                            list[i]->m_reorderedPts = pts[idx++];
                            list[i]->m_gopOffset = 0;
                            list[i]->m_gopId = -1;
                            list[i]->m_tempLayer = 0;

                /* add B frames to output queue */
                for (int i = listReset; i < newbFrames; i++)
                    /* push all the B frames into output queue except B-ref, which already pushed into output queue */
                    if (list[i]->m_lowres.sliceType != X265_TYPE_BREF)
                        list[i]->m_reorderedPts = pts[idx++];
                        list[i]->m_gopOffset = 0;
                        list[i]->m_gopId = -1;
                        list[i]->m_tempLayer = 1;
        // Fill the complete mini GOP when temporal sub layers are enabled
        // (abridged excerpt: braces and some statement bodies are not shown)

            list[bframes - 1]->m_lowres.bLastMiniGopBFrame = true;
            list[bframes]->m_lowres.leadingBframes = bframes;
            m_lastNonB = &list[bframes]->m_lowres;

            /* insert a bref into the sequence */
            if (m_param->bBPyramid && !brefs)
                placeBref(list, 0, bframes, bframes + 1, &brefs);

            /* calculate the frame costs ahead of time for estimateFrameCost while we still have lowres */
            if (m_param->rc.rateControlMode != X265_RC_CQP)
                int p0, p1, b;
                /* For zero latency tuning, calculate frame cost to be used later in RC */
                if (!maxSearch)
                    for (int i = 0; i <= bframes; i++)
                        frames[i + 1] = &list[i]->m_lowres;

                /* estimate new non-B cost */
                p1 = b = bframes + 1;
                p0 = (IS_X265_TYPE_I(frames[bframes + 1]->sliceType)) ? b : 0;

                CostEstimateGroup estGroup(*this, frames);
                estGroup.singleCost(p0, p1, b);

                compCostBref(frames, 0, bframes, bframes + 1);

            /* dequeue all frames from inputQueue that are about to be enqueued
            * in the output queue. The order is important because Frame can
            * only be in one list at a time */
            int64_t pts[X265_BFRAME_MAX + 1];
            for (int i = 0; i <= bframes; i++)
                Frame *curFrame;
                curFrame = m_inputQueue.popFront();
                pts[i] = curFrame->m_pts;


            int idx = 0;
            /* add non-B to output queue */
            list[bframes]->m_reorderedPts = pts[idx++];
            list[bframes]->m_gopOffset = 0;
            list[bframes]->m_gopId = m_gopId;
            list[bframes]->m_tempLayer = x265_gop_ra[m_gopId][0].layer;

            // Visit the remaining frames in the order given by the x265_gop_ra[m_gopId] table.
            int i = 1, j = 1;
            while (i <= bframes)
                int offset = x265_gop_ra[m_gopId][j].poc_offset - 1;
                if (!list[offset] || offset == bframes)

                // Assign gop offset and temporal layer of frames
                list[offset]->m_gopOffset = j;
                list[offset]->m_gopId = m_gopId;
                list[offset]->m_tempLayer = x265_gop_ra[m_gopId][j++].layer;

                /* add B frames to output queue */
                list[offset]->m_reorderedPts = pts[idx++];

        // When the new last non-B frame is an I type and cuTree or VBV lookahead is in use,
        // re-run the analysis on the frames still queued behind it.
        bool isKeyFrameAnalyse = (m_param->rc.cuTree || (m_param->rc.vbvBufferSize && m_param->lookaheadDepth));
        if (isKeyFrameAnalyse && IS_X265_TYPE_I(m_lastNonB->sliceType))
            Frame *curFrame = m_inputQueue.first();
            frames[0] = m_lastNonB;
            int j;
            for (j = 0; j < maxSearch; j++)
                frames[j + 1] = &curFrame->m_lowres;
                curFrame = curFrame->m_next;

            // Null-terminate the frame array after the last collected entry.
            frames[j + 1] = NULL;
            if (!m_param->rc.bStatRead)
                slicetypeAnalyse(frames, true);
            bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
            if ((m_param->analysisLoad && m_param->scaleFactor && bIsVbv) || m_param->bliveVBV2pass)
                int numFrames;
                for (numFrames = 0; numFrames < maxSearch; numFrames++)
                    Lowres *fenc = frames[numFrames + 1];
                    if (!fenc)
                vbvLookahead(frames, numFrames, true);

        // Mini-GOP finalisation (abridged excerpt): flag the last B-frame of the mini-GOP,
        // record the leading B-frame count and update m_lastNonB.
        if (bframes)
            list[bframes - 1]->m_lowres.bLastMiniGopBFrame = true;
        list[bframes]->m_lowres.leadingBframes = bframes;
        m_lastNonB = &list[bframes]->m_lowres;
        // B-reference insertion: placeBref is called when bBPyramid is set, bframes is
        // greater than 1 and no B-ref has been placed yet (brefs == 0).
        /* insert a bref into the sequence */
        if (m_param->bBPyramid && bframes > 1 && !brefs)
            placeBref(list, 0, bframes, bframes + 1, &brefs);
        /* calculate the frame costs ahead of time for estimateFrameCost while we still have lowres */
        if (m_param->rc.rateControlMode != X265_RC_CQP)
            int p0, p1, b;
            /* For zero latency tuning, calculate frame cost to be used later in RC */
            if (!maxSearch)
                for (int i = 0; i <= bframes; i++)
                    frames[i + 1] = &list[i]->m_lowres;

            /* estimate new non-B cost */
            p1 = b = bframes + 1;
            p0 = (IS_X265_TYPE_I(frames[bframes + 1]->sliceType)) ? b : 0;

            CostEstimateGroup estGroup(*this, frames);
            estGroup.singleCost(p0, p1, b);

            if (m_param->bEnableTemporalSubLayers > 1 && bframes)
                compCostBref(frames, 0, bframes, bframes + 1);
                // Estimate the cost of every B-frame against its surrounding references.
                if (bframes)
                    p0 = 0; // last nonb
                    bool isp0available = frames[bframes + 1]->sliceType == X265_TYPE_IDR ? false : true;

                    for (b = 1; b <= bframes; b++)
                        // When the new non-B is an IDR, p0 is set to the frame itself until a B-ref is reached.
                        if (!isp0available)
                            p0 = b;

                        if (frames[b]->sliceType == X265_TYPE_B)
                            for (p1 = b; frames[p1]->sliceType == X265_TYPE_B; p1++)
                                ; // find new nonb or bref
                            p1 = bframes + 1;

                        estGroup.singleCost(p0, p1, b);

                        // A B-ref becomes the backward reference for the frames after it.
                        if (frames[b]->sliceType == X265_TYPE_BREF)
                            p0 = b;
                            isp0available = true;
        /* dequeue all frames from inputQueue that are about to be enqueued
         * in the output queue. The order is important because Frame can
         * only be in one list at a time */
        int64_t pts[X265_BFRAME_MAX + 1];
        for (int i = 0; i <= bframes; i++)
            Frame *curFrame;
            curFrame = m_inputQueue.popFront();
            pts[i] = curFrame->m_pts;


        /* add non-B to output queue */
        // The saved PTS values are handed out in this order: non-B, then B-refs, then plain Bs.
        int idx = 0;
        list[bframes]->m_reorderedPts = pts[idx++];
        /* Add B-ref frame next to P frame in output queue, the B-ref encode before non B-ref frame */
        if (brefs)
            for (int i = 0; i < bframes; i++)
                if (list[i]->m_lowres.sliceType == X265_TYPE_BREF)
                    list[i]->m_reorderedPts = pts[idx++];
        /* add B frames to output queue */
        for (int i = 0; i < bframes; i++)
            /* push all the B frames into output queue except B-ref, which already pushed into output queue */
            if (list[i]->m_lowres.sliceType != X265_TYPE_BREF)
                list[i]->m_reorderedPts = pts[idx++];

        // When the new last non-B frame is an I type and cuTree or VBV lookahead is in use,
        // re-run the analysis on the frames still queued behind it.
        bool isKeyFrameAnalyse = (m_param->rc.cuTree || (m_param->rc.vbvBufferSize && m_param->lookaheadDepth));
        if (isKeyFrameAnalyse && IS_X265_TYPE_I(m_lastNonB->sliceType))
            Frame *curFrame = m_inputQueue.first();
            frames[0] = m_lastNonB;
            int j;
            for (j = 0; j < maxSearch; j++)
                frames[j + 1] = &curFrame->m_lowres;
                curFrame = curFrame->m_next;

            // Null-terminate the frame array after the last collected entry.
            frames[j + 1] = NULL;
            if (!m_param->rc.bStatRead)
                slicetypeAnalyse(frames, true);
            bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
            if ((m_param->analysisLoad && m_param->scaleFactor && bIsVbv) || m_param->bliveVBV2pass)
                int numFrames;
                for (numFrames = 0; numFrames < maxSearch; numFrames++)
                    Lowres *fenc = frames[numFrames + 1];
                    if (!fenc)
                vbvLookahead(frames, numFrames, true);



// Abridged excerpt: works through the queued pre-analysis frames, initialising each frame's
// lowres data and running the per-frame estimators. Lock statements are not shown.
void PreLookaheadGroup::processTasks(int workerThreadID)
    // A negative workerThreadID is replaced by the pool's worker count (m_numWorkers), or 0 when there is no pool.
    if (workerThreadID < 0)
        workerThreadID = m_lookahead.m_pool ? m_lookahead.m_pool->m_numWorkers : 0;
    // Fetch the LookaheadTLD entry for this worker index.
    LookaheadTLD& tld = m_lookahead.m_tld[workerThreadID];
    // Per the original note, m_lock is acquired here; the statement is not visible in this excerpt.
    // Loop while the number of acquired jobs (m_jobAcquired) is below the total (m_jobTotal).
    while (m_jobAcquired < m_jobTotal)
    {   // Take the next frame to pre-analyse and increment m_jobAcquired.
        Frame* preFrame = m_preframes[m_jobAcquired++];
        ProfileLookaheadTime(m_lookahead.m_preLookaheadElapsedTime, m_lookahead.m_countPreLookahead);
        // Per the original note, m_lock is released here (not visible in this excerpt).
        // Initialise the frame's lowres data from preFrame->m_fencPic and preFrame->m_poc.
        preFrame->m_lowres.init(preFrame->m_fencPic, preFrame->m_poc);
        // With adaptive quantisation enabled (m_lookahead.m_bAdaptiveQuant), compute the adaptive-quant data for the frame.
        if (m_lookahead.m_bAdaptiveQuant)
            tld.calcAdaptiveQuantFrame(preFrame, m_lookahead.m_param);
        // With histogram-based scene-cut detection enabled, the original note says
        // tld.collectPictureStatistics is called; the call is not visible in this excerpt.
        if (m_lookahead.m_param->bHistBasedSceneCut)
        // Run the lowres intra estimate for the frame.
        tld.lowresIntraEstimate(preFrame->m_lowres, m_lookahead.m_param->rc.qgSize);
        preFrame->m_lowresInit = true;
        // Per the original note, m_lock is re-acquired here (not visible in this excerpt).
    // Per the original note, m_lock is released here (not visible in this excerpt).



/* Estimates the intra coding cost of every low-resolution CU of `fenc`.
 * For each CU it evaluates DC, planar and a subset of angular predictions
 * with SATD, stores the best cost and mode in fenc.intraCost / fenc.intraMode,
 * and accumulates per-row and per-frame totals.
 *
 * fenc    low-resolution frame that is read (lowresPlane[0], lumaStride,
 *         invQscaleFactor*) and written (lowresCosts, intraCost, intraMode,
 *         rowSatds, costEst, costEstAq).
 * qgSize  when equal to 8 the invQscaleFactor8x8 table is used for the
 *         AQ-weighted cost.
 *
 * NOTE(review): widthInCU / heightInCU are not declared in this block
 * (presumably class members), and this listing appears to have lost brace
 * and `else` lines -- confirm against the upstream source. */
void LookaheadTLD::lowresIntraEstimate(Lowres& fenc, uint32_t qgSize)
{   // Scratch buffers: prediction output, a copy of the source CU, and two neighbour
    // sample arrays; `samples` aliases neighbours[0] and `filtered` aliases neighbours[1]
    ALIGN_VAR_32(pixel, prediction[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
    pixel fencIntra[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE];
    pixel neighbours[2][X265_LOWRES_CU_SIZE * 4 + 1];
    pixel* samples = neighbours[0], *filtered = neighbours[1];
    // Constants: lookahead lambda, the intra penalty derived from it, the fixed per-CU
    // overhead, and the CU size / size index used to pick primitives
    const int lookAheadLambda = (int)x265_lambda_tab[X265_LOOKAHEAD_QP];
    const int intraPenalty = 5 * lookAheadLambda;
    const int lowresPenalty = 4; /* fixed CU cost overhead */

    const int cuSize  = X265_LOWRES_CU_SIZE;
    const int cuSize2 = cuSize << 1;
    const int sizeIdx = X265_LOWRES_CU_BITS - 2;

    pixelcmp_t satd = primitives.pu[sizeIdx].satd;
    // 1 when cuSize >= 8: planar prediction then reads neighbours[1] (the filtered samples)
    int planar = !!(cuSize >= 8);

    int costEst = 0, costEstAq = 0;
    // Walk the CU rows, cuY = 0 .. heightInCU - 1
    for (int cuY = 0; cuY < heightInCU; cuY++)
        fenc.rowSatds[0][0][cuY] = 0;
        // Walk the CU columns, cuX = 0 .. widthInCU - 1
        for (int cuX = 0; cuX < widthInCU; cuX++)
        {   // Linear CU index (cuXY) and pixel offset of the CU inside the luma plane
            const int cuXY = cuX + cuY * widthInCU;
            const intptr_t pelOffset = cuSize * cuX + cuSize * cuY * fenc.lumaStride;
            pixel *pixCur = fenc.lowresPlane[0] + pelOffset;

            /* copy fenc pixels */// copy the current CU's pixels into fencIntra
            primitives.cu[sizeIdx].copy_pp(fencIntra, cuSize, pixCur, fenc.lumaStride);

            /* collect reference sample pixels */
            // Step back one row and one column, then gather the top row and the left column into `samples`
            pixCur -= fenc.lumaStride + 1;
            memcpy(samples, pixCur, (2 * cuSize + 1) * sizeof(pixel)); /* top */
            for (int i = 1; i <= 2 * cuSize; i++)
                samples[cuSize2 + i] = pixCur[i * fenc.lumaStride];    /* left */

            // Produce the filtered neighbour set from the raw one
            primitives.cu[sizeIdx].intra_filter(samples, filtered);

            int cost, icost = me.COST_MAX;
            uint32_t ilowmode = 0;
            // Try DC then planar: predict, measure the SATD against the source CU, and keep the
            // cheaper of the two in icost / ilowmode
            /* DC and planar */
            primitives.cu[sizeIdx].intra_pred[DC_IDX](prediction, cuSize, samples, 0, cuSize <= 16);
            cost = satd(fencIntra, cuSize, prediction, cuSize);
            COPY2_IF_LT(icost, cost, ilowmode, DC_IDX);

            primitives.cu[sizeIdx].intra_pred[PLANAR_IDX](prediction, cuSize, neighbours[planar], 0, 0);
            cost = satd(fencIntra, cuSize, prediction, cuSize);
            COPY2_IF_LT(icost, cost, ilowmode, PLANAR_IDX);

            /* scan angular predictions */
            int filter, acost = me.COST_MAX;
            uint32_t mode, alowmode = 4;
            // Coarse pass: test every fifth angular mode (5, 10, ..., 30) and remember the cheapest
            for (mode = 5; mode < 35; mode += 5)
                filter = !!(g_intraFilterFlags[mode] & cuSize);
                primitives.cu[sizeIdx].intra_pred[mode](prediction, cuSize, neighbours[filter], mode, cuSize <= 16);
                cost = satd(fencIntra, cuSize, prediction, cuSize);
                COPY2_IF_LT(acost, cost, alowmode, mode);
            // Refinement: test the modes at distance 2, then distance 1, on either side of the
            // current best angular mode, updating the best as it goes
            for (uint32_t dist = 2; dist >= 1; dist--)
                int minusmode = alowmode - dist;
                int plusmode = alowmode + dist;

                mode = minusmode;
                filter = !!(g_intraFilterFlags[mode] & cuSize);
                primitives.cu[sizeIdx].intra_pred[mode](prediction, cuSize, neighbours[filter], mode, cuSize <= 16);
                cost = satd(fencIntra, cuSize, prediction, cuSize);
                COPY2_IF_LT(acost, cost, alowmode, mode);

                mode = plusmode;
                filter = !!(g_intraFilterFlags[mode] & cuSize);
                primitives.cu[sizeIdx].intra_pred[mode](prediction, cuSize, neighbours[filter], mode, cuSize <= 16);
                cost = satd(fencIntra, cuSize, prediction, cuSize);
                COPY2_IF_LT(acost, cost, alowmode, mode);
            // Fold the best angular result into the overall best (DC / planar / angular)
            COPY2_IF_LT(icost, acost, ilowmode, alowmode);
            // Add the intra penalty and fixed overhead, then store the result for this CU
            icost += intraPenalty + lowresPenalty; /* estimate intra signal cost */

            fenc.lowresCosts[0][0][cuXY] = (uint16_t)(X265_MIN(icost, LOWRES_COST_MASK) | (0 << LOWRES_COST_SHIFT));
            fenc.intraCost[cuXY] = icost;
            fenc.intraMode[cuXY] = (uint8_t)ilowmode;
            /* do not include edge blocks in the 
            frame cost estimates, they are not very accurate */
            // A CU counts toward the frame score when it is not on the picture border, or when
            // the picture is at most 2 CUs wide or tall
            const bool bFrameScoreCU = (cuX > 0 && cuX < widthInCU - 1 &&
                                        cuY > 0 && cuY < heightInCU - 1) || widthInCU <= 2 || heightInCU <= 2;
            int icostAq;
            // AQ-weighted cost: scaled by the inverse qscale factor (rounded, >> 8) for scored CUs
            // when the factor table exists, otherwise the plain cost.
            // NOTE(review): the two assignments below look like the if / else arms of the
            // qgSize test; the `else` line is not visible in this listing
            if (qgSize == 8)
                icostAq = (bFrameScoreCU && fenc.invQscaleFactor) ? ((icost * fenc.invQscaleFactor8x8[cuXY] + 128) >> 8) : icost;
                icostAq = (bFrameScoreCU && fenc.invQscaleFactor) ? ((icost * fenc.invQscaleFactor[cuXY] +128) >> 8) : icost;

            // Only scored CUs contribute to the frame totals
            if (bFrameScoreCU)
                costEst += icost;
                costEstAq += icostAq;

            // The row total accumulates the AQ-weighted cost
            fenc.rowSatds[0][0][cuY] += icostAq;
    // Publish the frame-level intra cost estimates
    fenc.costEst[0][0] = costEst;
    fenc.costEstAq[0][0] = costEstAq;



/* Assigns slice types to the run of undecided frames that follows frames[0].
 * The visible steps are: count the undecided frames, derive the keyframe
 * limit, optionally pre-batch motion searches and frame costs, test for a
 * scenecut, choose B/P placement according to bFrameAdaptive, run
 * aqMotion / cuTree / vbvLookahead when enabled, place I frames at the
 * keyframe interval, and finally reset frames from resetStart onward back
 * to X265_TYPE_AUTO.
 *
 * frames     array of Lowres pointers; frames[0] supplies frameNum and
 *            frames[1..] are the candidates. The entry after the last
 *            undecided frame is set to NULL.
 * bKeyframe  forwarded to cuTree / aqMotion / vbvLookahead and used when
 *            choosing resetStart.
 *
 * NOTE(review): this listing appears to have lost many lines (braces,
 * `else`, `break`, `continue`, `return`, and some loop bodies). Comments
 * describe only what is visible and flag the gaps -- confirm against the
 * upstream source. */
void Lookahead::slicetypeAnalyse(Lowres **frames, bool bKeyframe)
    int numFrames, origNumFrames, keyintLimit, framecnt;
    // maxSearch is the smaller of m_param->lookaheadDepth and X265_LOOKAHEAD_MAX
    int maxSearch = X265_MIN(m_param->lookaheadDepth, X265_LOOKAHEAD_MAX);
    int cuCount = m_8x8Blocks;
    int resetStart;
    bool bIsVbvLookahead = m_param->rc.vbvBufferSize && m_param->lookaheadDepth;

    /* count undecided frames */
    // Walk frames[1..] for at most maxSearch entries, testing for a missing frame or a frame
    // whose sliceType is no longer X265_TYPE_AUTO.
    // NOTE(review): the action taken on that test (presumably `break`) is not visible
    for (framecnt = 0; framecnt < maxSearch; framecnt++)
        Lowres *fenc = frames[framecnt + 1];
        if (!fenc || fenc->sliceType != X265_TYPE_AUTO)
    // No undecided frame found: cuTree still runs when enabled.
    // NOTE(review): the original note says the function then returns; no `return` is visible
    if (!framecnt)
        if (m_param->rc.cuTree)
            cuTree(frames, 0, bKeyframe);
    }// Null-terminate the frame list right after the last undecided frame
    frames[framecnt + 1] = NULL;
    // With zone-config reset enabled, update m_param->keyframeMax from the zone containing frames[0]
    if (m_param->bResetZoneConfig)
        for (int i = 0; i < m_param->rc.zonefileCount; i++)
            int curZoneStart = m_param->rc.zones[i].startFrame, nextZoneStart = 0;
            // A non-zero zone start is shifted by that zone's radl
            curZoneStart += curZoneStart ? m_param->rc.zones[i].zoneParam->radl : 0;
            // Next zone start (plus its radl), or totalFrames for the last zone
            nextZoneStart += (i + 1 < m_param->rc.zonefileCount) ? m_param->rc.zones[i + 1].startFrame + m_param->rc.zones[i + 1].zoneParam->radl : m_param->totalFrames;
            if (curZoneStart <= frames[0]->frameNum && nextZoneStart > frames[0]->frameNum)
                m_param->keyframeMax = nextZoneStart - curZoneStart;
            if (m_param->rc.zones[m_param->rc.zonefileCount - 1].startFrame <= frames[0]->frameNum && nextZoneStart == 0)
                m_param->keyframeMax = m_param->rc.zones[0].keyframeMax;
    }// keylimit starts at keyframeMax and is narrowed by the chunk start / end when frames[0] precedes chunkEnd
    int keylimit = m_param->keyframeMax;
    if (frames[0]->frameNum < m_param->chunkEnd)
        int chunkStart = (m_param->chunkStart - m_lastKeyframe - 1);
        int chunkEnd = (m_param->chunkEnd - m_lastKeyframe);
        if ((chunkStart > 0) && (chunkStart < m_param->keyframeMax))
            keylimit = chunkStart;
        else if ((chunkEnd > 0) && (chunkEnd < m_param->keyframeMax))
            keylimit = chunkEnd;
    // keyFrameLimit: keylimit expressed relative to frames[0] and the last keyframe
    int keyFrameLimit = keylimit + m_lastKeyframe - frames[0]->frameNum - 1;
    // With gopLookahead and a limit within one mini-GOP, the limit is extended by gopLookahead.
    // NOTE(review): the second assignment looks like the `else` arm; the `else` line is not visible
    if (m_param->gopLookahead && keyFrameLimit <= m_param->bframes + 1)
        keyintLimit = keyFrameLimit + m_param->gopLookahead;
        keyintLimit = keyFrameLimit;
    // numFrames: all undecided frames under intra refresh, otherwise capped by keyintLimit;
    // VBV lookahead restores the full count
    origNumFrames = numFrames = m_param->bIntraRefresh ? framecnt : X265_MIN(framecnt, keyintLimit);
    if (bIsVbvLookahead)
        numFrames = framecnt;
    // NOTE(review): the statement for the open-GOP branch below is not visible in this listing
    else if (m_param->bOpenGOP && numFrames < framecnt)
    else if (numFrames == 0)
        frames[1]->sliceType = X265_TYPE_I;
    if (m_bBatchMotionSearch)
    {   // Build a CostEstimateGroup and queue, for each frame b, a search against an earlier
        // frame p0 and -- when possible -- a later frame p1
        /* pre-calculate all motion searches, using many worker threads */
        CostEstimateGroup estGroup(*this, frames);
        for (int b = 2; b < numFrames; b++)
        {   // Only pairs with p0 and p1 at the same distance i from b are queued here;
            // p1 falls back to b when the forward search is out of range or already done
            for (int i = 1; i <= m_param->bframes + 1; i++)
                int p0 = b - i;
                // NOTE(review): the actions for the two tests below (presumably `continue`) are not visible
                if (p0 < 0)

                /* Skip search if already done */
                if (frames[b]->lowresMvs[0][i][0].x != 0x7FFF)

                /* perform search to p1 at same distance, if possible */
                int p1 = b + i;
                if (p1 >= numFrames || frames[b]->lowresMvs[1][i][0].x != 0x7FFF)
                    p1 = b;

                estGroup.add(p0, p1, b);
        /* auto-disable after the first batch if pool is small */
        m_bBatchMotionSearch &= m_pool->m_numWorkers >= 4;

        if (m_bBatchFrameCosts)
        {   // On top of the equal-distance pairs above, queue the remaining (p0, p1) combinations
            /* pre-calculate all frame cost estimates, using many worker threads */
            for (int b = 2; b < numFrames; b++)
                for (int i = 1; i <= m_param->bframes + 1; i++)
                    // NOTE(review): the actions for the guard tests in this nest (presumably
                    // `continue` / `break`) are not visible
                    if (b < i)

                    /* only measure frame cost in this pass if motion searches
                     * are already done */
                    if (frames[b]->lowresMvs[0][i][0].x == 0x7FFF)

                    int p0 = b - i;

                    for (int j = 0; j <= m_param->bframes; j++)
                        int p1 = b + j;
                        if (p1 >= numFrames)

                        /* ensure P1 search is done */
                        if (j && frames[b]->lowresMvs[1][j][0].x == 0x7FFF)

                        /* ensure frame cost is not done */
                        if (frames[b]->costEst[i][j] >= 0)

                        estGroup.add(p0, p1, b);

            /* auto-disable after the first batch if the pool is not large */
            m_bBatchFrameCosts &= m_pool->m_numWorkers > 12;

    int numBFrames = 0;
    int numAnalyzed = numFrames;
    bool isScenecut = false;

    // Scenecut test between frames[0] and frames[1].
    // NOTE(review): the two assignments look like if / else arms (histogram-based vs. regular);
    // the `else` line is not visible
    if (m_param->bHistBasedSceneCut)
        isScenecut = histBasedScenecut(frames, 0, 1, origNumFrames);
        isScenecut = scenecut(frames, 0, 1, true, origNumFrames);

    /* When scenecut threshold is set, use scenecut detection for I frame placements */
    if (m_param->scenecutThreshold && isScenecut)
    {   // Mark frames[1] as an I slice.
        // NOTE(review): the original note says the function returns here; no `return` is visible
        frames[1]->sliceType = X265_TYPE_I;
    // GOP lookahead: when the keyframe limit falls inside the first mini-GOP, scan later
    // mini-GOPs for scenecuts and set m_extendGopBoundary when one is confirmed.
    // m_isSceneTransition is saved before the scan and restored after it.
    if (m_param->gopLookahead && (keyFrameLimit >= 0) && (keyFrameLimit <= m_param->bframes + 1))
        bool sceneTransition = m_isSceneTransition;
        m_extendGopBoundary = false;
        for (int i = m_param->bframes + 1; i < origNumFrames; i += m_param->bframes + 1)
            scenecut(frames, i, i + 1, true, origNumFrames);

            for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1, origNumFrames); j++)
                if (frames[j]->bScenecut && scenecutInternal(frames, j - 1, j, true))
                    m_extendGopBoundary = true;
            // NOTE(review): the action taken once the boundary is extended (presumably `break`) is not visible
            if (m_extendGopBoundary)
        m_isSceneTransition = sceneTransition;
    if (m_param->bframes)
        if (m_param->bFrameAdaptive == X265_B_ADAPT_TRELLIS)
            if (numFrames > 1)
            {   // best_paths rows 0 and 1 start as "" and "P"
                char best_paths[X265_BFRAME_MAX + 1][X265_LOOKAHEAD_MAX + 1] = { "", "P" };
                int best_path_index = numFrames % (X265_BFRAME_MAX + 1);
                /* Perform the frame type analysis. */
                for (int j = 2; j <= numFrames; j++)
                    slicetypePath(frames, j, best_paths);
                // Number of leading 'B' characters in the chosen path
                numBFrames = (int)strspn(best_paths[best_path_index], "B");
                /* Load the results of the analysis into the frame types. */
                for (int j = 1; j < numFrames; j++)
                    frames[j]->sliceType = best_paths[best_path_index][j - 1] == 'B' ? X265_TYPE_B : X265_TYPE_P;
            // The last analysed frame is always a P
            frames[numFrames]->sliceType = X265_TYPE_P;
        else if (m_param->bFrameAdaptive == X265_B_ADAPT_FAST)
            CostEstimateGroup estGroup(*this, frames);

            int64_t cost1p0, cost2p0, cost1b1, cost2p1;

            // The loop index i is advanced inside the body (by 1, 2, or to j).
            // NOTE(review): the `continue` statements that would follow each early advance are not visible
            for (int i = 0; i <= numFrames - 2; )
                cost2p1 = estGroup.singleCost(i + 0, i + 2, i + 2, true);
                // More than half the CUs of frame i+2 counted as intra: make both frames P
                if (frames[i + 2]->intraMbs[2] > cuCount / 2)
                    frames[i + 1]->sliceType = X265_TYPE_P;
                    frames[i + 2]->sliceType = X265_TYPE_P;
                    i += 2;

                cost1b1 = estGroup.singleCost(i + 0, i + 2, i + 1);
                cost1p0 = estGroup.singleCost(i + 0, i + 1, i + 1);
                cost2p0 = estGroup.singleCost(i + 1, i + 2, i + 2);

                // Two P frames cheaper than B + P: frame i+1 becomes P
                if (cost1p0 + cost2p0 < cost1b1 + cost2p1)
                    frames[i + 1]->sliceType = X265_TYPE_P;
                    i += 1;

// arbitrary and untuned
#define INTER_THRESH 300
#define P_SENS_BIAS (50 - m_param->bFrameBias)
                frames[i + 1]->sliceType = X265_TYPE_B;

                // Extend the run of B frames while the P cost of the following frame stays under
                // the threshold and its intra count stays at or below a third of the CUs.
                // NOTE(review): the action on the failing test (presumably `break`) is not visible
                int j;
                for (j = i + 2; j <= X265_MIN(i + m_param->bframes, numFrames - 1); j++)
                    int64_t pthresh = X265_MAX(INTER_THRESH - P_SENS_BIAS * (j - i - 1), INTER_THRESH / 10);
                    int64_t pcost = estGroup.singleCost(i + 0, j + 1, j + 1, true);
                    if (pcost > pthresh * cuCount || frames[j + 1]->intraMbs[j - i + 1] > cuCount / 3)
                    frames[j]->sliceType = X265_TYPE_B;

                frames[j]->sliceType = X265_TYPE_P;
                i = j;
            frames[numFrames]->sliceType = X265_TYPE_P;
            // Count the leading B frames.
            // NOTE(review): the loop body (presumably numBFrames++) and the `else` that would
            // introduce the fixed-pattern branch below are not visible
            numBFrames = 0;
            while (numBFrames < numFrames && frames[numBFrames + 1]->sliceType == X265_TYPE_B)
            // Fixed pattern: numBFrames B frames followed by a P, repeated
            numBFrames = X265_MIN(numFrames - 1, m_param->bframes);
            for (int j = 1; j < numFrames; j++)
                frames[j]->sliceType = (j % (numBFrames + 1)) ? X265_TYPE_B : X265_TYPE_P;

            frames[numFrames]->sliceType = X265_TYPE_P;
        // RADL handling: radl comes from m_param->radl, or from the first zone when zone reset is active
        int zoneRadl = m_param->rc.zonefileCount && m_param->bResetZoneConfig ? m_param->rc.zones->zoneParam->radl : 0;
        bool bForceRADL = zoneRadl || (m_param->radl && (m_param->keyframeMax == m_param->keyframeMin));
        bool bLastMiniGop = (framecnt >= m_param->bframes + 1) ? false : true;// true when fewer than bframes + 1 undecided frames remain
        int radl = m_param->radl ? m_param->radl : zoneRadl;
        int preRADL = m_lastKeyframe + m_param->keyframeMax - radl - 1; /*Frame preceeding RADL in POC order*/
        if (bForceRADL && (frames[0]->frameNum == preRADL) && !bLastMiniGop)
            int j = 1;
            numBFrames = m_param->radl ? m_param->radl : zoneRadl;
            for (; j <= numBFrames; j++)// mark frames[1..numBFrames] as B
                frames[j]->sliceType = X265_TYPE_B;
            // The frame right after the forced B run becomes an I
            frames[j]->sliceType = X265_TYPE_I;
        else /* Check scenecut and RADL on the first minigop. */
            for (int j = 1; j < numBFrames + 1; j++)
            {   // A scenecut between j and j+1, or frame j being the pre-RADL frame under forced
                // RADL, turns frame j into a P and records j in numAnalyzed.
                // NOTE(review): the original note says the loop then exits; no `break` is visible
                if (scenecut(frames, j, j + 1, false, origNumFrames) ||
                    (bForceRADL && (frames[j]->frameNum == preRADL)))
                    frames[j]->sliceType = X265_TYPE_P;
                    numAnalyzed = j;
        resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2, numAnalyzed + 1);
        // NOTE(review): the lines below look like the `else` arm of `if (m_param->bframes)`
        // (all frames P); the `else` line is not visible
        for (int j = 1; j <= numFrames; j++)
            frames[j]->sliceType = X265_TYPE_P;

        resetStart = bKeyframe ? 1 : 2;
    if (m_param->bAQMotion)
        aqMotion(frames, bKeyframe);
    if (m_param->rc.cuTree)
        cuTree(frames, X265_MIN(numFrames, m_param->keyframeMax), bKeyframe);

    // Drop the gopLookahead extension when no GOP boundary extension was found
    if (m_param->gopLookahead && (keyFrameLimit >= 0) && (keyFrameLimit <= m_param->bframes + 1) && !m_extendGopBoundary)
        keyintLimit = keyFrameLimit;

    // Without intra refresh, place an I frame every keyframeMax frames starting at keyintLimit + 1,
    // and pull resetStart back so frames after the first such I are re-decided later
    if (!m_param->bIntraRefresh)
        for (int j = keyintLimit + 1; j <= numFrames; j += m_param->keyframeMax)
            frames[j]->sliceType = X265_TYPE_I;
            resetStart = X265_MIN(resetStart, j + 1);
    if (bIsVbvLookahead)
        vbvLookahead(frames, numFrames, bKeyframe);
    int maxp1 = X265_MIN(m_param->bframes + 1, origNumFrames);

    /* Restore frame types for all frames that haven't actually been decided yet. */
    for (int j = resetStart; j <= numFrames; j++)
        frames[j]->sliceType = X265_TYPE_AUTO;
        /* If any frame marked as scenecut is being restarted for sliceDecision, 
         * undo scene Transition flag */
        if (j <= maxp1 && frames[j]->bScenecut && m_isSceneTransition)
            m_isSceneTransition = false;



/* Returns the estimated cost of coding frame `b` with `p0` as the backward
 * reference and `p1` as the forward reference (p1 == b means no forward
 * reference). A result already stored in fenc->costEst is reused; otherwise
 * the cost is accumulated CU by CU, either through the cooperative slice
 * path or through the in-thread loops below, and then stored.
 *
 * tld            thread-local data passed to weightsAnalyse / estimateCUCost;
 *                tld.ncu is read for the intra penalty.
 * p0, p1, b      indices into m_frames.
 * bIntraPenalty  when true, the returned score is increased in proportion
 *                to fenc->intraMbs[b - p0].
 *
 * NOTE(review): this listing appears to have lost brace and `else` lines;
 * the branch structure noted below is inferred from indentation and the
 * surviving comments -- confirm against the upstream source. */
int64_t CostEstimateGroup::estimateFrameCost(LookaheadTLD& tld, int p0, int p1, int b, bool bIntraPenalty)
    Lowres*     fenc  = m_frames[b];
    x265_param* param = m_lookahead.m_param;
    int64_t     score = 0;

    // Reuse a stored estimate when both the frame cost and the first row SATD are filled in.
    // NOTE(review): everything from `bool bDoSearch[2]` down to the final costEst store looks
    // like the `else` arm of this test; the `else` line is not visible
    if (fenc->costEst[b - p0][p1 - b] >= 0 && fenc->rowSatds[b - p0][p1 - b][0] != -1)
        score = fenc->costEst[b - p0][p1 - b];
        // A search is needed for a list when the first stored MV still holds 0x7FFF
        bool bDoSearch[2];
        bDoSearch[0] = fenc->lowresMvs[0][b - p0][0].x == 0x7FFF;
        bDoSearch[1] = p1 > b && fenc->lowresMvs[1][p1 - b][0].x == 0x7FFF;

        // 0x7FFE is written before searching; the checks assert it is not already present
        // (presumably an "in progress" marker -- see the check messages)
        X265_CHECK(!(p0 < b && fenc->lowresMvs[0][b - p0][0].x == 0x7FFE), "motion search batch duplication L0\n");
        X265_CHECK(!(p1 > b && fenc->lowresMvs[1][p1 - b][0].x == 0x7FFE), "motion search batch duplication L1\n");
        if (bDoSearch[0]) fenc->lowresMvs[0][b - p0][0].x = 0x7FFE;
        if (bDoSearch[1]) fenc->lowresMvs[1][p1 - b][0].x = 0x7FFE;

        // Weighted-prediction analysis runs only when enabled and an L0 search is pending
        fenc->weightedRef[b - p0].isWeighted = false;
        if (param->bEnableWeightedPred && bDoSearch[0])
            tld.weightsAnalyse(*m_frames[b], *m_frames[p0]);

        // Reset the accumulators that estimateCUCost adds into
        fenc->costEst[b - p0][p1 - b] = 0;
        fenc->costEstAq[b - p0][p1 - b] = 0;
        if (!m_batchMode && m_lookahead.m_numCoopSlices > 1 && ((p1 > b) || bDoSearch[0] || bDoSearch[1]))
            /* Use cooperative mode if a thread pool is available and the cost estimate is
             * going to need motion searches or bidir measurements */

            memset(&m_slice, 0, sizeof(Slice) * m_lookahead.m_numCoopSlices);

            X265_CHECK(!m_batchMode, "single CostEstimateGroup instance cannot mix batch modes\n");
            // Describe the job for the cooperating workers
            m_coop.p0 = p0;
            m_coop.p1 = p1;
            m_coop.b = b;
            m_coop.bDoSearch[0] = bDoSearch[0];
            m_coop.bDoSearch[1] = bDoSearch[1];
            m_jobTotal = m_lookahead.m_numCoopSlices;
            m_jobAcquired = 0;

            tryBondPeers(*m_lookahead.m_pool, m_jobTotal);

            // NOTE(review): no statement that runs or waits for the slice jobs is visible between
            // tryBondPeers and the summation below -- confirm against upstream

            // Sum the per-slice results into the frame totals
            for (int i = 0; i < m_lookahead.m_numCoopSlices; i++)
                fenc->costEst[b - p0][p1 - b] += m_slice[i].costEst;
                fenc->costEstAq[b - p0][p1 - b] += m_slice[i].costEstAq;
                if (p1 == b)
                    fenc->intraMbs[b - p0] += m_slice[i].intraMbs;
        {   // In-thread path: with HME enabled, first compute motion vectors at 1/16th resolution
            /* Calculate MVs for 1/16th resolution*/
            bool lastRow;
            if (param->bEnableHME)
                lastRow = true;
                // CUs are visited bottom-up and right-to-left; lastRow is true only for the bottom row
                for (int cuY = m_lookahead.m_4x4Height - 1; cuY >= 0; cuY--)
                    for (int cuX = m_lookahead.m_4x4Width - 1; cuX >= 0; cuX--)
                        estimateCUCost(tld, cuX, cuY, p0, p1, b, bDoSearch, lastRow, -1, 1);
                    lastRow = false;
            // Main pass over the 8x8 grid, same visiting order; each row SATD is cleared first
            lastRow = true;
            for (int cuY = m_lookahead.m_8x8Height - 1; cuY >= 0; cuY--)
                fenc->rowSatds[b - p0][p1 - b][cuY] = 0;

                for (int cuX = m_lookahead.m_8x8Width - 1; cuX >= 0; cuX--)
                    estimateCUCost(tld, cuX, cuY, p0, p1, b, bDoSearch, lastRow, -1, 0);

                lastRow = false;

        score = fenc->costEst[b - p0][p1 - b];

        // When a forward reference is used (b != p1) the score is scaled by 100 / (130 + bFrameBias)
        if (b != p1)
            score = score * 100 / (130 + param->bFrameBias);

        // Store the (possibly scaled) score for later reuse
        fenc->costEst[b - p0][p1 - b] = score;

    if (bIntraPenalty)
        // arbitrary penalty for I-blocks after B-frames
        score += score * fenc->intraMbs[b - p0] / (tld.ncu * 8);

    return score;


estimateCUCost 用于估算单个编码单元(Coding Unit,CU)的成本:

/* Estimates the cost of a single low-resolution CU of frame `b` predicted
 * from `p0` (list 0) and, when b < p1, from `p1` (list 1). For each list it
 * picks a motion-vector predictor from neighbouring CUs, runs motion
 * estimation, and keeps the cheapest result; bidirectional candidates are
 * then tried for B frames, and the stored intra cost is compared for P
 * frames. The result is accumulated into the frame (or slice) totals, the
 * row SATD and the per-CU lowresCosts entry.
 *
 * tld        thread-local data; tld.me is the motion estimator used.
 * cuX, cuY   CU coordinates in the grid selected by `hme`.
 * p0, p1, b  indices into m_frames.
 * bDoSearch  per-list flag; when false the stored cost for that list is used.
 * lastRow    when true, the candidates taken from the row below are skipped.
 * slice      negative: accumulate into fenc; otherwise into m_slice[slice].
 * hme        true selects the lower-resolution planes, MV arrays and 4x4 grid.
 *
 * NOTE(review): this listing appears to have lost lines (braces, `else`,
 * `continue`, `return`, and several MVC(...) / accumulation statements);
 * gaps are flagged inline -- confirm against the upstream source. */
void CostEstimateGroup::estimateCUCost(LookaheadTLD& tld, int cuX, int cuY, int p0, int p1, int b, bool bDoSearch[2], bool lastRow, int slice, bool hme)
    Lowres *fref0 = m_frames[p0];
    Lowres *fref1 = m_frames[p1];
    Lowres *fenc  = m_frames[b];

    // The weighted L0 reference is used only when weighting is active and this is not the HME pass
    ReferencePlanes *wfref0 = fenc->weightedRef[b - p0].isWeighted && !hme ? &fenc->weightedRef[b - p0] : fref0;
    const int widthInCU = hme ? m_lookahead.m_4x4Width : m_lookahead.m_8x8Width;
    const int heightInCU = hme ? m_lookahead.m_4x4Height : m_lookahead.m_8x8Height;
    const int bBidir = (b < p1);
    const int cuXY = cuX + cuY * widthInCU;
    // Index of the co-located CU in a grid of half the width (used below with the lower-res arrays)
    const int cuXY_4x4 = (cuX / 2) + (cuY / 2) * widthInCU / 2;
    const int cuSize = X265_LOWRES_CU_SIZE;
    const intptr_t pelOffset = cuSize * cuX + cuSize * cuY * (hme ? fenc->lumaStride/2 : fenc->lumaStride);

    // Point the motion estimator at the source CU (lower-res plane for HME, lowres plane otherwise)
    // whenever a search or a bidir measurement will be needed
    if ((bBidir || bDoSearch[0] || bDoSearch[1]) && hme)
        tld.me.setSourcePU(fenc->lowerResPlane[0], fenc->lumaStride / 2, pelOffset, cuSize, cuSize, X265_HEX_SEARCH, m_lookahead.m_param->hmeSearchMethod[0], m_lookahead.m_param->hmeSearchMethod[1], 1);
    else if((bBidir || bDoSearch[0] || bDoSearch[1]) && !hme)
        tld.me.setSourcePU(fenc->lowresPlane[0], fenc->lumaStride, pelOffset, cuSize, cuSize, X265_HEX_SEARCH, m_lookahead.m_param->hmeSearchMethod[0], m_lookahead.m_param->hmeSearchMethod[1], 1);

    // Small fixed bias added to the block cost below
    /* A small, arbitrary bias to avoid VBV problems caused by zero-residual lookahead blocks. */
    int lowresPenalty = 4;
    int listDist[2] = { b - p0, p1 - b};

    MV mvmin, mvmax;
    int bcost = tld.me.COST_MAX;
    int listused = 0;

    // TODO: restrict to slices boundaries
    // establish search bounds that don't cross extended frame boundaries
    mvmin.x = (int32_t)(-cuX * cuSize - 8);
    mvmin.y = (int32_t)(-cuY * cuSize - 8);
    mvmax.x = (int32_t)((widthInCU - cuX - 1) * cuSize + 8);
    mvmax.y = (int32_t)((heightInCU - cuY - 1) * cuSize + 8);
    // One iteration for list 0, plus one for list 1 when bidirectional
    for (int i = 0; i < 1 + bBidir; i++)
        int& fencCost = hme ? fenc->lowerResMvCosts[i][listDist[i]][cuXY] : fenc->lowresMvCosts[i][listDist[i]][cuXY];
        int skipCost = INT_MAX;

        // No search requested for this list: take the stored cost.
        // NOTE(review): presumably followed by `continue`; not visible in this listing
        if (!bDoSearch[i])
            COPY2_IF_LT(bcost, fencCost, listused, i + 1);

        int numc = 0;
        MV mvc[5], mvp;
        MV* fencMV = hme ? &fenc->lowerResMvs[i][listDist[i]][cuXY] : &fenc->lowresMvs[i][listDist[i]][cuXY];
        ReferencePlanes* fref = i ? fref1 : wfref0;
        // Fill mvc[] with candidate vectors from CUs that were already processed
        // (the caller walks the grid in reverse order, so those are to the right and below)
        /* Reverse-order MV prediction */
#define MVC(mv) mvc[numc++] = mv;
        // NOTE(review): some MVC(...) statements appear to be missing below (after the first
        // test and directly under `if (!lastRow)`) -- confirm against upstream
        if (cuX < widthInCU - 1)
        if (!lastRow)
            if (cuX > 0)
                MVC(fencMV[widthInCU - 1]);
            if (cuX < widthInCU - 1)
                MVC(fencMV[widthInCU + 1]);
        // In the non-HME pass, also add the lower-resolution MV scaled by 2 when one with positive cost exists
        if (fenc->lowerResMvs[0][0] && !hme && fenc->lowerResMvCosts[i][listDist[i]][cuXY_4x4] > 0)
            MVC((fenc->lowerResMvs[i][listDist[i]][cuXY_4x4]) * 2);
#undef MVC

        // No candidates: predictor is the zero vector.
        // NOTE(review): the candidate evaluation below looks like the `else` arm; `else` is not visible
        if (!numc)
            mvp = 0;
            ALIGN_VAR_32(pixel, subpelbuf[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
            int mvpcost = MotionEstimate::COST_MAX;

            /* measure SATD cost of each neighbor MV (estimating merge analysis)
             * and use the lowest cost MV as MVP (estimating AMVP). Since all
             * mvc[] candidates are measured here, none are passed to motionEstimate */
            for (int idx = 0; idx < numc; idx++)
                intptr_t stride = X265_LOWRES_CU_SIZE;
                pixel *src = fref->lowresMC(pelOffset, mvc[idx], subpelbuf, stride, hme);
                int cost = tld.me.bufSATD(src, stride);
                COPY2_IF_LT(mvpcost, cost, mvp, mvc[idx]);
                /* Except for mv0 case, everyting else is likely to have enough residual to not trigger the skip. */
                if (!mvp.notZero() && bBidir)
                    skipCost = cost;

        // Search range: the HME range for the current level when HME is enabled, otherwise s_merange
        int searchRange = m_lookahead.m_param->bEnableHME ? (hme ? m_lookahead.m_param->hmeRange[0] : m_lookahead.m_param->hmeRange[1]) : s_merange;
        /* ME will never return a cost larger than the cost @MVP, so we do not
         * have to check that ME cost is more than the estimated merge cost */
        if(!hme)// fencCost and *fencMV are produced by motion estimation
            // NOTE(review): the second call (passing the lower-res plane) looks like the `else`
            // arm for the HME pass; `else` is not visible
            fencCost = tld.me.motionEstimate(fref, mvmin, mvmax, mvp, 0, NULL, searchRange, *fencMV, m_lookahead.m_param->maxSlices);
            fencCost = tld.me.motionEstimate(fref, mvmin, mvmax, mvp, 0, NULL, searchRange, *fencMV, m_lookahead.m_param->maxSlices, fref->lowerResPlane[0]);
        // For bidir, a small zero-MV cost that beats the ME result replaces it and zeroes the MV
        if (skipCost < 64 && skipCost < fencCost && bBidir)
            fencCost = skipCost;
            *fencMV = 0;
        }// Offer this list's cost to bcost / listused via COPY2_IF_LT (macro defined elsewhere)
        COPY2_IF_LT(bcost, fencCost, listused, i + 1);
    // NOTE(review): the statement for the HME case (presumably an early `return`) is not visible
    if (hme)
    // B frame (bBidir): additionally measure bidirectional candidates.
    // P frame (otherwise): additionally compare against the stored intra cost.
    if (bBidir) /* B, also consider bidir */
        /* NOTE: the wfref0 (weightp) is not used for BIDIR */
        // Motion-compensate both references with their stored MVs to obtain src0 and src1
        /* avg(l0-mv, l1-mv) candidate */
        ALIGN_VAR_32(pixel, subpelbuf0[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
        ALIGN_VAR_32(pixel, subpelbuf1[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
        intptr_t stride0 = X265_LOWRES_CU_SIZE, stride1 = X265_LOWRES_CU_SIZE;
        pixel *src0 = fref0->lowresMC(pelOffset, fenc->lowresMvs[0][listDist[0]][cuXY], subpelbuf0, stride0, 0);
        pixel *src1 = fref1->lowresMC(pelOffset, fenc->lowresMvs[1][listDist[1]][cuXY], subpelbuf1, stride1, 0);
        // `ref` receives the average of the two predictions
        ALIGN_VAR_32(pixel, ref[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
        primitives.pu[LUMA_8x8].pixelavg_pp[NONALIGNED](ref, X265_LOWRES_CU_SIZE, src0, stride0, src1, stride1, 32);
        // SATD of the averaged prediction; listused = 3 marks a bidirectional choice
        int bicost = tld.me.bufSATD(ref, X265_LOWRES_CU_SIZE);
        COPY2_IF_LT(bcost, bicost, listused, 3);
        /* coloc candidate */
        // Same measurement using the co-located (zero-motion) blocks of both references
        src0 = fref0->lowresPlane[0] + pelOffset;
        src1 = fref1->lowresPlane[0] + pelOffset;
        primitives.pu[LUMA_8x8].pixelavg_pp[NONALIGNED](ref, X265_LOWRES_CU_SIZE, src0, fref0->lumaStride, src1, fref1->lumaStride, 32);
        bicost = tld.me.bufSATD(ref, X265_LOWRES_CU_SIZE);
        COPY2_IF_LT(bcost, bicost, listused, 3);
        bcost += lowresPenalty;
    else /* P, also consider intra */
        bcost += lowresPenalty;

        // Intra wins when cheaper; listused = 0 marks an intra choice
        if (fenc->intraCost[cuXY] < bcost)
            bcost = fenc->intraCost[cuXY];
            listused = 0;
    // bFrameScoreCU is true for CUs that are not on the picture border, or when the grid is
    // at most 2 CUs wide or tall
    /* do not include edge blocks in the frame cost estimates, they are not very accurate */
    const bool bFrameScoreCU = (cuX > 0 && cuX < widthInCU - 1 &&
                                cuY > 0 && cuY < heightInCU - 1) || widthInCU <= 2 || heightInCU <= 2;
    int bcostAq;
    // AQ-weighted cost, scaled by the inverse qscale factor for scored CUs.
    // NOTE(review): the two assignments look like if / else arms of the qgSize test; `else` is not visible
    if (m_lookahead.m_param->rc.qgSize == 8)
        bcostAq = (bFrameScoreCU && fenc->invQscaleFactor) ? ((bcost * fenc->invQscaleFactor8x8[cuXY] + 128) >> 8) : bcost;
        bcostAq = (bFrameScoreCU && fenc->invQscaleFactor) ? ((bcost * fenc->invQscaleFactor[cuXY] +128) >> 8) : bcost;

    if (bFrameScoreCU)
    {   // Accumulate into the whole-frame totals or into the slice totals
        if (slice < 0)// negative slice: this call is for the whole frame, not a cooperative slice
            fenc->costEst[b - p0][p1 - b] += bcost;
            fenc->costEstAq[b - p0][p1 - b] += bcostAq;
            // Intra choice on a P frame counts toward intraMbs
            if (!listused && !bBidir)
                fenc->intraMbs[b - p0]++;
            // NOTE(review): the m_slice lines below look like the `else` arm (slice >= 0), and
            // the statement under the final `if` (presumably an m_slice intraMbs increment) is not visible
            m_slice[slice].costEst += bcost;
            m_slice[slice].costEstAq += bcostAq;
            if (!listused && !bBidir)

    // Row total takes the AQ-weighted cost; the per-CU entry packs the clamped cost with listused
    fenc->rowSatds[b - p0][p1 - b][cuY] += bcostAq;
    fenc->lowresCosts[b - p0][p1 - b][cuXY] = (uint16_t)(X265_MIN(bcost, LOWRES_COST_MASK) | (listused << LOWRES_COST_SHIFT));



/* Fills the plannedSatd / plannedType arrays used for VBV planning: walking
 * the decided frames mini-GOP by mini-GOP, it records for frames[nextNonB]
 * (and for the B frames of the current mini-GOP) the estimated cost and
 * slice type of the frames that follow, then terminates the list with
 * X265_TYPE_AUTO.
 *
 * frames     lookahead frame array with slice types already assigned.
 * numFrames  index of the last frame to consider.
 * keyframe   non-zero makes the plan start at prevNonB (index 0) instead of
 *            the first non-B frame.
 *
 * NOTE(review): this listing appears to have lost lines (braces, `else`,
 * `continue`, and the curNonB++ loop bodies); gaps are flagged inline --
 * confirm against the upstream source. */
void Lookahead::vbvLookahead(Lowres **frames, int numFrames, int keyframe)
    int prevNonB = 0, curNonB = 1, idx = 0;
    // Advance curNonB to the first non-B frame.
    // NOTE(review): the loop body (presumably curNonB++) is not visible
    while (curNonB < numFrames && IS_X265_TYPE_B(frames[curNonB]->sliceType))
    int nextNonB = keyframe ? prevNonB : curNonB;
    int nextB = prevNonB + 1;
    int nextBRef = 0, curBRef = 0;
    // With B-pyramid and more than one frame between the non-B frames, the middle frame is the B reference
    if (m_param->bBPyramid && curNonB - prevNonB > 1)
        curBRef = (prevNonB + curNonB + 1) / 2;
    int miniGopEnd = keyframe ? prevNonB : curNonB;
    while (curNonB <= numFrames)
    {   // Record the current non-B frame's cost and type in the plan kept on frames[nextNonB]
        /* P/I cost: This shouldn't include the cost of nextNonB */
        if (nextNonB != curNonB)
            // An I frame is costed against itself, a P frame against the previous non-B frame
            int p0 = IS_X265_TYPE_I(frames[curNonB]->sliceType) ? curNonB : prevNonB;
            frames[nextNonB]->plannedSatd[idx] = vbvFrameCost(frames, p0, curNonB, curNonB);
            frames[nextNonB]->plannedType[idx] = frames[curNonB]->sliceType;

            /* Save the nextNonB Cost in each B frame of the current miniGop */
            if (curNonB > miniGopEnd)
                for (int j = nextB; j < miniGopEnd; j++)
                    frames[j]->plannedSatd[frames[j]->indB] = frames[nextNonB]->plannedSatd[idx];
                    frames[j]->plannedType[frames[j]->indB++] = frames[nextNonB]->plannedType[idx];
            // NOTE(review): no increment of idx is visible after this entry is written -- confirm against upstream
        /* Handle the B-frames: coded order */
        if (m_param->bBPyramid && curNonB - prevNonB > 1)
            nextBRef = (prevNonB + curNonB + 1) / 2;

        for (int i = prevNonB + 1; i < curNonB; i++, idx++)
            int64_t satdCost = 0;
            int type = X265_TYPE_B;
            // With a B reference (nextBRef != 0) the references used for frame i depend on
            // whether i is the B reference itself, before it, or after it.
            // NOTE(review): the `else` lines separating the alternatives below are not visible
            if (nextBRef)
                if (i == nextBRef)
                    satdCost = vbvFrameCost(frames, prevNonB, curNonB, nextBRef);
                    type = X265_TYPE_BREF;
                else if (i < nextBRef)
                    satdCost = vbvFrameCost(frames, prevNonB, nextBRef, i);
                    satdCost = vbvFrameCost(frames, nextBRef, curNonB, i);
                satdCost = vbvFrameCost(frames, prevNonB, curNonB, i);
            frames[nextNonB]->plannedSatd[idx] = satdCost;
            frames[nextNonB]->plannedType[idx] = type;
            /* Save the nextB Cost in each B frame of the current miniGop */

            for (int j = nextB; j < miniGopEnd; j++)
                // NOTE(review): the actions for the two tests below (presumably `continue`) are not visible
                if (curBRef && curBRef == i)
                if (j >= i && j !=nextBRef)
                frames[j]->plannedSatd[frames[j]->indB] = satdCost;
                frames[j]->plannedType[frames[j]->indB++] = type;
        // Move on to the next mini-GOP.
        // NOTE(review): the increments of curNonB (before and inside the loop below) are not visible
        prevNonB = curNonB;
        while (curNonB <= numFrames && IS_X265_TYPE_B(frames[curNonB]->sliceType))
    // Terminate the plan
    frames[nextNonB]->plannedType[idx] = X265_TYPE_AUTO;



/* Decide whether frame p1 is a scenecut with respect to frame p0.
 *
 * frames:        lookahead frame list; p0/p1 and all derived indices index into it
 * p0:            index of the reference frame
 * p1:            index of the candidate frame
 * bRealScenecut: when true (and m_param->bframes is non-zero) the flash and
 *                scene-transition analysis below is run before the final check
 * numFrames:     upper bound on how far ahead of p0 the analysis may look
 *
 * Returns false if frames[p1]->bScenecut is clear once the analysis has run,
 * otherwise the result of scenecutInternal(frames, p0, p1, bRealScenecut).
 *
 * Side effects: may set or clear bScenecut on frames in (p0, maxp1], updates
 * m_isSceneTransition, and stores frames[p1]->ipCostRatio when
 * m_param->csvLogLevel >= 2. */
bool Lookahead::scenecut(Lowres **frames, int p0, int p1, bool bRealScenecut, int numFrames)
{
    /* Only do analysis during a normal scenecut check. */
    if (bRealScenecut && m_param->bframes)
    {
        int origmaxp1 = p0 + 1;
        /* Look ahead to avoid coding short flashes as scenecuts. */
        origmaxp1 += m_param->bframes;
        int maxp1 = X265_MIN(origmaxp1, numFrames);
        bool fluctuate = false;
        bool noScenecuts = false;
        int64_t avgSatdCost = 0;
        if (frames[p0]->costEst[p1 - p0][0] > -1)
            avgSatdCost = frames[p0]->costEst[p1 - p0][0];
        /* Number of samples accumulated into avgSatdCost; starts at 1 for the
         * initial value above and grows by one per frame in the loop below. */
        int cnt = 1;
        /* Where A and B are scenes: AAAAAABBBAAAAAA
         * If BBB is shorter than (maxp1-p0), it is detected as a flash
         * and not considered a scenecut. */
        for (int cp1 = p1; cp1 <= maxp1; cp1++)
        {
            if (!scenecutInternal(frames, p0, cp1, false))
            {
                /* Any frame in between p0 and cur_p1 cannot be a real scenecut. */
                for (int i = cp1; i > p0; i--)
                {
                    frames[i]->bScenecut = false;
                    noScenecuts = false;
                }
            }
            else if (scenecutInternal(frames, cp1 - 1, cp1, false))
            {
                /* If current frame is a Scenecut from p0 frame as well as Scenecut from
                 * preceeding frame, mark it as a Scenecut */
                frames[cp1]->bScenecut = true;
                noScenecuts = true;
            }

            /* compute average satdcost of all the frames in the mini-gop to confirm
             * whether there is any great fluctuation among them to rule out false positives */
            X265_CHECK(frames[cp1]->costEst[cp1 - p0][0]!= -1, "costEst is not done \n");
            avgSatdCost += frames[cp1]->costEst[cp1 - p0][0];
            cnt++;
        }

        /* Identify possible scene fluctuations by comparing the satd cost of the frames.
         * This could denote the beginning or ending of scene transitions.
         * During a scene transition(fade in/fade outs), if fluctuate remains false,
         * then the scene had completed its transition or stabilized */
        if (noScenecuts)
        {
            fluctuate = false;
            avgSatdCost /= cnt;
            for (int i = p1; i <= maxp1; i++)
            {
                int64_t curCost  = frames[i]->costEst[i - p0][0];
                int64_t prevCost = frames[i - 1]->costEst[i - 1 - p0][0];
                /* Flag a fluctuation when this frame's cost differs by more than 10%
                 * from the average cost or from the previous frame's cost. */
                if (fabs((double)(curCost - avgSatdCost)) > 0.1 * avgSatdCost ||
                    fabs((double)(curCost - prevCost)) > 0.1 * prevCost)
                {
                    fluctuate = true;
                    if (!m_isSceneTransition && frames[i]->bScenecut)
                    {
                        /* Only the first scenecut of a transition needs to be detected. */
                        m_isSceneTransition = true;
                        /* just mark the first scenechange in the scene transition as a scenecut. */
                        for (int j = i + 1; j <= maxp1; j++)
                            frames[j]->bScenecut = false;
                        /* Stop here so the flag on frame i is not cleared below. */
                        break;
                    }
                }
                frames[i]->bScenecut = false;
            }
        }
        if (!fluctuate && !noScenecuts)
            m_isSceneTransition = false; /* Signal end of scene transitioning */
    }

    if (m_param->csvLogLevel >= 2)
    {
        int64_t icost = frames[p1]->costEst[0][0];
        int64_t pcost = frames[p1]->costEst[p1 - p0][0];
        frames[p1]->ipCostRatio = (double)icost / pcost;
    }

    /* A frame is always analysed with bRealScenecut = true first, and then bRealScenecut = false,
       the former for I decisions and the latter for P/B decisions. It's possible that the first
       analysis detected scenecuts which were later nulled due to scene transitioning, in which
       case do not return a true scenecut for this frame */

    if (!frames[p1]->bScenecut)
        return false;
    return scenecutInternal(frames, p0, p1, bRealScenecut);
}



/* Sum the estimated cost of coding the frames along a candidate slice-type path.
 *
 * frames:    lookahead frame list handed to the cost estimator
 * path:      NUL-terminated string of frame types; 'P' marks a P-frame, any
 *            other character is treated as a B-frame between two P-frames.
 *            Every run of non-'P' characters must be followed by a 'P',
 *            otherwise the P-frame search runs past the terminator.
 * threshold: best path cost found so far; accumulation stops early once the
 *            running cost exceeds it
 *
 * Returns the accumulated cost, which may be a partial sum if the threshold
 * was exceeded. */
int64_t Lookahead::slicetypePathCost(Lowres **frames, char *path, int64_t threshold)
{
    int64_t cost = 0;
    int loc = 1;   /* 1-based position in the path of the next frame to cost */
    int cur_p = 0; /* frame index of the current (most recent) P-frame */

    CostEstimateGroup estGroup(*this, frames);

    /* Shift the pointer so that path[n] describes frame n. */
    path--; /* Since the 1st path element is really the second frame */
    while (path[loc])
    {
        int next_p = loc;
        /* Find the location of the next P-frame. */
        while (path[next_p] != 'P')
            next_p++;

        /* Add the cost of the P-frame found above */
        cost += estGroup.singleCost(cur_p, next_p, next_p);

        /* Early terminate if the cost we have found is larger than the best path cost so far */
        if (cost > threshold)
            break;

        if (m_param->bBPyramid && next_p - cur_p > 2)
        {
            /* B-pyramid with at least two B-frames between the P-frames: the
             * middle frame is costed against both P-frames, and the remaining
             * B-frames are costed against the middle frame and the P-frame on
             * their side. */
            int middle = cur_p + (next_p - cur_p) / 2;
            cost += estGroup.singleCost(cur_p, next_p, middle);

            for (int next_b = loc; next_b < middle && cost < threshold; next_b++)
                cost += estGroup.singleCost(cur_p, middle, next_b);

            for (int next_b = middle + 1; next_b < next_p && cost < threshold; next_b++)
                cost += estGroup.singleCost(middle, next_p, next_b);
        }
        else
        {
            /* No B-pyramid, or a gap of at most 2: every frame between the two
             * P-frames is costed against both of them. */
            for (int next_b = loc; next_b < next_p && cost < threshold; next_b++)
                cost += estGroup.singleCost(cur_p, next_p, next_b);
        }

        loc = next_p + 1;
        cur_p = next_p;
    }

    return cost;
}

14. CU tree 的构建和处理:Lookahead::cuTree


/* Run the CU-tree pass over the lookahead frames: walk the frames in reverse
 * order, accumulating propagate costs from each frame into the frames it
 * references, then finalise the result for the first non-B frame reached.
 *
 * frames:    lookahead frame list
 * numframes: index of the last frame to consider
 * bIntra:    when true the walk goes down to frame 0, otherwise it stops at
 *            frame 1 (idx = !bIntra)
 *
 * Side effects: clears and rewrites propagateCost on the visited frames, may
 * copy qpAqOffset into qpCuTreeOffset on frames[0], and calls cuTreeFinish. */
void Lookahead::cuTree(Lowres **frames, int numframes, bool bIntra)
{
    int idx = !bIntra;
    int lastnonb, curnonb = 1;
    int bframes = 0;

    /* Each of the numframes + 1 frames contributes the same nominal duration
     * fpsDenom / fpsNum, so the average equals that per-frame duration. */
    double totalDuration = 0.0;
    for (int j = 0; j <= numframes; j++)
        totalDuration += (double)m_param->fpsDenom / m_param->fpsNum;

    double averageDuration = totalDuration / (numframes + 1);

    int i = numframes;

    /* Skip trailing B-frames to find the last non-B frame. */
    while (i > 0 && frames[i]->sliceType == X265_TYPE_B)
        i--;

    lastnonb = i;

    /* Lookaheadless MB-tree is not a theoretically distinct case; the same extrapolation could
     * be applied to the end of a lookahead buffer of any size.  However, it's most needed when
     * lookahead=0, so that's what's currently implemented. */
    if (!m_param->lookaheadDepth)
    {
        if (bIntra)
        {
            /* No lookahead and an intra frame: clear the propagate cost and use
             * the AQ offsets directly as the CU-tree offsets. The offset arrays
             * hold four entries per CU when qgSize is 8. */
            memset(frames[0]->propagateCost, 0, m_cuCount * sizeof(uint16_t));
            if (m_param->rc.qgSize == 8)
                memcpy(frames[0]->qpCuTreeOffset, frames[0]->qpAqOffset, m_cuCount * 4 * sizeof(double));
            else
                memcpy(frames[0]->qpCuTreeOffset, frames[0]->qpAqOffset, m_cuCount * sizeof(double));
            return;
        }
        std::swap(frames[lastnonb]->propagateCost, frames[0]->propagateCost);
        memset(frames[0]->propagateCost, 0, m_cuCount * sizeof(uint16_t));
    }
    else
    {
        if (lastnonb < idx)
            return;
        memset(frames[lastnonb]->propagateCost, 0, m_cuCount * sizeof(uint16_t));
    }

    CostEstimateGroup estGroup(*this, frames);

    /* Walk backwards from the last non-B frame, one mini-GOP at a time. */
    while (i-- > idx)
    {
        /* Find the previous non-B frame. */
        curnonb = i;
        while (frames[curnonb]->sliceType == X265_TYPE_B && curnonb > 0)
            curnonb--;

        if (curnonb < idx)
            break;

        /* The return value of singleCost is unused throughout this function;
         * presumably it is called so the cost estimates read by
         * estimateCUPropagate are available -- TODO confirm. */
        estGroup.singleCost(curnonb, lastnonb, lastnonb);

        memset(frames[curnonb]->propagateCost, 0, m_cuCount * sizeof(uint16_t));
        bframes = lastnonb - curnonb - 1;
        if (m_param->bBPyramid && bframes > 1)
        {
            /* B-pyramid: the middle B-frame is costed against both non-B frames;
             * the other B-frames use the middle frame and the non-B frame on
             * their side as references. */
            int middle = (bframes + 1) / 2 + curnonb;
            estGroup.singleCost(curnonb, lastnonb, middle);
            memset(frames[middle]->propagateCost, 0, m_cuCount * sizeof(uint16_t));
            while (i > curnonb)
            {
                int p0 = i > middle ? middle : curnonb;
                int p1 = i < middle ? middle : lastnonb;
                if (i != middle)
                {
                    /* Estimate frame i against its references and propagate
                     * its CU-tree information to them. */
                    estGroup.singleCost(p0, p1, i);
                    estimateCUPropagate(frames, averageDuration, p0, p1, i, 0);
                }
                i--;
            }

            /* Last argument is 1 for the middle frame and for lastnonb, 0 for
             * the other B-frames -- presumably a "referenced" flag. */
            estimateCUPropagate(frames, averageDuration, curnonb, lastnonb, middle, 1);
        }
        else
        {
            while (i > curnonb)
            {
                /* Every B-frame in the mini-GOP is costed against curnonb and lastnonb. */
                estGroup.singleCost(curnonb, lastnonb, i);
                estimateCUPropagate(frames, averageDuration, curnonb, lastnonb, i, 0);
                i--;
            }
        }
        estimateCUPropagate(frames, averageDuration, curnonb, lastnonb, lastnonb, 1);
        lastnonb = curnonb;
    }

    if (!m_param->lookaheadDepth)
    {
        estGroup.singleCost(0, lastnonb, lastnonb);
        estimateCUPropagate(frames, averageDuration, 0, lastnonb, lastnonb, 1);
        std::swap(frames[lastnonb]->propagateCost, frames[0]->propagateCost);
    }

    cuTreeFinish(frames[lastnonb], averageDuration, lastnonb);
    if (m_param->bBPyramid && bframes > 1 && !m_param->rc.vbvBufferSize)
        cuTreeFinish(frames[lastnonb + (bframes + 1) / 2], averageDuration, 0);
}


