x264中码率控制（三）rate_estimate_qscale函数

最新推荐文章于 2024-05-23 15:46:30 发布

frgfnjrgn

最新推荐文章于 2024-05-23 15:46:30 发布

阅读量1.7k

点赞数 1

分类专栏：音视频

本文链接：https://blog.csdn.net/yihuanyihuan/article/details/88740998

版权

音视频专栏收录该内容

54 篇文章 6 订阅

订阅专栏

// 依据到目前为止编码bit数估算一帧的qscale
该函数主要进行qscale的初始化和调整，是码率控制部分的核心之一，另一个是get_scale。
0、计算SATD和图像的模糊复杂度
1、在get_scale中，按复杂度采用指数模型得到qscale
rcc->last_qscale = pow( rce->blurred_complexity, 1 - rcc->qcompress )/rate_factor
2、在get_scale中根据复杂度和目标比特数调整qp
q /= rate_factor; //rcc->wanted_bits_window / rcc->cplxr_sum
3、根据已编码帧的实际比特数和目标比特数的偏差再次调整qp
overflow = x264_clip3f( 1.0 + (total_bits - wanted_bits) / abr_buffer, .5, 2 );
q *= overflow;

根据代码我们得到基本的流程图为：

static float rate_estimate_qscale( x264_t *h )
{
    float q;
    x264_ratecontrol_t *rcc = h->rc;
    ratecontrol_entry_t rce = {0};
    int pict_type = h->sh.i_type;
	// 获取统计的总bits数
    int64_t total_bits = 8*(h->stat.i_frame_size[SLICE_TYPE_I]
                          + h->stat.i_frame_size[SLICE_TYPE_P]
                          + h->stat.i_frame_size[SLICE_TYPE_B])
                       - rcc->filler_bits_sum;  //编码完成帧的filler data大小（不计入视频大小）filler data表示填充数据，与像素点自身数据不相关

    if( rcc->b_2pass )
    {
        rce = *rcc->rce;
        if( pict_type != rce.pict_type )
        {
            x264_log( h, X264_LOG_ERROR, "slice=%c but 2pass stats say %c\n",
                      slice_type_to_char[pict_type], slice_type_to_char[rce.pict_type] );
        }
    }

    if( pict_type == SLICE_TYPE_B )
    {
        /* B-frames don't have independent ratecontrol, but rather get the
         * average QP of the two adjacent P-frames + an offset */
        //B帧没有自己单独的码率控制机制，通过相邻P帧的QP值来得到
        // 距当前帧最近的参考帧列表0， 1里的帧的类型是否是I帧
        int i0 = IS_X264_TYPE_I(h->fref_nearest[0]->i_type);
        int i1 = IS_X264_TYPE_I(h->fref_nearest[1]->i_type);
		// 获取当前待编码帧与两最近参考帧的POC之距离
        int dt0 = abs(h->fenc->i_poc - h->fref_nearest[0]->i_poc);
        int dt1 = abs(h->fenc->i_poc - h->fref_nearest[1]->i_poc);
		//获取两最近参考帧的平均qp
        float q0 = h->fref_nearest[0]->f_qp_avg_rc;
        float q1 = h->fref_nearest[1]->f_qp_avg_rc;
        // 如果两参考帧是B帧， 并用作参考帧，将获取的平均qp各减去pb_offset的一半
        if( h->fref_nearest[0]->i_type == X264_TYPE_BREF )
            q0 -= rcc->pb_offset/2;
        if( h->fref_nearest[1]->i_type == X264_TYPE_BREF )
            q1 -= rcc->pb_offset/2;
        // 如果两最近参考帧都是I帧，则取q0, q1的平均，再加上ip_offset
        // 否则如果list0最近参考帧是I帧,则q = q0 
        // 或list1最近参考帧是I帧,则q = q1
        // 如果两帧都不是I帧，则进行插值计算q
        if( i0 && i1 )
            q = (q0 + q1) / 2 + rcc->ip_offset;
        else if( i0 )
            q = q1;
        else if( i1 )
            q = q0;
        else
            q = (q0*dt1 + q1*dt0) / (dt0 + dt1);
        // 依据当前待编码帧是否将作为参考帧，来对当前帧qp作相应调整
        if( h->fenc->b_kept_as_ref )
            q += rcc->pb_offset/2;
        else
            q += rcc->pb_offset;

        rcc->qp_novbv = q;
        q = qp2qscale( q );
        if( rcc->b_2pass )
            rcc->frame_size_planned = qscale2bits( &rce, q );
        else
            rcc->frame_size_planned = predict_size( rcc->pred_b_from_p, q, h->fref[1][h->i_ref[1]-1]->i_satd );//通过P帧大小来预测B帧大小
        /* Limit planned size by MinCR */
        if( rcc->b_vbv )
            rcc->frame_size_planned = X264_MIN( rcc->frame_size_planned, rcc->frame_size_maximum );
        h->rc->frame_size_estimated = rcc->frame_size_planned;

        /* For row SATDs */
        if( rcc->b_vbv )
            rcc->last_satd = x264_rc_analyse_slice( h );//根据mb-tree的结果重新计算frame SATD cost，计算每一行的SATD cost
        return q;
    }
    else
    {
        double abr_buffer = 2 * rcc->rate_tolerance * rcc->bitrate; //abr缓存，用于控制实际码流大小和目标大小的差值
        double predicted_bits = total_bits;
        if( h->i_thread_frames > 1 )    //预测当前线程中的编码帧大小
        {
            int j = h->rc - h->thread[0]->rc;
            for( int i = 1; i < h->i_thread_frames; i++ )
            {
                x264_t *t = h->thread[(j+i) % h->i_thread_frames];
                double bits = t->rc->frame_size_planned;
                if( !t->b_thread_active )
                    continue;
                bits = X264_MAX(bits, t->rc->frame_size_estimated);
                predicted_bits += bits;  //已编码码流大小+线程中编码帧的大小
            }
        }

        if( rcc->b_2pass )
        {
            double lmin = rcc->lmin[pict_type];
            double lmax = rcc->lmax[pict_type];
            double diff;

            /* Adjust ABR buffer based on distance to the end of the video. */
            if( rcc->num_entries > h->i_frame )
            {
                double final_bits = rcc->entry_out[rcc->num_entries-1]->expected_bits;
                double video_pos = rce.expected_bits / final_bits;
                double scale_factor = sqrt( (1 - video_pos) * rcc->num_entries );
                abr_buffer *= 0.5 * X264_MAX( scale_factor, 0.5 );
            }

            diff = predicted_bits - rce.expected_bits;
            q = rce.new_qscale;
            q /= x264_clip3f((abr_buffer - diff) / abr_buffer, .5, 2);
            if( h->i_frame >= rcc->fps && rcc->expected_bits_sum >= 1 )
            {
                /* Adjust quant based on the difference between
                 * achieved and expected bitrate so far */
                double cur_time = (double)h->i_frame / rcc->num_entries;
                double w = x264_clip3f( cur_time*100, 0.0, 1.0 );
                q *= pow( (double)total_bits / rcc->expected_bits_sum, w );
            }
            rcc->qp_novbv = qscale2qp( q );
            if( rcc->b_vbv )
            {
                /* Do not overflow vbv */
                double expected_size = qscale2bits( &rce, q );
                double expected_vbv = rcc->buffer_fill + rcc->buffer_rate - expected_size;
                double expected_fullness = rce.expected_vbv / rcc->buffer_size;
                double qmax = q*(2 - expected_fullness);
                double size_constraint = 1 + expected_fullness;
                qmax = X264_MAX( qmax, rce.new_qscale );
                if( expected_fullness < .05 )
                    qmax = lmax;
                qmax = X264_MIN(qmax, lmax);
                while( ((expected_vbv < rce.expected_vbv/size_constraint) && (q < qmax)) ||
                        ((expected_vbv < 0) && (q < lmax)))
                {
                    q *= 1.05;
                    expected_size = qscale2bits(&rce, q);
                    expected_vbv = rcc->buffer_fill + rcc->buffer_rate - expected_size;
                }
                rcc->last_satd = x264_rc_analyse_slice( h );
            }
            q = x264_clip3f( q, lmin, lmax );
        }
        else /* 1pass ABR */
        {
            /* Calculate the quantizer which would have produced the desired
             * average bitrate if it had been applied to all frames so far.
             * Then modulate that quant based on the current frame's complexity
             * relative to the average complexity so far (using the 2pass RCEQ).
             * Then bias the quant up or down if total size so far was far from
             * the target.
             1、计算出一个Qp值，如果将该值应用到当前所有的帧，则可以获得目标平均吗率
             2、根据当前帧的复杂度和平均复杂度的差距，调整QP
             3、如果total size和目标相差太多，再调整QP
             * Result: Depending on the value of rate_tolerance, there is a
             * tradeoff between quality and bitrate precision. But at large
             * tolerances, the bit distribution approaches that of 2pass. */

            double wanted_bits, overflow = 1;

            rcc->last_satd = x264_rc_analyse_slice( h );
            rcc->short_term_cplxsum *= 0.5;
            rcc->short_term_cplxcount *= 0.5;
            rcc->short_term_cplxsum += rcc->last_satd / (CLIP_DURATION(h->fenc->f_duration) / BASE_FRAME_DURATION);
            rcc->short_term_cplxcount ++;

            rce.tex_bits = rcc->last_satd;
            rce.blurred_complexity = rcc->short_term_cplxsum / rcc->short_term_cplxcount;
            rce.mv_bits = 0;
            rce.p_count = rcc->nmb;
            rce.i_count = 0;
            rce.s_count = 0;
            rce.qscale = 1;
            rce.pict_type = pict_type;
            rce.i_duration = h->fenc->i_duration;

            if( h->param.rc.i_rc_method == X264_RC_CRF )
            {
                q = get_qscale( h, &rce, rcc->rate_factor_constant, h->fenc->i_frame );
            }
            else
            {
                q = get_qscale( h, &rce, rcc->wanted_bits_window / rcc->cplxr_sum, h->fenc->i_frame );

                /* ABR code can potentially be counterproductive in CBR, so just don't bother.
                 * Don't run it if the frame complexity is zero either. */
                if( !rcc->b_vbv_min_rate && rcc->last_satd )//不开启vbv时会增加一次调整
                {
                    // FIXME is it simpler to keep track of wanted_bits in ratecontrol_end?
                    int i_frame_done = h->i_frame;
                    double time_done = i_frame_done / rcc->fps;
                    if( h->param.b_vfr_input && i_frame_done > 0 )
                        time_done = ((double)(h->fenc->i_reordered_pts - h->i_reordered_pts_delay)) * h->param.i_timebase_num / h->param.i_timebase_den;
                    wanted_bits = time_done * rcc->bitrate;
                    if( wanted_bits > 0 )
                    {
                        abr_buffer *= X264_MAX( 1, sqrt( time_done ) );
                        overflow = x264_clip3f( 1.0 + (predicted_bits - wanted_bits) / abr_buffer, .5, 2 );
                        q *= overflow;
                    }
                }
            }

            if( pict_type == SLICE_TYPE_I && h->param.i_keyint_max > 1
                /* should test _next_ pict type, but that isn't decided yet */
                && rcc->last_non_b_pict_type != SLICE_TYPE_I )
            {
                q = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm );
                q /= fabs( h->param.rc.f_ip_factor );
            }
            else if( h->i_frame > 0 )
            {
                if( h->param.rc.i_rc_method != X264_RC_CRF )
                {
                    /* Asymmetric clipping, because symmetric would prevent
                     * overflow control in areas of rapidly oscillating complexity */
                    double lmin = rcc->last_qscale_for[pict_type] / rcc->lstep;
                    double lmax = rcc->last_qscale_for[pict_type] * rcc->lstep;
                    if( overflow > 1.1 && h->i_frame > 3 )
                        lmax *= rcc->lstep;
                    else if( overflow < 0.9 )
                        lmin /= rcc->lstep;

                    q = x264_clip3f(q, lmin, lmax);
                }
            }
            else if( h->param.rc.i_rc_method == X264_RC_CRF && rcc->qcompress != 1 )
            {
                q = qp2qscale( ABR_INIT_QP ) / fabs( h->param.rc.f_ip_factor );
            }
            rcc->qp_novbv = qscale2qp( q );   //没有vbv控制时的qp

            //FIXME use get_diff_limited_q() ?
            q = clip_qscale( h, pict_type, q );   //有vbv控制时的qp值
        }//end else 1 pass

        rcc->last_qscale_for[pict_type] =
        rcc->last_qscale = q;

        if( !(rcc->b_2pass && !rcc->b_vbv) && h->fenc->i_frame == 0 )
            rcc->last_qscale_for[SLICE_TYPE_P] = q * fabs( h->param.rc.f_ip_factor );

        if( rcc->b_2pass )
            rcc->frame_size_planned = qscale2bits( &rce, q );
        else
            rcc->frame_size_planned = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );

        /* Always use up the whole VBV in this case. */
        if( rcc->single_frame_vbv )
            rcc->frame_size_planned = rcc->buffer_rate;
        /* Limit planned size by MinCR */
        if( rcc->b_vbv )
            rcc->frame_size_planned = X264_MIN( rcc->frame_size_planned, rcc->frame_size_maximum );
        h->rc->frame_size_estimated = rcc->frame_size_planned;  //size-estimated在当前帧编码完后会更新成实际编码大小
        return q;
    }
}

整个函数可以看到，ABR模式下，qscale的初值由get_qscale函数计算得出，然后在不开启vbv的情况下，根据（predicted_bits- wanted_bits）来调整qscale；在开启vbv的情况下，使用clip_qscale( ),根据vbv buffer的状态来调整qscale.

clip_qscale（）函数
//按一阶模型根据satd估计分配的bits，结合vbv限制，修正qscale

// apply VBV constraints and clip qscale to between lmin and lmax
//按一阶模型根据satd估计分配的bits，结合vbv限制，修正qscale
static double clip_qscale( x264_t *h, int pict_type, double q )
{
    x264_ratecontrol_t *rcc = h->rc;
    double lmin = rcc->lmin[pict_type];
    double lmax = rcc->lmax[pict_type];
    if( rcc->rate_factor_max_increment )
        lmax = X264_MIN( lmax, qp2qscale( rcc->qp_novbv + rcc->rate_factor_max_increment ) );
    double q0 = q;
 
    /* B-frames are not directly subject to VBV,
     * since they are controlled by the P-frames' QPs. */
 
    if( rcc->b_vbv && rcc->last_satd > 0 )
    {
        double fenc_cpb_duration = (double)h->fenc->i_cpb_duration *
                                   h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
        /* Lookahead VBV: raise the quantizer as necessary such that no frames in
         * the lookahead overflow and such that the buffer is in a reasonable state
         * by the end of the lookahead. */
        if( h->param.rc.i_lookahead )
        {
            int terminate = 0;
 
            /* Avoid an infinite loop. */
            for( int iterations = 0; iterations < 1000 && terminate != 3; iterations++ )
            {
                double frame_q[3];
                double cur_bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
                double buffer_fill_cur = rcc->buffer_fill - cur_bits;
                double target_fill;
                double total_duration = 0;
                double last_duration = fenc_cpb_duration;
                frame_q[0] = h->sh.i_type == SLICE_TYPE_I ? q * h->param.rc.f_ip_factor : q;
                frame_q[1] = frame_q[0] * h->param.rc.f_pb_factor;
                frame_q[2] = frame_q[0] / h->param.rc.f_ip_factor;
 
                /* Loop over the planned future frames. */
                for( int j = 0; buffer_fill_cur >= 0 && buffer_fill_cur <= rcc->buffer_size; j++ )
                {//如果未来几帧都按照这个qscale设置来编码的话，全都编码完会用掉多少buffer
                    total_duration += last_duration;
                    buffer_fill_cur += rcc->vbv_max_rate * last_duration;
                    int i_type = h->fenc->i_planned_type[j];
                    int i_satd = h->fenc->i_planned_satd[j];
                    if( i_type == X264_TYPE_AUTO )
                        break;
                    i_type = IS_X264_TYPE_I( i_type ) ? SLICE_TYPE_I : IS_X264_TYPE_B( i_type ) ? SLICE_TYPE_B : SLICE_TYPE_P;
                    cur_bits = predict_size( &rcc->pred[i_type], frame_q[i_type], i_satd );
                    buffer_fill_cur -= cur_bits;
                    last_duration = h->fenc->f_planned_cpb_duration[j];
                }
                /* Try to get to get the buffer at least 50% filled, but don't set an impossible goal. */
                target_fill = X264_MIN( rcc->buffer_fill + total_duration * rcc->vbv_max_rate * 0.5, rcc->buffer_size * 0.5 );
                if( buffer_fill_cur < target_fill )
                {//zhanghui:如果达不到预期，就加大qscale
                    q *= 1.01;
                    terminate |= 1;
                    continue;
                }
                /* Try to get the buffer no more than 80% filled, but don't set an impossible goal. */
                target_fill = x264_clip3f( rcc->buffer_fill - total_duration * rcc->vbv_max_rate * 0.5, rcc->buffer_size * 0.8, rcc->buffer_size );
                if( rcc->b_vbv_min_rate && buffer_fill_cur > target_fill )
                {//zhanghui:如果超过预期，就减小qscale
                    q /= 1.01;
                    terminate |= 2;
                    continue;
                }
                break;
            }
        }
        /* Fallback to old purely-reactive algorithm: no lookahead. */
        //if( h->param.rc.i_lookahead )
        else
        {
            if( ( pict_type == SLICE_TYPE_P ||
                ( pict_type == SLICE_TYPE_I && rcc->last_non_b_pict_type == SLICE_TYPE_I ) ) &&
                rcc->buffer_fill/rcc->buffer_size < 0.5 )
            {
                q /= x264_clip3f( 2.0*rcc->buffer_fill/rcc->buffer_size, 0.5, 1.0 );
            }
 
            /* Now a hard threshold to make sure the frame fits in VBV.
             * This one is mostly for I-frames. */
            double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
            /* For small VBVs, allow the frame to use up the entire VBV. */
            double max_fill_factor = h->param.rc.i_vbv_buffer_size >= 5*h->param.rc.i_vbv_max_bitrate / rcc->fps ? 2 : 1;
            /* For single-frame VBVs, request that the frame use up the entire VBV. */
            double min_fill_factor = rcc->single_frame_vbv ? 1 : 2;
 
            if( bits > rcc->buffer_fill/max_fill_factor )
            {
                double qf = x264_clip3f( rcc->buffer_fill/(max_fill_factor*bits), 0.2, 1.0 );
                q /= qf;
                bits *= qf;
            }
            if( bits < rcc->buffer_rate/min_fill_factor )
            {
                double qf = x264_clip3f( bits*min_fill_factor/rcc->buffer_rate, 0.001, 1.0 );
                q *= qf;
            }
            q = X264_MAX( q0, q );
        }
 
        /* Check B-frame complexity, and use up any bits that would
         * overflow before the next P-frame. */
        if( h->sh.i_type == SLICE_TYPE_P && !rcc->single_frame_vbv )
        {
            int nb = rcc->bframes;
            double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
            double pbbits = bits;
            double bbits = predict_size( rcc->pred_b_from_p, q * h->param.rc.f_pb_factor, rcc->last_satd );
            double space;
            double bframe_cpb_duration = 0;
            double minigop_cpb_duration;
            for( int i = 0; i < nb; i++ )
                bframe_cpb_duration += h->fenc->f_planned_cpb_duration[i];
 
            if( bbits * nb > bframe_cpb_duration * rcc->vbv_max_rate )
                nb = 0;
            pbbits += nb * bbits;
 
            minigop_cpb_duration = bframe_cpb_duration + fenc_cpb_duration;
            space = rcc->buffer_fill + minigop_cpb_duration*rcc->vbv_max_rate - rcc->buffer_size;
            if( pbbits < space )
            {
                q *= X264_MAX( pbbits / space, bits / (0.5 * rcc->buffer_size) );
            }
            q = X264_MAX( q0/2, q );
        }
 
        /* Apply MinCR and buffer fill restrictions */
        double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
        double frame_size_maximum = X264_MIN( rcc->frame_size_maximum, X264_MAX( rcc->buffer_fill, 0.001 ) );
        if( bits > frame_size_maximum )
            q *= bits / frame_size_maximum;
 
        if( !rcc->b_vbv_min_rate )
            q = X264_MAX( q0, q );
    }
 
    if( lmin==lmax )
        return lmin;
    else if( rcc->b_2pass )
    {
        double min2 = log( lmin );
        double max2 = log( lmax );
        q = (log(q) - min2)/(max2-min2) - 0.5;
        q = 1.0/(1.0 + exp( -4*q ));
        q = q*(max2-min2) + min2;
        return exp( q );
    }
    else
        return x264_clip3f( q, lmin, lmax );
}