【x264编码器】章节4——x264的帧内预测流程

Captain1314_李祖团

已于 2024-03-24 21:01:41 修改

阅读量3.9k

点赞数 15

分类专栏：视频编码器文章标签： windows linux h.265 视频编解码音视频

于 2024-02-25 23:15:29 首次发布

本文链接：https://blog.csdn.net/vcvdv123/article/details/136235617

版权

视频编码器专栏收录该内容

15 篇文章 25 订阅

订阅专栏

系列文章目录

HEVC视频编解码标准简介

【x264编码器】章节1——x264编码流程及基于x264的编码器demo

【x264编码器】章节2——x264的lookahead流程分析

【x264编码器】章节3——x264的码率控制

【x264编码器】章节4——x264的帧内预测流程

【x264编码器】章节5——x264的帧间预测流程

【x264编码器】章节6——x264的变换量化

【x265编码器】章节1——lookahead模块分析

【x265编码器】章节2——编码流程及基于x265的编码器demo

【x265编码器】章节3——帧内预测流程

【x265编码器】章节4——帧间预测流程

【x265编码器】章节5——x265帧间运动估计流程

【x265编码器】章节6——x265的码率控制

1.slice编码写入slice_write

2.帧内预测编码mb_analyse_intra

3.8x8帧内模式编码x264_mb_encode_i8x8

4.帧内预测率失真优化过程intra_rd

点赞、收藏，会是我继续写作的动力！赠人玫瑰，手有余香

一、x264帧内预测

x264帧内预测大体流程如下，其中16x16尺寸需要遍历4种预测模式，而8x8和4x4块需要遍历9种预测模式：

x264完整的流程框架如下：

二、使用步骤

1.slice编码写入slice_write

代码如下：

/* Encode and write one slice into h->out.bs.
 *
 * Starts a NAL unit, writes the slice header, then loops over macroblocks
 * starting at h->sh.i_first_mb: each one is loaded into the cache, analysed,
 * encoded and entropy-coded (CABAC or CAVLC) until h->sh.i_last_mb is reached.
 * Bitstream backups (bs_bak[]) allow rolling back when the slice exceeds
 * slice_max_size, when a CAVLC level code overflows, when slice-min-mbs would
 * be violated, or when x264_ratecontrol_mb() asks for the row to be redone.
 *
 * Returns 0 on success (including the case where the slice ends up empty),
 * or -1 if bitstream_check_buffer() or nal_end() reports failure. */
static intptr_t slice_write( x264_t *h )
{
    int i_skip;
    int mb_xy, i_mb_x, i_mb_y;
    /* NALUs other than the first use a 3-byte startcode.
     * Add one extra byte for the rbsp, and one more for the final CABAC putbyte.
     * Then add an extra 5 bytes just in case, to account for random NAL escapes and
     * other inaccuracies. *///overhead_guess: estimated per-NAL byte overhead, subtracted from the configured slice size limit
    int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal)) + 1 + h->param.b_cabac + 5;
    int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-overhead_guess)*8 : 0;
    int back_up_bitstream_cavlc = !h->param.b_cabac && h->sps->i_profile_idc < PROFILE_HIGH;
    int back_up_bitstream = slice_max_size || back_up_bitstream_cavlc;//backups are needed when a max slice size is set, or when using CAVLC (not CABAC) with a profile below High
    int starting_bits = bs_pos(&h->out.bs);//bitstream bit position at the start of this slice
    int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1;//deblocking is wanted unless disable_deblocking_filter_idc == 1
    int b_hpel = h->fdec->b_kept_as_ref;//set when the reconstructed frame is kept as a reference (name suggests it gates half-pel filtering -- confirm)
    int orig_last_mb = h->sh.i_last_mb;//remember the slice's original last macroblock index
    int thread_last_mb = h->i_threadslice_end * h->mb.i_mb_width - 1;//index of the last macroblock in this thread's slice range
    uint8_t *last_emu_check;
#define BS_BAK_SLICE_MAX_SIZE 0
#define BS_BAK_CAVLC_OVERFLOW 1
#define BS_BAK_SLICE_MIN_MBS  2
#define BS_BAK_ROW_VBV        3
    x264_bs_bak_t bs_bak[4];
    b_deblock &= b_hpel || h->param.b_full_recon || h->param.psz_dump_yuv;
    bs_realign( &h->out.bs );//realign the bitstream writer

    /* Slice *///start the NAL unit and record the slice's first macroblock index in it
    nal_start( h, h->i_nal_type, h->i_nal_ref_idc );
    h->out.nal[h->out.i_nal].i_first_mb = h->sh.i_first_mb;

    /* Slice header *///initialise per-thread macroblock state
    x264_macroblock_thread_init( h );
    //use the QP of the first macroblock in the slice as the slice QP
    /* Set the QP equal to the first QP in the slice for more accurate CABAC initialization. */
    h->mb.i_mb_xy = h->sh.i_first_mb;
    h->sh.i_qp = x264_ratecontrol_mb_qp( h );
    h->sh.i_qp = SPEC_QP( h->sh.i_qp );//pass the rate-control QP through SPEC_QP
    h->sh.i_qp_delta = h->sh.i_qp - h->pps->i_pic_init_qp;//slice QP delta relative to the PPS initial QP

    slice_header_write( &h->out.bs, &h->sh, h->i_nal_ref_idc );
    if( h->param.b_cabac )
    {
        /* alignment needed *///align the bitstream before the CABAC data
        bs_align_1( &h->out.bs );

        /* init cabac *///initialise the CABAC contexts and the CABAC encoder
        x264_cabac_context_init( h, &h->cabac, h->sh.i_type, x264_clip3( h->sh.i_qp-QP_BD_OFFSET, 0, 51 ), h->sh.i_cabac_init_idc );
        x264_cabac_encode_init ( &h->cabac, h->out.bs.p, h->out.bs.p_end );
        last_emu_check = h->cabac.p;
    }
    else
        last_emu_check = h->out.bs.p;
    h->mb.i_last_qp = h->sh.i_qp;
    h->mb.i_last_dqp = 0;
    h->mb.field_decoding_flag = 0;//reset the field decoding flag

    i_mb_y = h->sh.i_first_mb / h->mb.i_mb_width;
    i_mb_x = h->sh.i_first_mb % h->mb.i_mb_width;
    i_skip = 0;

    while( 1 )
    {   //current macroblock index (mb_xy) and the bit position at which it starts (mb_spos)
        mb_xy = i_mb_x + i_mb_y * h->mb.i_mb_width;
        int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);

        if( i_mb_x == 0 )//first macroblock of a row
        {   //check the output bitstream buffer; bail out on failure
            if( bitstream_check_buffer( h ) )
                return -1;
            if( !(i_mb_y & SLICE_MBAFF) && h->param.rc.i_vbv_buffer_size )//VBV buffer size set (and, under SLICE_MBAFF, an even row): back up the bitstream at row start
                bitstream_backup( h, &bs_bak[BS_BAK_ROW_VBV], i_skip, 1 );
            if( !h->mb.b_reencode_mb )//not re-encoding: run the row filter
                fdec_filter_row( h, i_mb_y, 0 );
        }

        if( back_up_bitstream )
        {   //CAVLC overflow backup, taken before every macroblock
            if( back_up_bitstream_cavlc )
                bitstream_backup( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], i_skip, 0 );
            if( slice_max_size && !(i_mb_y & SLICE_MBAFF) )
            {   //slice-max-size backup; additionally take the slice-min-mbs checkpoint when exactly i_slice_min_mbs macroblocks remain in the thread's range
                bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], i_skip, 0 );
                if( (thread_last_mb+1-mb_xy) == h->param.i_slice_min_mbs )
                    bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], i_skip, 0 );
            }
        }

        if( PARAM_INTERLACED )//interlaced coding enabled
        {
            if( h->mb.b_adaptive_mbaff )
            {
                if( !(i_mb_y&1) )
                {
                    /* FIXME: VSAD is fast but fairly poor at choosing the best interlace type. */
                    h->mb.b_interlaced = x264_field_vsad( h, i_mb_x, i_mb_y );
                    memcpy( &h->zigzagf, MB_INTERLACED ? &h->zigzagf_interlaced : &h->zigzagf_progressive, sizeof(h->zigzagf) );
                    if( !MB_INTERLACED && (i_mb_y+2) == h->mb.i_mb_height )
                        x264_expand_border_mbpair( h, i_mb_x, i_mb_y );
                }
            }
            h->mb.field[mb_xy] = MB_INTERLACED;
        }

        /* load cache */
        if( SLICE_MBAFF )
            x264_macroblock_cache_load_interlaced( h, i_mb_x, i_mb_y );
        else
            x264_macroblock_cache_load_progressive( h, i_mb_x, i_mb_y );
        //analyse the macroblock
        x264_macroblock_analyse( h );

        /* encode this macroblock -> be careful it can change the mb type to P_SKIP if needed */
reencode:
        x264_macroblock_encode( h );

        if( h->param.b_cabac )//CABAC entropy coding
        {   //not the first macroblock of the slice and not an odd row under SLICE_MBAFF: encode the CABAC terminal symbol
            if( mb_xy > h->sh.i_first_mb && !(SLICE_MBAFF && (i_mb_y&1)) )
                x264_cabac_encode_terminal( &h->cabac );
            //skip-type macroblock: code the skip flag with value 1
            if( IS_SKIP( h->mb.i_type ) )
                x264_cabac_mb_skip( h, 1 );
            else
            {   //non-I slice: code the skip flag with value 0 before writing the macroblock
                if( h->sh.i_type != SLICE_TYPE_I )
                    x264_cabac_mb_skip( h, 0 );
                x264_macroblock_write_cabac( h, &h->cabac );
            }
        }
        else
        {   //CAVLC entropy coding
            if( IS_SKIP( h->mb.i_type ) )
                i_skip++;//extend the current skip run
            else
            {
                if( h->sh.i_type != SLICE_TYPE_I )
                {   //write the pending skip run length before this coded macroblock
                    bs_write_ue( &h->out.bs, i_skip );  /* skip run */
                    i_skip = 0;//reset the skip run
                }
                x264_macroblock_write_cavlc( h );
                /* If there was a CAVLC level code overflow, try again at a higher QP. */
                if( h->mb.b_overflow )//overflow flagged: restore the backup and re-encode this macroblock
                {   //raise the QP by one and look up the matching chroma QP
                    h->mb.i_chroma_qp = h->chroma_qp_table[++h->mb.i_qp];
                    h->mb.i_skip_intra = 0;
                    h->mb.b_skip_mc = 0;
                    h->mb.b_overflow = 0;
                    bitstream_restore( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], &i_skip, 0 );
                    goto reencode;
                }
            }
        }
        //total bits so far = bitstream position + CABAC position
        int total_bits = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
        int mb_size = total_bits - mb_spos;//bits consumed by this macroblock

        if( slice_max_size && (!SLICE_MBAFF || (i_mb_y&1)) )
        {   //a max slice size is set, and either SLICE_MBAFF is off or this is an odd row
            /* Count the skip run, just in case. */
            if( !h->param.b_cabac )
                total_bits += bs_size_ue_big( i_skip );
            /* Check for escape bytes. *///scan the bytes written since the previous check
            uint8_t *end = h->param.b_cabac ? h->cabac.p : h->out.bs.p;
            for( ; last_emu_check < end - 2; last_emu_check++ )
                if( last_emu_check[0] == 0 && last_emu_check[1] == 0 && last_emu_check[2] <= 3 )
                {   //found the byte pattern 00 00 0x with x <= 3
                    slice_max_size -= 8;//shrink the remaining budget by 8 bits for this match
                    last_emu_check++;//advance one extra byte past the match
                }
            /* We'll just re-encode this last macroblock if we go over the max slice size. */
            if( total_bits - starting_bits > slice_max_size && !h->mb.b_reencode_mb )
            {   //the slice is over the size limit and this macroblock is not already a re-encode
                if( !x264_frame_new_slice( h, h->fdec ) )//taken when x264_frame_new_slice() returns 0
                {
                    /* Handle the most obnoxious slice-min-mbs edge case: we need to end the slice
                     * because it's gone over the maximum size, but doing so would violate slice-min-mbs.
                     * If possible, roll back to the last checkpoint and try again.
                     * We could try raising QP, but that would break in the case where a slice spans multiple
                     * rows, which the re-encoding infrastructure can't currently handle. */
                    if( mb_xy <= thread_last_mb && (thread_last_mb+1-mb_xy) < h->param.i_slice_min_mbs )
                    {
                        if( thread_last_mb-h->param.i_slice_min_mbs < h->sh.i_first_mb+h->param.i_slice_min_mbs )
                        {
                            x264_log( h, X264_LOG_WARNING, "slice-max-size violated (frame %d, cause: slice-min-mbs)\n", h->i_frame );
                            slice_max_size = 0;
                            goto cont;
                        }//otherwise roll back to the slice-min-mbs checkpoint and end the slice there
                        bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], &i_skip, 0 );
                        h->mb.b_reencode_mb = 1;
                        h->sh.i_last_mb = thread_last_mb-h->param.i_slice_min_mbs;
                        break;
                    }
                    if( mb_xy-SLICE_MBAFF*h->mb.i_mb_stride != h->sh.i_first_mb )
                    {   //not the first macroblock of the slice: roll back to before this macroblock and end the slice at the previous one
                        bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], &i_skip, 0 );
                        h->mb.b_reencode_mb = 1;
                        if( SLICE_MBAFF )
                        {
                            // set to bottom of previous mbpair
                            if( i_mb_x )//not in the first column
                                h->sh.i_last_mb = mb_xy-1+h->mb.i_mb_stride*(!(i_mb_y&1));
                            else
                                h->sh.i_last_mb = (i_mb_y-2+!(i_mb_y&1))*h->mb.i_mb_stride + h->mb.i_mb_width - 1;
                        }
                        else
                            h->sh.i_last_mb = mb_xy-1;
                        break;
                    }
                    else
                        h->sh.i_last_mb = mb_xy;
                }
                else
                    slice_max_size = 0;
            }
        }
cont:   //clear the re-encode flag
        h->mb.b_reencode_mb = 0;

        /* save cache *///store the macroblock cache state
        x264_macroblock_cache_save( h );
        //per-macroblock rate control; a negative return means the row must be re-encoded
        if( x264_ratecontrol_mb( h, mb_size ) < 0 )
        {   //restore the bitstream to the row-start backup
            bitstream_restore( h, &bs_bak[BS_BAK_ROW_VBV], &i_skip, 1 );
            h->mb.b_reencode_mb = 1;//mark the following macroblock as a re-encode
            i_mb_x = 0;
            i_mb_y = i_mb_y - SLICE_MBAFF;
            h->mb.i_mb_prev_xy = i_mb_y * h->mb.i_mb_stride - 1;
            h->sh.i_last_mb = orig_last_mb;//restore the original last macroblock, then restart the loop at the row start
            continue;
        }

        /* accumulate mb stats */
        h->stat.frame.i_mb_count[h->mb.i_type]++;//count macroblocks per type

        int b_intra = IS_INTRA( h->mb.i_type );
        int b_skip = IS_SKIP( h->mb.i_type );
        if( h->param.i_log_level >= X264_LOG_INFO || h->param.rc.b_stat_write )
        {   //partition and reference-index statistics, gathered for info-level logging or stats writing
            if( !b_intra && !b_skip && !IS_DIRECT( h->mb.i_type ) )
            {
                if( h->mb.i_partition != D_8x8 )
                        h->stat.frame.i_mb_partition[h->mb.i_partition] += 4;
                    else
                        for( int i = 0; i < 4; i++ )
                            h->stat.frame.i_mb_partition[h->mb.i_sub_partition[i]] ++;
                if( h->param.i_frame_reference > 1 )
                    for( int i_list = 0; i_list <= (h->sh.i_type == SLICE_TYPE_B); i_list++ )
                        for( int i = 0; i < 4; i++ )
                        {
                            int i_ref = h->mb.cache.ref[i_list][ x264_scan8[4*i] ];
                            if( i_ref >= 0 )
                                h->stat.frame.i_mb_count_ref[i_list][i_ref] ++;
                        }
            }
        }

        if( h->param.i_log_level >= X264_LOG_INFO )
        {
            if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
            {
                if( CHROMA444 )
                {
                    for( int i = 0; i < 4; i++ )
                        if( h->mb.i_cbp_luma & (1 << i) )
                            for( int p = 0; p < 3; p++ )
                            {
                                int s8 = i*4+p*16;
                                int nnz8x8 = M16( &h->mb.cache.non_zero_count[x264_scan8[s8]+0] )
                                           | M16( &h->mb.cache.non_zero_count[x264_scan8[s8]+8] );
                                h->stat.frame.i_mb_cbp[!b_intra + p*2] += !!nnz8x8;
                            }
                }
                else
                {   //sum of the luma cbp bits
                    int cbpsum = (h->mb.i_cbp_luma&1) + ((h->mb.i_cbp_luma>>1)&1)
                               + ((h->mb.i_cbp_luma>>2)&1) + (h->mb.i_cbp_luma>>3);
                    h->stat.frame.i_mb_cbp[!b_intra + 0] += cbpsum;
                    h->stat.frame.i_mb_cbp[!b_intra + 2] += !!h->mb.i_cbp_chroma;
                    h->stat.frame.i_mb_cbp[!b_intra + 4] += h->mb.i_cbp_chroma >> 1;
                }
            }
            if( h->mb.i_cbp_luma && !b_intra )
            {
                h->stat.frame.i_mb_count_8x8dct[0] ++;
                h->stat.frame.i_mb_count_8x8dct[1] += h->mb.b_transform_8x8;
            }
            if( b_intra && h->mb.i_type != I_PCM )
            {   //intra macroblock other than I_PCM: update the prediction mode statistics
                if( h->mb.i_type == I_16x16 )
                    h->stat.frame.i_mb_pred_mode[0][h->mb.i_intra16x16_pred_mode]++;
                else if( h->mb.i_type == I_8x8 )
                    for( int i = 0; i < 16; i += 4 )
                        h->stat.frame.i_mb_pred_mode[1][h->mb.cache.intra4x4_pred_mode[x264_scan8[i]]]++;
                else //if( h->mb.i_type == I_4x4 )
                    for( int i = 0; i < 16; i++ )
                        h->stat.frame.i_mb_pred_mode[2][h->mb.cache.intra4x4_pred_mode[x264_scan8[i]]]++;
                h->stat.frame.i_mb_pred_mode[3][x264_mb_chroma_pred_mode_fix[h->mb.i_chroma_pred_mode]]++;
            }
            h->stat.frame.i_mb_field[b_intra?0:b_skip?2:1] += MB_INTERLACED;
        }

        /* calculate deblock strength values (actual deblocking is done per-row along with hpel) */
        if( b_deblock )//deblocking enabled for this slice
            x264_macroblock_deblock_strength( h );//compute the deblocking strength values for this macroblock
        //stop once the slice's last macroblock has been processed
        if( mb_xy == h->sh.i_last_mb )
            break;
        //advance to the next macroblock; under SLICE_MBAFF the walk alternates between the two rows of a pair before moving to the next column
        if( SLICE_MBAFF )
        {
            i_mb_x += i_mb_y & 1;
            i_mb_y ^= i_mb_x < h->mb.i_mb_width;
        }
        else
            i_mb_x++;
        if( i_mb_x == h->mb.i_mb_width )
        {   //end of the row: move to the next row, first column
            i_mb_y++;
            i_mb_x = 0;
        }
    }//loop finished: if i_last_mb ended up before i_first_mb, return 0 without finishing the NAL
    if( h->sh.i_last_mb < h->sh.i_first_mb )
        return 0;
    //record the slice's last macroblock index in the output NAL
    h->out.nal[h->out.i_nal].i_last_mb = h->sh.i_last_mb;
    //finish the entropy-coded data (CABAC or CAVLC)
    if( h->param.b_cabac )
    {   //flush the CABAC encoder and move the bitstream write pointer to the CABAC write pointer
        x264_cabac_encode_flush( h, &h->cabac );
        h->out.bs.p = h->cabac.p;
    }
    else
    {
        if( i_skip > 0 )
            bs_write_ue( &h->out.bs, i_skip );  /* last skip run */
        /* rbsp_slice_trailing_bits */
        bs_rbsp_trailing( &h->out.bs );
        bs_flush( &h->out.bs );
    }//close the NAL unit; fail if nal_end() reports an error
    if( nal_end( h ) )
        return -1;
    //this slice ends at the last macroblock of the thread's slice range
    if( h->sh.i_last_mb == (h->i_threadslice_end * h->mb.i_mb_width - 1) )
    {   //misc bits = bitstream bits + NAL overhead bits - texture bits - MV bits
        h->stat.frame.i_misc_bits = bs_pos( &h->out.bs )
                                  + (h->out.i_nal*NALU_OVERHEAD * 8)
                                  - h->stat.frame.i_tex_bits
                                  - h->stat.frame.i_mv_bits;
        fdec_filter_row( h, h->i_threadslice_end, 0 );//run the row filter for the end of the thread's slice range
        //sliced threads enabled
        if( h->param.b_sliced_threads )
        {
            /* Tell the main thread we're done. */
            x264_threadslice_cond_broadcast( h, 1 );//broadcast state 1
            /* Do hpel now *///run fdec_filter_row (pass 1) over this thread's rows
            for( int mb_y = h->i_threadslice_start; mb_y <= h->i_threadslice_end; mb_y++ )
                fdec_filter_row( h, mb_y, 1 );
            x264_threadslice_cond_broadcast( h, 2 );//broadcast state 2 (this is a signal, not a wait)
            /* Do the first row of hpel, now that the previous slice is done */
            if( h->i_thread_idx > 0 )
            {   //not the first thread: wait for the previous thread to reach state 2
                x264_threadslice_cond_wait( h->thread[h->i_thread_idx-1], 2 );
                fdec_filter_row( h, h->i_threadslice_start + (1 << SLICE_MBAFF), 2 );
            }
        }
        //release mb_info when sliced threads are off, or when this is the last thread
        /* Free mb info after the last thread's done using it */
        if( h->fdec->mb_info_free && (!h->param.b_sliced_threads || h->i_thread_idx == (h->param.i_threads-1)) )
        {
            h->fdec->mb_info_free( h->fdec->mb_info );
            h->fdec->mb_info = NULL;
            h->fdec->mb_info_free = NULL;
        }
    }

    return 0;
}

2.帧内预测编码mb_analyse_intra

分别进行16x16、8x8、4x4尺寸的帧内预测模式遍历，通过SATD选取最优预测模式，其中：
16x16尺寸根据左边、上边以及左上是否有数据，需要最多遍历以下四种模式：I_PRED_16x16_V 垂直模式、I_PRED_16x16_H 水平模式、I_PRED_16x16_DC 均值模式和I_PRED_16x16_P 平面模式；

8x8尺寸和4x4尺寸最多遍历以下9种模式：I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_VL, I_PRED_4x4_HU；

代码如下：

static void mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )
{
    const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
    pixel *p_src = h->mb.pic.p_fenc[0];
    pixel *p_dst = h->mb.pic.p_fdec[0];
    static const int8_t intra_analysis_shortcut[2][2][2][5] =
    {
        {{{I_PRED_4x4_HU, -1, -1, -1, -1},
          {I_PRED_4x4_DDL, I_PRED_4x4_VL, -1, -1, -1}},
         {{I_PRED_4x4_DDR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1},
          {I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_VL, -1}}},
        {{{I_PRED_4x4_HU, -1, -1, -1, -1},
          {-1, -1, -1, -1, -1}},
         {{I_PRED_4x4_DDR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1},
          {I_PRED_4x4_DDR, I_PRED_4x4_VR, -1, -1, -1}}},
    };

    int idx;
    int lambda = a->i_lambda;

    /*---------------- Try all mode and calculate their score ---------------*/
    /* Disabled i16x16 for AVC-Intra compat */
    if( !h->param.i_avcintra_class )
    {   //根据当前切片的类型（I帧或其他类型），确定使用的预测模式的范围（predict_16x16_mode_available函数返回可用的预测模式数组）
        const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );

        /* Not heavily tuned *///然后，根据是否启用快速帧内模式（a->b_fast_intra），设置阈值参数（i16x16_thresh）来决定是否尝试16x16帧内预测模式
        static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
        int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;
        //如果不是无损编码（!h->mb.b_lossless）并且预测模式中包含I_PRED_16x16_P（平面模式）
        if( !h->mb.b_lossless && predict_mode[3] >= 0 )
        {   //使用h->pixf.intra_mbcmp_x3_16x16函数计算16x16帧内预测模式的SATD
            h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
            a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
            a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
            a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );
            //更新16x16帧内预测模式的SATD值和预测模式索引
            /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
            if( a->i_satd_i16x16 <= i16x16_thresh )
            {
                h->predict_16x16[I_PRED_16x16_P]( p_dst );
                a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_src, FENC_STRIDE, p_dst, FDEC_STRIDE );
                a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
                COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
            }
        }
        else
        {
            for( ; *predict_mode >= 0; predict_mode++ )
            {   //并根据不同的预测模式计算编码开销
                int i_satd;
                int i_mode = *predict_mode;

                if( h->mb.b_lossless )
                    x264_predict_lossless_16x16( h, 0, i_mode );
                else
                    h->predict_16x16[i_mode]( p_dst );

                i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_src, FENC_STRIDE, p_dst, FDEC_STRIDE ) +
                         lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
                COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
                a->i_satd_i16x16_dir[i_mode] = i_satd;
            }
        }

        if( h->sh.i_type == SLICE_TYPE_B )
            /* cavlc mb type prefix *///根据表格i_mb_b_cost_table计算cavlc mb类型前缀的编码开销，并将其添加到16x16帧内预测模式的SATD值中
            a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
        //如果16x16帧内预测模式的SATD值超过阈值i16x16_thresh，则返回，否则继续进行其他帧内预测模式的分析
        if( a->i_satd_i16x16 > i16x16_thresh )
            return;
    }

    uint16_t *cost_i4x4_mode = h->cost_table->i4x4_mode[a->i_qp] + 8;
    /* 8x8 prediction selection *///根据是否启用8x8帧内模式（flags & X264_ANALYSE_I8x8），设置阈值参数（i_satd_thresh）来决定是否尝试8x8帧内预测模式
    if( flags & X264_ANALYSE_I8x8 )
    {   //创建了一个名为edge的32字节对齐的像素数组（即pixel类型的名为edge的36个元素的数组）。该数组用于存储边缘像素数据
        ALIGNED_ARRAY_32( pixel, edge,[36] );
        x264_pixel_cmp_t sa8d = (h->pixf.mbcmp[0] == h->pixf.satd[0]) ? h->pixf.sa8d[PIXEL_8x8] : h->pixf.mbcmp[PIXEL_8x8];//用于计算8x8帧内预测块与原始块之间的像素比较值
        int i_satd_thresh = a->i_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 );//根据a->i_mbrd的值来决定i_satd_thresh的取值
        //将i_cost初始化为lambda * 4，其中lambda是一个常数，用于乘以4作为基本预测模式成本
        // FIXME some bias like in i4x4?
        int i_cost = lambda * 4; /* base predmode costs */
        h->mb.i_cbp_luma = 0;

        if( h->sh.i_type == SLICE_TYPE_B )
            i_cost += lambda * i_mb_b_cost_table[I_8x8];//这里使用了i_mb_b_cost_table数组来获取特定预测模式的成本

        for( idx = 0;; idx++ )
        {
            int x = idx&1;
            int y = idx>>1;//通过指针p_src_by和p_dst_by分别指向输入源图像和输出目标图像中当前8x8块的位置
            pixel *p_src_by = p_src + 8*x + 8*y*FENC_STRIDE;
            pixel *p_dst_by = p_dst + 8*x + 8*y*FDEC_STRIDE;
            int i_best = COST_MAX;
            int i_pred_mode = x264_mb_predict_intra4x4_mode( h, 4*idx );//根据索引4*idx使用x264_mb_predict_intra4x4_mode函数预测当前8x8块的帧内4x4预测模式，并将结果存储在i_pred_mode中
            //使用predict_8x8_mode_available函数获取可用的8x8预测模式数组
            const int8_t *predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx );
            h->predict_8x8_filter( p_dst_by, edge, h->mb.i_neighbour8[idx], ALL_NEIGHBORS );//调用h->predict_8x8_filter函数对当前8x8块进行预测滤波，将结果存储在p_dst_by中

            if( h->pixf.intra_mbcmp_x9_8x8 && predict_mode[8] >= 0 )
            {
                /* No shortcuts here. The SSSE3 implementation of intra_mbcmp_x9 is fast enough. */
                i_best = h->pixf.intra_mbcmp_x9_8x8( p_src_by, p_dst_by, edge, cost_i4x4_mode-i_pred_mode, a->i_satd_i8x8_dir[idx] );
                i_cost += i_best & 0xffff;
                i_best >>= 16;
                a->i_predict8x8[idx] = i_best;
                if( idx == 3 || i_cost > i_satd_thresh )
                    break;
                x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, i_best );
            }
            else
            {
                if( !h->mb.b_lossless && predict_mode[5] >= 0 )//如果不是无损编码，并且预测模式数组中的第5个元素大于等于0
                {   //创建一个名为satd的16字节对齐的数组，用于存储不同预测模式下的SATD值
                    ALIGNED_ARRAY_16( int32_t, satd,[4] );
                    h->pixf.intra_mbcmp_x3_8x8( p_src_by, edge, satd );//调用h->pixf.intra_mbcmp_x3_8x8函数计算原始块和边缘块之间的像素比较值，并将结果存储在satd数组中
                    int favor_vertical = satd[I_PRED_4x4_H] > satd[I_PRED_4x4_V];//根据satd[I_PRED_4x4_H]和satd[I_PRED_4x4_V]的值判断偏向垂直方向还是水平方向
                    if( i_pred_mode < 3 )
                        satd[i_pred_mode] -= 3 * lambda;
                    for( int i = 2; i >= 0; i-- )
                    {   //遍历satd数组，计算每个预测模式的成本，并更新最佳成本i_best和对应的预测模式索引
                        int cost = satd[i];
                        a->i_satd_i8x8_dir[idx][i] = cost + 4 * lambda;
                        COPY2_IF_LT( i_best, cost, a->i_predict8x8[idx], i );
                    }

                    /* Take analysis shortcuts: don't analyse modes that are too
                     * far away direction-wise from the favored mode. */
                    if( a->i_mbrd < 1 + a->b_fast_intra )
                        predict_mode = intra_analysis_shortcut[a->b_avoid_topright][predict_mode[8] >= 0][favor_vertical];
                    else
                        predict_mode += 3;
                }

                for( ; *predict_mode >= 0 && (i_best >= 0 || a->i_mbrd >= 2); predict_mode++ )
                {
                    int i_satd;
                    int i_mode = *predict_mode;

                    if( h->mb.b_lossless )
                        x264_predict_lossless_8x8( h, p_dst_by, 0, idx, i_mode, edge );
                    else//根据当前预测模式i_mode调用相应的预测函数对当前8x8块进行预测，并将结果存储在p_dst_by中
                        h->predict_8x8[i_mode]( p_dst_by, edge );
                    //调用sa8d函数计算预测块和原始块之间的SA8D值，并将结果存储在i_satd中
                    i_satd = sa8d( p_dst_by, FDEC_STRIDE, p_src_by, FENC_STRIDE );
                    if( i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) )//如果当前预测模式与预测模式数组中的预测模式一致
                        i_satd -= 3 * lambda;

                    COPY2_IF_LT( i_best, i_satd, a->i_predict8x8[idx], i_mode );
                    a->i_satd_i8x8_dir[idx][i_mode] = i_satd + 4 * lambda;
                }
                i_cost += i_best + 3*lambda;//更新总的编码开销i_cost，将i_best加到i_cost中
                //如果idx等于3（表示已经分析了4个8x8块）或者i_cost大于i_satd_thresh（编码开销超过阈值），则跳出循环
                if( idx == 3 || i_cost > i_satd_thresh )
                    break;
                if( h->mb.b_lossless )
                    x264_predict_lossless_8x8( h, p_dst_by, 0, idx, a->i_predict8x8[idx], edge );
                else
                    h->predict_8x8[a->i_predict8x8[idx]]( p_dst_by, edge );//调用相应的预测函数对当前8x8块进行预测，并将结果存储在p_dst_by中
                x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, a->i_predict8x8[idx] );//函数将最佳预测模式应用于2x2块的宏块缓存
            }
            /* we need to encode this block now (for next ones) *///调用x264_mb_encode_i8x8函数对当前8x8块进行编码
            x264_mb_encode_i8x8( h, 0, idx, a->i_qp, a->i_predict8x8[idx], edge, 0 );
        }

        if( idx == 3 )
        {   //将总的SA8D值i_cost存储在a->i_satd_i8x8中
            a->i_satd_i8x8 = i_cost;
            if( h->mb.i_skip_intra )
            {   //将当前8x8块的预测结果复制到帧解码缓冲区的相应位置
                h->mc.copy[PIXEL_16x16]( h->mb.pic.i8x8_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 );
                h->mb.pic.i8x8_nnz_buf[0] = M32( &h->mb.cache.non_zero_count[x264_scan8[ 0]] );//更新宏块的非零系数缓冲区，将预测结果的非零系数统计值存储在对应的位置
                h->mb.pic.i8x8_nnz_buf[1] = M32( &h->mb.cache.non_zero_count[x264_scan8[ 2]] );
                h->mb.pic.i8x8_nnz_buf[2] = M32( &h->mb.cache.non_zero_count[x264_scan8[ 8]] );
                h->mb.pic.i8x8_nnz_buf[3] = M32( &h->mb.cache.non_zero_count[x264_scan8[10]] );
                h->mb.pic.i8x8_cbp = h->mb.i_cbp_luma;//更新宏块的色度编码块模式，将宏块的亮度CBP值存储在h->mb.pic.i8x8_cbp中
                if( h->mb.i_skip_intra == 2 )//则将宏块的离散余弦变换系数复制到宏块的离散余弦变换缓冲区中
                    h->mc.memcpy_aligned( h->mb.pic.i8x8_dct_buf, h->dct.luma8x8, sizeof(h->mb.pic.i8x8_dct_buf) );
            }
        }
        else
        {
            static const uint16_t cost_div_fix8[3] = {1024,512,341};
            a->i_satd_i8x8 = COST_MAX; //将a->i_satd_i8x8初始化为最大的成本值
            i_cost = (i_cost * cost_div_fix8[idx]) >> 8;
        }
        /* Not heavily tuned *///如果启用了提前终止
        static const uint8_t i8x8_thresh[11] = { 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6 };
        if( a->b_early_terminate && X264_MIN(i_cost, a->i_satd_i16x16) > (i_satd_inter*i8x8_thresh[h->mb.i_subpel_refine])>>2 )
            return;
    }

    /* 4x4 prediction selection */
    if( flags & X264_ANALYSE_I4x4 )
    {   //初始化总的成本i_cost
        int i_cost = lambda * (24+16); /* 24from JVT (SATD0), 16 from base predmode costs */
        int i_satd_thresh = a->b_early_terminate ? X264_MIN3( i_satd_inter, a->i_satd_i16x16, a->i_satd_i8x8 ) : COST_MAX;
        h->mb.i_cbp_luma = 0;

        if( a->b_early_terminate && a->i_mbrd )//如果提前终止启用并且i_mbrd
            i_satd_thresh = i_satd_thresh * (10-a->b_fast_intra)/8;

        if( h->sh.i_type == SLICE_TYPE_B )//如果当前帧的类型为B帧
            i_cost += lambda * i_mb_b_cost_table[I_4x4];

        for( idx = 0;; idx++ )//对每个4x4块进行处理，其中idx表示块的索引
        {
            pixel *p_src_by = p_src + block_idx_xy_fenc[idx];
            pixel *p_dst_by = p_dst + block_idx_xy_fdec[idx];
            int i_best = COST_MAX;
            int i_pred_mode = x264_mb_predict_intra4x4_mode( h, idx );//根据块的索引idx和邻域信息，确定预测模式i_pred_mode
            //选择相应的预测模式数组predict_mode
            const int8_t *predict_mode = predict_4x4_mode_available( a->b_avoid_topright, h->mb.i_neighbour4[idx], idx );

            if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
                /* emulate missing topright samples */
                MPIXEL_X4( &p_dst_by[4 - FDEC_STRIDE] ) = PIXEL_SPLAT_X4( p_dst_by[3 - FDEC_STRIDE] );

            if( h->pixf.intra_mbcmp_x9_4x4 && predict_mode[8] >= 0 )
            {
                /* No shortcuts here. The SSSE3 implementation of intra_mbcmp_x9 is fast enough. */
                i_best = h->pixf.intra_mbcmp_x9_4x4( p_src_by, p_dst_by, cost_i4x4_mode-i_pred_mode );
                i_cost += i_best & 0xffff;
                i_best >>= 16;
                a->i_predict4x4[idx] = i_best;
                if( i_cost > i_satd_thresh || idx == 15 )
                    break;
                h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = i_best;
            }
            else
            {
                if( !h->mb.b_lossless && predict_mode[5] >= 0 )//如果不是无损编码且预测模式数组中的第5个模式大于等于0
                {
                    ALIGNED_ARRAY_16( int32_t, satd,[4] );
                    h->pixf.intra_mbcmp_x3_4x4( p_src_by, p_dst_by, satd );//使用intra_mbcmp_x3_4x4函数计算预测模式与目标块之间的成本
                    int favor_vertical = satd[I_PRED_4x4_H] > satd[I_PRED_4x4_V];
                    if( i_pred_mode < 3 )
                        satd[i_pred_mode] -= 3 * lambda;
                    i_best = satd[I_PRED_4x4_DC]; a->i_predict4x4[idx] = I_PRED_4x4_DC;//根据成本选择最佳的预测模式，并将其存储在a->i_predict4x4[idx]中
                    COPY2_IF_LT( i_best, satd[I_PRED_4x4_H], a->i_predict4x4[idx], I_PRED_4x4_H );
                    COPY2_IF_LT( i_best, satd[I_PRED_4x4_V], a->i_predict4x4[idx], I_PRED_4x4_V );

                    /* Take analysis shortcuts: don't analyse modes that are too
                     * far away direction-wise from the favored mode. */
                    if( a->i_mbrd < 1 + a->b_fast_intra )
                        predict_mode = intra_analysis_shortcut[a->b_avoid_topright][predict_mode[8] >= 0][favor_vertical];
                    else
                        predict_mode += 3;
                }

                if( i_best > 0 )
                {
                    for( ; *predict_mode >= 0; predict_mode++ )
                    {
                        int i_satd;
                        int i_mode = *predict_mode;

                        if( h->mb.b_lossless )
                            x264_predict_lossless_4x4( h, p_dst_by, 0, idx, i_mode );
                        else
                            h->predict_4x4[i_mode]( p_dst_by );
                        //根据最佳的预测模式进行预测，并计算预测模式与源块之间的成本
                        i_satd = h->pixf.mbcmp[PIXEL_4x4]( p_src_by, FENC_STRIDE, p_dst_by, FDEC_STRIDE );
                        if( i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) )
                        {
                            i_satd -= lambda * 3;
                            if( i_satd <= 0 )
                            {
                                i_best = i_satd;//更新总成本i_cost和预测模式a->i_predict4x4[idx]
                                a->i_predict4x4[idx] = i_mode;
                                break;
                            }
                        }

                        COPY2_IF_LT( i_best, i_satd, a->i_predict4x4[idx], i_mode );
                    }
                }

                i_cost += i_best + 3 * lambda;
                if( i_cost > i_satd_thresh || idx == 15 )
                    break;//如果总成本超过了SATD阈值或者处理到了最后一个块（idx == 15），则退出循环
                if( h->mb.b_lossless )
                    x264_predict_lossless_4x4( h, p_dst_by, 0, idx, a->i_predict4x4[idx] );
                else
                    h->predict_4x4[a->i_predict4x4[idx]]( p_dst_by );
                h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = a->i_predict4x4[idx];
            }
            /* we need to encode this block now (for next ones) *///编码
            x264_mb_encode_i4x4( h, 0, idx, a->i_qp, a->i_predict4x4[idx], 0 );
        }
        if( idx == 15 )//如果循环结束时idx等于15，表示已经处理完了所有的块
        {   //将总的SA4D值i_cost存储在a->i_satd_i4x4中，并进行一些后续的处理（如拷贝预测结果、更新非零系数等）
            a->i_satd_i4x4 = i_cost;
            if( h->mb.i_skip_intra )
            {
                h->mc.copy[PIXEL_16x16]( h->mb.pic.i4x4_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 );
                h->mb.pic.i4x4_nnz_buf[0] = M32( &h->mb.cache.non_zero_count[x264_scan8[ 0]] );
                h->mb.pic.i4x4_nnz_buf[1] = M32( &h->mb.cache.non_zero_count[x264_scan8[ 2]] );
                h->mb.pic.i4x4_nnz_buf[2] = M32( &h->mb.cache.non_zero_count[x264_scan8[ 8]] );
                h->mb.pic.i4x4_nnz_buf[3] = M32( &h->mb.cache.non_zero_count[x264_scan8[10]] );
                h->mb.pic.i4x4_cbp = h->mb.i_cbp_luma;
                if( h->mb.i_skip_intra == 2 )
                    h->mc.memcpy_aligned( h->mb.pic.i4x4_dct_buf, h->dct.luma4x4, sizeof(h->mb.pic.i4x4_dct_buf) );
            }
        }
        else
            a->i_satd_i4x4 = COST_MAX;//否则，表示未能处理完所有的块，将a->i_satd_i4x4设为最大的成本值
    }
}

3.8x8帧内模式编码x264_mb_encode_i8x8

x264_mb_encode_i8x8是对单个8x8块进行编码的函数，依次完成预测（仅当b_predict为真时）、变换、量化、反量化和逆变换等过程，并更新非零系数标记h->mb.i_cbp_luma以及非零系数个数的缓存；在无损模式下则跳过变换和量化，直接对残差做扫描后返回。代码如下：

/* Encode one 8x8 block of plane p.
 *
 * Optionally runs intra prediction into the reconstruction buffer, then either
 * takes the lossless path (zigzagf.sub_8x8) or runs
 * sub8x8_dct8 -> x264_quant_8x8 -> scan_8x8 -> dequant_8x8 -> add8x8_idct8.
 * In both paths h->mb.i_cbp_luma and the STORE_8x8_NNZ entries for the block
 * are updated.
 *
 *   h          encoder context
 *   p          plane index; selects p_fenc/p_fdec, the luma8x8 slot (p*4+idx)
 *              and the dequant matrix (CQM_8IY when p == 0, CQM_8IC otherwise)
 *   idx        8x8 block index; idx&1 is the column, idx>>1 is the row
 *   i_qp       QP handed to x264_quant_8x8 and dequant_8x8
 *   i_mode     intra 8x8 prediction mode, only read when b_predict is non-zero
 *   edge       edge buffer handed to the predictor; when NULL it is produced
 *              here with h->predict_8x8_filter
 *   b_predict  non-zero to predict the block before coding the residual
 */
static ALWAYS_INLINE void x264_mb_encode_i8x8( x264_t *h, int p, int idx, int i_qp, int i_mode, pixel *edge, int b_predict )
{
    int col = idx & 1;
    int row = idx >> 1;
    int nnz;
    pixel *src_blk = h->mb.pic.p_fenc[p] + (8*col + 8*row*FENC_STRIDE);
    pixel *rec_blk = h->mb.pic.p_fdec[p] + (8*col + 8*row*FDEC_STRIDE);
    ALIGNED_ARRAY_64( dctcoef, coefs,[64] );
    ALIGNED_ARRAY_32( pixel, local_edge,[36] );

    if( b_predict )
    {
        /* The caller did not supply an edge buffer: build one locally. */
        if( !edge )
        {
            h->predict_8x8_filter( rec_blk, local_edge, h->mb.i_neighbour8[idx], x264_pred_i4x4_neighbors[i_mode] );
            edge = local_edge;
        }

        /* Lossless macroblocks go through their own prediction routine. */
        if( h->mb.b_lossless )
            x264_predict_lossless_8x8( h, rec_blk, p, idx, i_mode, edge );
        else
            h->predict_8x8[i_mode]( rec_blk, edge );
    }

    /* Lossless: no transform or quantisation; sub_8x8 fills luma8x8 directly
     * and its return value is used as the non-zero flag. */
    if( h->mb.b_lossless )
    {
        nnz = h->zigzagf.sub_8x8( h->dct.luma8x8[p*4+idx], src_blk, rec_blk );
        STORE_8x8_NNZ( p, idx, nnz );
        h->mb.i_cbp_luma |= nnz<<idx;
        return;
    }

    /* Forward transform of (source - prediction), then quantisation. */
    h->dctf.sub8x8_dct8( coefs, src_blk, rec_blk );
    nnz = x264_quant_8x8( h, coefs, i_qp, ctx_cat_plane[DCT_LUMA_8x8][p], 1, p, idx );

    /* Nothing survived quantisation: record a zero count and leave the
     * prediction in rec_blk untouched. */
    if( !nnz )
    {
        STORE_8x8_NNZ( p, idx, 0 );
        return;
    }

    h->mb.i_cbp_luma |= 1<<idx;
    /* Scan the quantised coefficients into the output array before they are
     * overwritten by dequantisation. */
    h->zigzagf.scan_8x8( h->dct.luma8x8[p*4+idx], coefs );
    /* Reconstruct: dequantise in place, then inverse-transform onto rec_blk. */
    h->quantf.dequant_8x8( coefs, h->dequant8_mf[p?CQM_8IC:CQM_8IY], i_qp );
    h->dctf.add8x8_idct8( rec_blk, coefs );
    STORE_8x8_NNZ( p, idx, 1 );
}

4.帧内预测率失真优化过程intra_rd

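这段代码的目的是在帧内预测中，对SATD代价低于阈值i_satd_thresh的I_16x16、I_4x4、I_8x8三种宏块类型，分别调用rd_cost_mb计算码率失真代价，并用该代价覆盖原先的SATD代价；未通过阈值的类型，其代价被置为COST_MAX（未启用提前终止时阈值即为COST_MAX）。计算前会先设置宏块类型并调用analyse_update_cache更新相关缓存，I_8x8还会额外保存亮度CBP。函数本身并不做最终的模式选择，只是更新各类型的代价。代码如下：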

/* Replace the stored intra costs of I16x16, I4x4 and I8x8 with the value
 * returned by rd_cost_mb().
 *
 * A type is only evaluated when its current cost is below the threshold;
 * otherwise its cost is set to COST_MAX. When a->b_early_terminate is not set
 * the caller's threshold is ignored and COST_MAX is used instead.
 *
 *   h              encoder context; h->mb.i_type is overwritten for each
 *                  evaluated type
 *   a              analysis state holding the per-type costs (updated in place)
 *   i_satd_thresh  cost threshold, honoured only with early termination
 */
static void intra_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_thresh )
{
    const int limit = a->b_early_terminate ? i_satd_thresh : COST_MAX;

    /* I16x16 */
    if( a->i_satd_i16x16 >= limit )
        a->i_satd_i16x16 = COST_MAX;
    else
    {
        h->mb.i_type = I_16x16;
        analyse_update_cache( h, a );
        a->i_satd_i16x16 = rd_cost_mb( h, a->i_lambda2 );
    }

    /* I4x4 */
    if( a->i_satd_i4x4 >= limit )
        a->i_satd_i4x4 = COST_MAX;
    else
    {
        h->mb.i_type = I_4x4;
        analyse_update_cache( h, a );
        a->i_satd_i4x4 = rd_cost_mb( h, a->i_lambda2 );
    }

    /* I8x8: the luma CBP left in h->mb after rd_cost_mb is saved as well. */
    if( a->i_satd_i8x8 >= limit )
        a->i_satd_i8x8 = COST_MAX;
    else
    {
        h->mb.i_type = I_8x8;
        analyse_update_cache( h, a );
        a->i_satd_i8x8 = rd_cost_mb( h, a->i_lambda2 );
        a->i_cbp_i8x8_luma = h->mb.i_cbp_luma;
    }
}