x264 - x264_mb_encode_i16x16

最新推荐文章于 2019-12-16 20:54:08 发布

fanbird2008

最新推荐文章于 2019-12-16 20:54:08 发布

阅读量578

点赞数

分类专栏： Stream Media/H264

本文链接：https://blog.csdn.net/fanbird2008/article/details/32343755

版权

Stream Media/H264 专栏收录该内容

126 篇文章 3 订阅

订阅专栏

/* All encoding functions must output the correct CBP and NNZ values.
* The entropy coding functions will check CBP first, then NNZ, before
* actually reading the DCT coefficients. NNZ still must be correct even
* if CBP is zero because of the use of NNZ values for context selection.
* "NNZ" need only be 0 or 1 rather than the exact coefficient count because
* that is only needed in CAVLC, and will be calculated by CAVLC's residual
* coding and stored as necessary. */

/* This means that decimation can be done merely by adjusting the CBP and NNZ
* rather than memsetting the coefficients. */

/* Encode one plane of the current macroblock as an intra 16x16 block.
 *
 * h    - encoder context; the prediction mode, feature flags, quant tables
 *        and all output buffers are reached through it.
 * p    - plane index. Selects the fenc/fdec plane, the quant matrix category
 *        (CQM_4IY when p == 0, CQM_4IC otherwise) and the 16*p offset used
 *        for h->dct.luma4x4[] and x264_scan8[].
 * i_qp - quantizer index used to pick the quant/dequant table rows.
 *
 * Outputs (all via h): scanned levels in h->dct.luma4x4[16*p + idx] and
 * h->dct.luma16x16_dc[p], per-block flags in h->mb.cache.non_zero_count[],
 * bits OR-ed into h->mb.i_cbp_luma, and reconstructed pixels in the fdec
 * plane (p_dst). */
static void x264_mb_encode_i16x16( x264_t *h, int p, int i_qp )
{
pixel *p_src = h->mb.pic.p_fenc[p];
pixel *p_dst = h->mb.pic.p_fdec[p];

    // Declares, in effect, dctcoef dct4x4[16][16] and dctcoef dct_dc4x4[16].
    // NOTE(review): ALIGNED_ARRAY_N is defined elsewhere; presumably it also forces alignment.
    ALIGNED_ARRAY_N( dctcoef, dct4x4,[16],[16] );
    ALIGNED_ARRAY_N( dctcoef, dct_dc4x4,[16] );

    int nz, block_cbp = 0;
    // Starting at 9 when decimation is off keeps every "decimate_score < 6" test below false.
    int decimate_score = h->mb.b_dct_decimate ? 0 : 9;
    int i_quant_cat = p ? CQM_4IC : CQM_4IY;
    // Blog author's trace note: for the first MB of their test run, i_mode was 6.
    int i_mode = h->mb.i_intra16x16_pred_mode;

    if( h->mb.b_lossless )
        x264_predict_lossless_16x16( h, p, i_mode );
    else // Blog author's trace note: in their run predict_16x16[6] was x264_predict_16x16_dc_128_c.
        h->predict_16x16[i_mode]( h->mb.pic.p_fdec[p] );

    if( h->mb.b_lossless )
    {
        // Lossless path: no DCT/quant stage. Each 4x4 block goes through
        // sub_4x4ac, which is handed the level buffer, the source and
        // prediction pixels, and a pointer to that block's slot in dct_dc4x4.
        for( int i = 0; i < 16; i++ )
        {
            int oe = block_idx_xy_fenc[i];
            int od = block_idx_xy_fdec[i];
            nz = h->zigzagf.sub_4x4ac( h->dct.luma4x4[16*p+i], p_src+oe, p_dst+od, &dct_dc4x4[block_idx_yx_1d[i]] );
            h->mb.cache.non_zero_count[x264_scan8[16*p+i]] = nz;
            block_cbp |= nz;
        }
        // block_cbp is the OR of the per-block nz values; with 0/1 flags,
        // multiplying by 0xf sets all four CBP bits as soon as any block is nonzero.
        h->mb.i_cbp_luma |= block_cbp * 0xf;
        h->mb.cache.non_zero_count[x264_scan8[LUMA_DC+p]] = array_non_zero( dct_dc4x4, 16 );
        h->zigzagf.scan_4x4( h->dct.luma16x16_dc[p], dct_dc4x4 );
        return;
    }
    // Reset the NNZ cache entries of all sixteen 4x4 blocks of plane p;
    // only blocks that end up with coefficients are set to 1 below.
    CLEAR_16x16_NNZ( p );

    // Fill dct4x4 with sixteen 4x4 coefficient blocks computed from p_src and p_dst.
    // Blog author's trace note: sub16x16_dct resolved to the plain C sub16x16_dct.
    h->dctf.sub16x16_dct( dct4x4, p_src, p_dst );

    if( h->mb.b_noise_reduction )
        for( int idx = 0; idx < 16; idx++ )
            h->quantf.denoise_dct( dct4x4[idx], h->nr_residual_sum[0], h->nr_offset[0], 16 );

    // Move coefficient 0 (the DC term) of every 4x4 block into dct_dc4x4 and
    // zero it in place, leaving dct4x4 with AC terms only. Note the index
    // remap: block idx is stored at position block_idx_xy_1d[idx].
    for( int idx = 0; idx < 16; idx++ )
    {
        dct_dc4x4[block_idx_xy_1d[idx]] = dct4x4[idx][0];
        dct4x4[idx][0] = 0;
    }

    if( h->mb.b_trellis )
    {
        // Trellis path: quantize one 4x4 block at a time; a nonzero return
        // means the block kept at least one coefficient.
        for( int idx = 0; idx < 16; idx++ )
            if( x264_quant_4x4_trellis( h, dct4x4[idx], i_quant_cat, i_qp, ctx_cat_plane[DCT_LUMA_AC][p], 1, !!p, idx ) )
            {
                block_cbp = 0xf;
                h->zigzagf.scan_4x4( h->dct.luma4x4[16*p+idx], dct4x4[idx] );
                h->quantf.dequant_4x4( dct4x4[idx], h->dequant4_mf[i_quant_cat], i_qp );
                if( decimate_score < 6 ) decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[16*p+idx] );
                h->mb.cache.non_zero_count[x264_scan8[16*p+idx]] = 1;
            }
    }
    else
    {
        // Plain quantization: process the 16x16 block as four groups of four 4x4 blocks.
        for( int i8x8 = 0; i8x8 < 4; i8x8++ )
        {
            // quant_4x4x4 quantizes the four 4x4 blocks starting at dct4x4[i8x8*4].
            // NOTE(review): nz looks like a bitmask with one bit per 4x4 block
            // (it is consumed by FOREACH_BIT below) -- confirm against quant_4x4x4.
            nz = h->quantf.quant_4x4x4( &dct4x4[i8x8*4], h->quant4_mf[i_quant_cat][i_qp], h->quant4_bias[i_quant_cat][i_qp] );
            // Skip the whole group when nothing survived quantization.
            if( nz )
            {
                block_cbp = 0xf;
                // NOTE(review): FOREACH_BIT is defined elsewhere; presumably it runs
                // the body with idx = i8x8*4 + bit for every set bit of nz.
                FOREACH_BIT( idx, i8x8*4, nz )
                {
                    // Store the quantized levels, in scan order, into h->dct.luma4x4.
                    h->zigzagf.scan_4x4( h->dct.luma4x4[16*p+idx], dct4x4[idx] );
                    // Dequantize in place so dct4x4 can feed the reconstruction below.
                    h->quantf.dequant_4x4( dct4x4[idx], h->dequant4_mf[i_quant_cat], i_qp );
                    if( decimate_score < 6 ) decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[16*p+idx] );
                    // Flag this 4x4 block as having coefficients.
                    h->mb.cache.non_zero_count[x264_scan8[16*p+idx]] = 1;
                }
            }
        }
    }

    /* Writing the 16 CBFs in an i16x16 block is quite costly, so decimation can save many bits. */
    /* More useful with CAVLC, but still useful with CABAC. */
    // When the accumulated score stays below 6 the AC part is dropped: only
    // the NNZ flags and block_cbp are reset, the level buffers are left as is.
    if( decimate_score < 6 )
    {
        CLEAR_16x16_NNZ( p );
        block_cbp = 0;
    }
    else
        h->mb.i_cbp_luma |= block_cbp;

    // Transform the sixteen collected DC terms together as one 4x4 block.
    // Blog author's trace note: dct4x4dc resolved to the plain C dct4x4dc.
    h->dctf.dct4x4dc( dct_dc4x4 );
    if( h->mb.b_trellis )
        nz = x264_quant_luma_dc_trellis( h, dct_dc4x4, i_quant_cat, i_qp, ctx_cat_plane[DCT_LUMA_DC][p], 1, LUMA_DC+p );
    else // DC quant uses entry [0] of the quant tables, with mf halved (>>1) and bias doubled (<<1).
        nz = h->quantf.quant_4x4_dc( dct_dc4x4, h->quant4_mf[i_quant_cat][i_qp][0]>>1, h->quant4_bias[i_quant_cat][i_qp][0]<<1 );

    // Record the DC nonzero flag in this plane's DC slot of the NNZ cache.
    h->mb.cache.non_zero_count[x264_scan8[LUMA_DC+p]] = nz;
    if( nz )
    {
        // Store the quantized DC levels, in scan order, into h->dct.luma16x16_dc.
        // Blog author's trace note: for frame encoding scan_4x4 was zigzag_scan_4x4_frame.
        h->zigzagf.scan_4x4( h->dct.luma16x16_dc[p], dct_dc4x4 );

        /* output samples to fdec */
        // Inverse DC transform first, then dequantize (this order is what the
        // upstream "XXX not inversed" remark below refers to).
        h->dctf.idct4x4dc( dct_dc4x4 );
        h->quantf.dequant_4x4_dc( dct_dc4x4, h->dequant4_mf[i_quant_cat], i_qp ); /* XXX not inversed */
        if( block_cbp )
            for( int i = 0; i < 16; i++ ) // put each DC term back into coefficient 0 of its 4x4 block
                dct4x4[i][0] = dct_dc4x4[block_idx_xy_1d[i]];
    }

    /* put pixels to fdec */
    if( block_cbp ) // AC survived: reconstruct from the full 4x4 blocks (DC already merged back in)
        h->dctf.add16x16_idct( p_dst, dct4x4 );
    else if( nz )    // DC only: reconstruct from dct_dc4x4 alone
        h->dctf.add16x16_idct_dc( p_dst, dct_dc4x4 );
}

// x264_scan8 layout:
//   entries  0..15 : the sixteen 4x4 blocks of the Y plane
//   entries 16..31 : the sixteen 4x4 blocks of the U plane
//   entries 32..47 : the sixteen 4x4 blocks of the V plane
//   entries 48..50 : the Y, U and V DC slots
//
// p = 0, 1, 2 selects the Y, U, V plane respectively, so
// x264_scan8[16 * p] is the cache position of the first 4x4 block
// of plane p.
//

/* Zero the cached non-zero-count flags of all sixteen 4x4 blocks of plane p.
 *
 * Expects a variable named `h` in the calling scope and the x264_scan8 table.
 * x264_scan8[16*(p)] is the cache position of block 0 of the plane; the cache
 * is 8 entries wide, so "+ k*8" moves down k rows. Each M32 store clears four
 * consecutive entries, i.e. one row of four blocks:
 *   row 0 -> blocks  0,  1,  4,  5
 *   row 1 -> blocks  2,  3,  6,  7
 *   row 2 -> blocks  8,  9, 12, 13
 *   row 3 -> blocks 10, 11, 14, 15
 *
 * The explanation lives up here on purpose: a "//" comment inside the body
 * without a trailing backslash ends the macro definition early. The argument
 * is parenthesized so that expressions such as `plane + 1` expand correctly. */
#define CLEAR_16x16_NNZ( p ) \
do\
{\
    M32( &h->mb.cache.non_zero_count[x264_scan8[16*(p)] + 0*8] ) = 0;\
    M32( &h->mb.cache.non_zero_count[x264_scan8[16*(p)] + 1*8] ) = 0;\
    M32( &h->mb.cache.non_zero_count[x264_scan8[16*(p)] + 2*8] ) = 0;\
    M32( &h->mb.cache.non_zero_count[x264_scan8[16*(p)] + 3*8] ) = 0;\
} while(0)

/* Scan8 organization:
*    0 1 2 3 4 5 6 7
* 0  DY    y y y y y
* 1        y Y Y Y Y
* 2        y Y Y Y Y
* 3        y Y Y Y Y
* 4        y Y Y Y Y
* 5  DU    u u u u u
* 6        u U U U U
* 7        u U U U U
* 8        u U U U U
* 9        u U U U U
* 10 DV    v v v v v
* 11       v V V V V
* 12       v V V V V
* 13       v V V V V
* 14       v V V V V
* DY/DU/DV are for luma/chroma DC.
* A cell's cache index is column + 8*row; the uppercase Y/U/V cells are
* the positions listed in x264_scan8.
*/

/* Indices into x264_scan8[] of the DC slots, which follow the 16*3 per-block
 * entries: entry 48 is the first DC slot (LUMA_DC + p addresses the DC slot
 * of plane p) and entry 49 is the second. */
#define LUMA_DC 48
#define CHROMA_DC 49

/* Maps a block index to its position in the 8-entry-wide NNZ cache.
 * Entries 0..15 are the Y blocks, 16..31 the U blocks, 32..47 the V blocks,
 * and 48..50 the Y/U/V DC slots. Each position is column + 8*row. */
#define SCAN8_AT( col, row ) ( (col) + (row)*8 )
static const uint8_t x264_scan8[16*3 + 3] =
{
    /* Y: columns 4..7, rows 1..4 */
    SCAN8_AT( 4,  1 ), SCAN8_AT( 5,  1 ), SCAN8_AT( 4,  2 ), SCAN8_AT( 5,  2 ),
    SCAN8_AT( 6,  1 ), SCAN8_AT( 7,  1 ), SCAN8_AT( 6,  2 ), SCAN8_AT( 7,  2 ),
    SCAN8_AT( 4,  3 ), SCAN8_AT( 5,  3 ), SCAN8_AT( 4,  4 ), SCAN8_AT( 5,  4 ),
    SCAN8_AT( 6,  3 ), SCAN8_AT( 7,  3 ), SCAN8_AT( 6,  4 ), SCAN8_AT( 7,  4 ),

    /* U: columns 4..7, rows 6..9 */
    SCAN8_AT( 4,  6 ), SCAN8_AT( 5,  6 ), SCAN8_AT( 4,  7 ), SCAN8_AT( 5,  7 ),
    SCAN8_AT( 6,  6 ), SCAN8_AT( 7,  6 ), SCAN8_AT( 6,  7 ), SCAN8_AT( 7,  7 ),
    SCAN8_AT( 4,  8 ), SCAN8_AT( 5,  8 ), SCAN8_AT( 4,  9 ), SCAN8_AT( 5,  9 ),
    SCAN8_AT( 6,  8 ), SCAN8_AT( 7,  8 ), SCAN8_AT( 6,  9 ), SCAN8_AT( 7,  9 ),

    /* V: columns 4..7, rows 11..14 */
    SCAN8_AT( 4, 11 ), SCAN8_AT( 5, 11 ), SCAN8_AT( 4, 12 ), SCAN8_AT( 5, 12 ),
    SCAN8_AT( 6, 11 ), SCAN8_AT( 7, 11 ), SCAN8_AT( 6, 12 ), SCAN8_AT( 7, 12 ),
    SCAN8_AT( 4, 13 ), SCAN8_AT( 5, 13 ), SCAN8_AT( 4, 14 ), SCAN8_AT( 5, 14 ),
    SCAN8_AT( 6, 13 ), SCAN8_AT( 7, 13 ), SCAN8_AT( 6, 14 ), SCAN8_AT( 7, 14 ),

    /* DC slots: column 0 of rows 0, 5 and 10 */
    SCAN8_AT( 0,  0 ), SCAN8_AT( 0,  5 ), SCAN8_AT( 0, 10 )
};
#undef SCAN8_AT

fanbird2008

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
x264 - x264_mb_encode_i16x16

/* All encoding functions must output the correct CBP and NNZ values. * The entropy coding functions will check CBP first, then NNZ, before * actually reading the DCT coefficients. NNZ still must
复制链接

扫一扫