AVS3代码阅读HPM4.0

chenyuanxu

已于 2023-02-16 15:05:02 修改

阅读量2.6k

点赞数 5

分类专栏：视频编码基础文章标签： avs3 计算机视觉算法

于 2020-12-13 18:33:56 首次发布

本文链接：https://blog.csdn.net/weixin_41926958/article/details/110821482

版权

视频编码基础专栏收录该内容

6 篇文章 3 订阅

订阅专栏

文章目录

结构体
main()函数
mode_coding_tree()
mode_coding_unit()
- copy_cu_data()
- diff_16b_32nx4n_simd()

对不起各位，本篇博文的下一期鸽了好久，主要是因为本人在校期间被分到了其他方向，所以搁置了传统编解码的学习。目前正在考虑重新回归编解码这个坑，博文可能会再次更新~

参考文章：《AVS3代码阅读（HPM6.0）（一.整体架构以及CU划分部分代码）》
版本：HPM4.0
核心文件：app_encoder.c
在这里插入图片描述

结构体

ENC_PICO：原图缓冲结构

/*****************************************************************************
 * original picture buffer structure (ENC_PICO)
 *****************************************************************************/
typedef struct _ENC_PICO
{
    /* original picture store */
    COM_PIC                pic;
    /* input picture count */
    u32                     pic_icnt;
    /* whether this entry is being used as encoding input */
    u8                      is_used;

    /* address of sub-picture */
    COM_PIC              * spic;
} ENC_PICO;

ENC_PINTRA：帧内预测结构

/*****************************************************************************
 * intra prediction structure (ENC_PINTRA)
 *****************************************************************************/
typedef struct _ENC_PINTRA
{
    /* temporary prediction buffer */
    pel                 pred[N_C][MAX_CU_DIM];
    pel                 pred_cache[IPD_CNT][MAX_CU_DIM]; // only for luma

    /* reconstruction buffer */
    pel                 rec[N_C][MAX_CU_DIM];

    /* address of original (input) picture buffer, one per component */
    pel               * addr_org[N_C];
    /* stride of original (input) picture buffer, one per component */
    int                 stride_org[N_C];

    /* address of reconstruction picture buffer, one per component */
    pel               * addr_rec_pic[N_C];
    /* stride of reconstruction picture buffer, one per component */
    int                 stride_rec[N_C];

    /* QP for luma */
    u8                  qp_y;
    /* QP for chroma */
    u8                  qp_u;
    u8                  qp_v;

    int                 slice_type;

    /* NOTE(review): the use of complexity/pdata/ndata is not visible in this
     * excerpt -- confirm against the code that fills them in. */
    int                 complexity;
    void              * pdata[4];
    int               * ndata[4];

    int                 bit_depth;
} ENC_PINTRA;

ENC_PINTER：帧间预测结构

/*****************************************************************************
 * inter prediction structure (ENC_PINTER)
 *****************************************************************************/
typedef struct _ENC_PINTER ENC_PINTER;
struct _ENC_PINTER
{
    int bit_depth;
    /* temporary prediction buffer (only used for ME) */
    pel  pred_buf[MAX_CU_DIM];
    /* reconstruction buffer */
    pel  rec_buf[N_C][MAX_CU_DIM];
    // MVP: motion vector prediction
    s16  mvp_scale[REFP_NUM][MAX_NUM_ACTIVE_REF_FRAME][MV_D];
    s16  mv_scale[REFP_NUM][MAX_NUM_ACTIVE_REF_FRAME][MV_D];
    // MVR: motion vector resolution (precision)
    u8   curr_mvr;
    int  max_imv[MV_D];

    int max_search_range;

    /* affine counterparts of mvp_scale/mv_scale, with one entry per control
     * point (VER_NUM) */
    CPMV  affine_mvp_scale[REFP_NUM][MAX_NUM_ACTIVE_REF_FRAME][VER_NUM][MV_D];
    CPMV  affine_mv_scale[REFP_NUM][MAX_NUM_ACTIVE_REF_FRAME][VER_NUM][MV_D];
    int best_mv_uni[REFP_NUM][MAX_NUM_ACTIVE_REF_FRAME][MV_D];
    pel  p_error[MAX_CU_DIM];
    int  i_gradient[2][MAX_CU_DIM];

    s16  org_bi[MAX_CU_DIM];
    s32  mot_bits[REFP_NUM];

    u8   num_refp;
    /* minimum clip value */
    s16  min_mv_offset[MV_D];
    /* maximum clip value */
    s16  max_mv_offset[MV_D];
    /* search range for int-pel */
    s16  search_range_ipel[MV_D];
    /* search range for sub-pel */
    s16  search_range_spel[MV_D];
    s8  (*search_pattern_hpel)[2];
    u8   search_pattern_hpel_cnt;
    s8  (*search_pattern_qpel)[2];
    u8   search_pattern_qpel_cnt;

    /* original (input) picture buffer */
    COM_PIC        *pic_org;
    /* address of original (input) picture buffer */
    pel             *Yuv_org[N_C];
    /* stride of original (input) picture buffer */
    int              stride_org[N_C];
    /* motion vector map */
    s16            (*map_mv)[REFP_NUM][MV_D];
    /* picture width in SCU unit */
    int              pic_width_in_scu;
    /* QP for luma of current encoding CU */
    int               qp_y;
    /* QP for chroma of current encoding CU */
    int               qp_u;
    int               qp_v;
    u32              lambda_mv;
    /* reference pictures */
    COM_REFP      (*refp)[REFP_NUM];
    int              slice_type;
    /* search level for motion estimation */
    int              me_level;
    int              complexity;
    void            *pdata[4];
    int             *ndata[4];
    /* current frame number */
    int              ptr;
    /* gop size */
    int              gop_size;
    /* ME function (Full-ME or Fast-ME): motion-estimation callback, either the full or the fast search */
    u32            (*fn_me)(ENC_PINTER *pi, int x, int y, int w, int h, int cu_x, int cu_y, int cu_stride, s8 *refi, int lidx, s16 mvp[MV_D], s16 mv[MV_D], int bi);
    /* AFFINE ME function (Gradient-ME): affine motion-estimation callback */
    u32            (*fn_affine_me)(ENC_PINTER *pi, int x, int y, int cu_width_log2, int cu_height_log2, s8 *refi, int lidx, CPMV mvp[VER_NUM][MV_D], CPMV mv[VER_NUM][MV_D], int bi, int vertex_num, int sub_w, int sub_h);
};

以下两个函数指针（fn_me 与 fn_affine_me）留作以后再议，目前还不太清楚它们具体在做什么。

    /* ME function (Full-ME or Fast-ME): motion-estimation callback, either the full or the fast search */
    u32            (*fn_me)(ENC_PINTER *pi, int x, int y, int w, int h, int cu_x, int cu_y, int cu_stride, s8 *refi, int lidx, s16 mvp[MV_D], s16 mv[MV_D], int bi);
    /* AFFINE ME function (Gradient-ME): affine motion-estimation callback */
    u32            (*fn_affine_me)(ENC_PINTER *pi, int x, int y, int cu_width_log2, int cu_height_log2, s8 *refi, int lidx, CPMV mvp[VER_NUM][MV_D], CPMV mv[VER_NUM][MV_D], int bi, int vertex_num, int sub_w, int sub_h);

ENC_PARAM：编码器参数

/* encoder parameter (ENC_PARAM) */
typedef struct _ENC_PARAM
{
#if FIX116_PIC_SIZE
    /* picture size of input sequence (width) */
    int            horizontal_size;
    /* picture size of input sequence (height) */
    int            vertical_size;
    
    /* picture size of pictures in DPB, the decoded picture buffer (width) */
    int            pic_width;  // be a multiple of 8 (MINI_SIZE)
    /* picture size of pictures in DPB, the decoded picture buffer (height) */
    int            pic_height; // be a multiple of 8 (MINI_SIZE)
#else
    /* picture size of input sequence (width) */
    int            pic_width;
    /* picture size of input sequence (height) */
    int            pic_height;
#endif
    /* qp value for I- and P- slice */
    int            qp;
    /* frame per second */
    int            fps;
    /* I-frame period */
    int            i_period;
    /* force I-frame (NOTE(review): exact semantics are not visible in this excerpt -- confirm) */
    int            f_ifrm;
    /* picture bit depth */
    int            bit_depth_input;
    int            bit_depth_internal;
    /* use picture signature embedding */
    int            use_pic_sign;
    int            max_b_frames;
    /* start bumping process if force_output is on */
    int            force_output;
    int            disable_hgop;
    int            gop_size;
    int            use_dqp;
#if USE_SLICE_DQP
    int            frame_qp_add;           /* 10 bits*/
#endif
#if IPCM
    int            ipcm_enable_flag;
#endif
    int            amvr_enable_flag;  // AMVR enable flag (adaptive motion vector resolution)
    int            affine_enable_flag;  // affine enable flag (affine motion model)
    int            smvd_enable_flag;  // SMVD enable flag (symmetric MVD coding)
    int            use_deblock;  
    int            num_of_hmvp_cand;  // number of HMVP candidates (history-based motion vector prediction)
    int            ipf_flag;
#if TSCPM
    int            tscpm_enable_flag;
#endif
    int            umve_enable_flag;
#if EXT_AMVR_HMVP
    int            emvr_enable_flag;
#endif
#if DT_PARTITION
    int            dt_intra_enable_flag;
#endif
    int            wq_enable;  // whether weighted quantization is enabled
    int            seq_wq_mode;  // sequence-level weighted quantization mode
    char           seq_wq_user[2048];
    int            pic_wq_data_idx;
    char           pic_wq_user[2048];
    int            wq_param;  // weighted quantization parameter
    int            wq_model;
    char           wq_param_detailed[256];
    char           wq_param_undetailed[256];

    int            sample_adaptive_offset_enable_flag;  // whether sample adaptive offset is enabled
    int            adaptive_leveling_filter_enable_flag;  // adaptive leveling filter enable flag (presumably the ALF tool -- confirm)
    int            secondary_transform_enable_flag;  // secondary transform enable flag
    u8             position_based_transform_enable_flag;
    u8             library_picture_enable_flag;
    u8             delta_qp_flag;
    u8             chroma_format;
    u8             encoding_precision;
#if HLS_RPL
    COM_RPL        rpls_l0[MAX_NUM_RPLS];
    COM_RPL        rpls_l1[MAX_NUM_RPLS];
    int            rpls_l0_cfg_num;
    int            rpls_l1_cfg_num;
#endif
#if PATCH
    int            patch_stable;
    int            cross_patch_loop_filter;
    int            patch_uniform;
    int            patch_ref_colocated;
    int            patch_width_in_lcu;
    int            patch_height_in_lcu;
    int            patch_columns;
    int            patch_rows;
    int            patch_column_width[64];
    int            patch_row_height[128];
#endif
#if LIBVC_ON
    int            qp_offset_libpic;
#endif
    int            sub_sample_ratio;
    int            frames_to_be_encoded;
    u8             ctu_size; // size of the largest coding unit
    u8             min_cu_size;  // minimum coding unit size
    u8             max_part_ratio;  
    u8             max_split_times; // maximum number of splits allowed
    u8             min_qt_size;  
    u8             max_bt_size;
    u8             max_eqt_size;
    u8             max_dt_size;
    int            qp_offset_cb;
    int            qp_offset_cr;
    int            qp_offset_adp;
    int            bit_depth;
} ENC_PARAM;

ENC_SBAC：稍后再议

/* SBAC state used by the encoder.
 * NOTE(review): this excerpt shows only the field declarations; the meaning of
 * each field (e.g. range/code/left_bits as coder state, bitcounter/is_bitcount
 * as a bit-counting mode) is inferred from the names only -- confirm against
 * the entropy-coding source before relying on it. */
typedef struct _ENC_SBAC
{
    u32            range;
    u32            code;
    int            left_bits;
    u32            stacked_ff;
    u32            pending_byte;
    u32            is_pending_byte;
    COM_SBAC_CTX  ctx;
    u32            bitcounter;
    u8             is_bitcount;
} ENC_SBAC;

ENC_CORE：编码过程中使用的核心信息

/*****************************************************************************
 * CORE information used for encoding process.
 *
 * The variables in this structure are very often used in encoding process.
 *****************************************************************************/
typedef struct _ENC_CORE
{
    /* mode decision structure */
    COM_MODE       mod_info_best;  // best mode information
    COM_MODE       mod_info_curr;  // current mode information
#if TB_SPLIT_EXT
    COM_MODE       mod_info_save;
    //intra rdo copy the current best info directly into core->mod_info_best; need an internal pb_part for intra 
    int            best_pb_part_intra;  // best PB partition for intra
    int            best_tb_part_intra;  // best TB partition for intra
#endif

    /* coefficient buffer of current CU */
    s16            coef[N_C][MAX_CU_DIM];
    /* CU data for RDO */
    ENC_CU_DATA  cu_data_best[MAX_CU_DEPTH][MAX_CU_DEPTH];
    ENC_CU_DATA  cu_data_temp[MAX_CU_DEPTH][MAX_CU_DEPTH];
    /* temporary coefficient buffer */
    s16            ctmp[N_C][MAX_CU_DIM];

    /* neighbor pixel buffer for intra prediction */
    pel            nb[N_C][N_REF][MAX_CU_SIZE * 3];
    /* current encoding LCU number */
    int            lcu_num;

    /* QP for luma of current encoding CU */
    int             qp_y;
    /* QP for chroma of current encoding CU */
    int             qp_u;
    int             qp_v;
    /* X address of current LCU */
    int            x_lcu;
    /* Y address of current LCU */
    int            y_lcu;
    /* X address of current CU in SCU unit */
    int            x_scu;
    /* Y address of current CU in SCU unit */
    int            y_scu;
    /* left pel position of current LCU */
    int            x_pel;
    /* top pel position of current LCU */
    int            y_pel;
    
    /* CU position in current LCU in SCU unit */
    int            cup;
    /* CU depth */
    int            cud;

    /* skip flag for MODE_INTER */
    u8             skip_flag;

    /* split flag for Qt_split_flag */
    u8             split_flag;

    

    /* platform specific data, if needed */
    void          *pf;
    /* bitstream structure for RDO */
    COM_BSW       bs_temp;
    /* SBAC structure for full RDO */
    ENC_SBAC     s_curr_best[MAX_CU_DEPTH][MAX_CU_DEPTH];
    ENC_SBAC     s_next_best[MAX_CU_DEPTH][MAX_CU_DEPTH];
    ENC_SBAC     s_temp_best;
    ENC_SBAC     s_temp_run;
    ENC_SBAC     s_temp_prev_comp_best;
    ENC_SBAC     s_temp_prev_comp_run;
#if TB_SPLIT_EXT
    ENC_SBAC     s_temp_pb_part_best;
#endif
    ENC_SBAC     s_curr_before_split[MAX_CU_DEPTH][MAX_CU_DEPTH];
    ENC_BEF_DATA bef_data[MAX_CU_DEPTH][MAX_CU_DEPTH][MAX_CU_CNT_IN_LCU];
#if TR_SAVE_LOAD
    u8           best_tb_part_hist;
#endif
#if TR_EARLY_TERMINATE
    s64          dist_pred_luma;
#endif

    /* SBAC states kept for the SAO stages (names only; usage not shown here) */
    ENC_SBAC     s_sao_init, s_sao_cur_blk, s_sao_next_blk;
    ENC_SBAC     s_sao_cur_type, s_sao_next_type;
    ENC_SBAC     s_sao_cur_mergetype, s_sao_next_mergetype;

    /* SBAC states kept for ALF (names only; usage not shown here) */
    ENC_SBAC     s_alf_cu_ctr;
    ENC_SBAC     s_alf_initial;

    double         cost_best;
    u32            inter_satd;

    s32            dist_cu;
    s32            dist_cu_best; //dist of the best intra mode (note: only updated in intra coding now)

    // for storing the up-to-date motion list
    COM_MOTION motion_cands[ALLOWED_HMVP_NUM];
    s8 cnt_hmvp_cands;

#if EXT_AMVR_HMVP
    u8    skip_mvps_check;
#endif
} ENC_CORE;

main()函数

在这里插入图片描述

mode_coding_tree()

在这里插入图片描述
mode_coding_tree()参数整理

最开始的参数由来：

上面这个赋值出现在一帧图像的开始，也就是说，这个时候依然是以帧为单位的。

mode_coding_unit()

在这里插入图片描述

copy_cu_data()

调用场景：
copy_cu_data(&ctx->map_cu_data[core->lcu_num], &core->cu_data_best[ctx->info.log2_max_cuwh - 2][ctx->info.log2_max_cuwh - 2], 0, 0, ctx->info.log2_max_cuwh, ctx->info.log2_max_cuwh, ctx->info.log2_max_cuwh, 0, TREE_LC);
从调用方式来看，copy_cu_data()的作用是把最佳（best）划分模式下的各种CU信息从core->cu_data_best拷贝并保存到ctx->map_cu_data[core->lcu_num]中。

/* Copy the U and V coefficient/reconstruction rows of one CU.
 * Source rows are packed with a stride of (cu_width >> 1) samples; destination
 * rows use a stride of (cus >> 1) samples and start at ((x >> 1), (y >> 1)). */
static void copy_cu_chroma_rows(ENC_CU_DATA *dst, ENC_CU_DATA *src, int x, int y, int cu_width, int cu_height, int cus)
{
    int row;
    int idx_dst, idx_src;
    int coef_bytes = (cu_width >> 1) * sizeof(s16);
    int reco_bytes = (cu_width >> 1) * sizeof(pel);

    for (row = 0; row < cu_height >> 1; row++)
    {
        idx_dst = ((y >> 1) + row) * (cus >> 1) + (x >> 1);
        idx_src = row * (cu_width >> 1);
        com_mcpy(&dst->coef[U_C][idx_dst], &src->coef[U_C][idx_src], coef_bytes);
        com_mcpy(&dst->coef[V_C][idx_dst], &src->coef[V_C][idx_src], coef_bytes);
        com_mcpy(&dst->reco[U_C][idx_dst], &src->reco[U_C][idx_src], reco_bytes);
        com_mcpy(&dst->reco[V_C][idx_dst], &src->reco[V_C][idx_src], reco_bytes);
    }
}

/* Copy the data of one CU from src into dst.
 *
 * src is indexed CU-locally (row stride = CU width); dst is indexed inside a
 * square area of side (1 << log2_cus), at pixel offset (x, y).
 *
 * x, y                           offset of the CU inside the destination area
 * cu_width_log2, cu_height_log2  log2 of the CU width / height
 * log2_cus                       log2 of the destination area side
 * cud                            first depth index of split_mode to copy
 * tree_status                    asserted to differ from TREE_C; the TREE_C
 *                                branch below is only reachable when asserts
 *                                are compiled out
 *
 * Always returns COM_OK. */
static int copy_cu_data(ENC_CU_DATA *dst, ENC_CU_DATA *src, int x, int y, int cu_width_log2, int cu_height_log2, int log2_cus, int cud, u8 tree_status)
{
    /* sizes in pixels */
    int cu_width  = 1 << cu_width_log2;
    int cu_height = 1 << cu_height_log2;
    int cus       = 1 << log2_cus;
    /* sizes and offsets in SCU units */
    int cuw_scu = 1 << (cu_width_log2 - MIN_CU_LOG2);
    int cuh_scu = 1 << (cu_height_log2 - MIN_CU_LOG2);
    int cus_scu = 1 << (log2_cus - MIN_CU_LOG2);
    int cx = x >> MIN_CU_LOG2;
    int cy = y >> MIN_CU_LOG2;
    /* byte counts for one SCU row, by element type */
    int bytes_s8   = cuw_scu * sizeof(s8);
    int bytes_u8   = cuw_scu * sizeof(u8);
    int bytes_u32  = cuw_scu * sizeof(u32);
    int bytes_int  = cuw_scu * sizeof(int);
    int bytes_refi = cuw_scu * sizeof(u8) * REFP_NUM;
    int bytes_mv   = cuw_scu * sizeof(s16) * REFP_NUM * MV_D;
    int row, depth, n;
    int idx_dst, idx_src;

    assert(tree_status != TREE_C);
    if (tree_status == TREE_C)
    {
        /* chroma-only tree: chroma intra mode and chroma non-zero counts */
        for (row = 0; row < cuh_scu; row++)
        {
            idx_dst = (cy + row) * cus_scu + cx;
            idx_src = row * cuw_scu;
            com_mcpy(&dst->ipm[1][idx_dst], &src->ipm[1][idx_src], bytes_s8);

            assert(*(src->num_nz_coef[Y_C] + idx_src) == 0);
            for (n = U_C; n < N_C; n++)
            {
                com_mcpy(&dst->num_nz_coef[n][idx_dst], &src->num_nz_coef[n][idx_src], bytes_int);
            }
        }

        copy_cu_chroma_rows(dst, src, x, y, cu_width, cu_height, cus);
        return COM_OK;
    }

    /* per-SCU side information, one SCU row at a time */
    for (row = 0; row < cuh_scu; row++)
    {
        idx_dst = (cy + row) * cus_scu + cx;
        idx_src = row * cuw_scu;

        for (depth = cud; depth < MAX_CU_DEPTH; depth++)
        {
            for (n = 0; n < NUM_BLOCK_SHAPE; n++)
            {
                com_mcpy(&dst->split_mode[depth][n][idx_dst], &src->split_mode[depth][n][idx_src], bytes_s8);
            }
        }
        com_mcpy(&dst->pred_mode[idx_dst], &src->pred_mode[idx_src], bytes_s8);
        com_mcpy(&dst->mpm[0][idx_dst], &src->mpm[0][idx_src], bytes_s8);
        com_mcpy(&dst->mpm[1][idx_dst], &src->mpm[1][idx_src], bytes_s8);
        com_mcpy(&dst->ipm[0][idx_dst], &src->ipm[0][idx_src], bytes_s8);
        com_mcpy(&dst->ipm[1][idx_dst], &src->ipm[1][idx_src], bytes_s8);
        for (n = 0; n < 8; n++)
        {
            com_mcpy(&dst->mpm_ext[n][idx_dst], &src->mpm_ext[n][idx_src], bytes_s8);
        }
        com_mcpy(&dst->affine_flag[idx_dst], &src->affine_flag[idx_src], bytes_s8);
#if SMVD
        com_mcpy(&dst->smvd_flag[idx_dst], &src->smvd_flag[idx_src], bytes_s8);
#endif
        com_mcpy(&dst->depth[idx_dst], &src->depth[idx_src], bytes_s8);

        com_mcpy(&dst->map_scu[idx_dst], &src->map_scu[idx_src], bytes_u32);
        com_mcpy(&dst->map_cu_mode[idx_dst], &src->map_cu_mode[idx_src], bytes_u32);
#if TB_SPLIT_EXT
        com_mcpy(&dst->map_pb_tb_part[idx_dst], &src->map_pb_tb_part[idx_src], bytes_u32);
#endif

        com_mcpy(dst->refi[idx_dst], src->refi[idx_src], bytes_refi);

        com_mcpy(&dst->umve_flag[idx_dst], &src->umve_flag[idx_src], bytes_u8);
        com_mcpy(&dst->umve_idx[idx_dst], &src->umve_idx[idx_src], bytes_u8);
        com_mcpy(&dst->skip_idx[idx_dst], &src->skip_idx[idx_src], bytes_u8);
        com_mcpy(&dst->mvr_idx[idx_dst], &src->mvr_idx[idx_src], bytes_u8);
#if EXT_AMVR_HMVP
        com_mcpy(&dst->mvp_from_hmvp_flag[idx_dst], &src->mvp_from_hmvp_flag[idx_src], bytes_u8);
#endif
        com_mcpy(&dst->ipf_flag[idx_dst], &src->ipf_flag[idx_src], bytes_u8);

        com_mcpy(&dst->mv[idx_dst], &src->mv[idx_src], bytes_mv);
        com_mcpy(&dst->mvd[idx_dst], &src->mvd[idx_src], bytes_mv);

        for (n = 0; n < N_C; n++)
        {
            com_mcpy(&dst->num_nz_coef[n][idx_dst], &src->num_nz_coef[n][idx_src], bytes_int);
        }
#if TB_SPLIT_EXT
        com_mcpy(&dst->pb_part[idx_dst], &src->pb_part[idx_src], bytes_int);
        com_mcpy(&dst->tb_part[idx_dst], &src->tb_part[idx_src], bytes_int);
#endif
    }

    /* luma coefficients and reconstruction, one pixel row at a time */
    for (row = 0; row < cu_height; row++)
    {
        idx_dst = (y + row) * cus + x;
        idx_src = row * cu_width;
        com_mcpy(&dst->coef[Y_C][idx_dst], &src->coef[Y_C][idx_src], (int)(cu_width * sizeof(s16)));
        com_mcpy(&dst->reco[Y_C][idx_dst], &src->reco[Y_C][idx_src], (int)(cu_width * sizeof(pel)));
    }

    copy_cu_chroma_rows(dst, src, x, y, cu_width, cu_height, cus);
    return COM_OK;
}

diff_16b_32nx4n_simd()

调用场景：
enc_diff_16b(cu_width_log2, cu_height_log2, buf, pred[Y_C], stride, cu_width, cu_width, diff[Y_C]);

/* Apply SSE_DIFF_16B_8PEL over a block of w x h 16-bit samples, for blocks
 * whose width is a multiple of 32 and height a multiple of 4 (only the whole
 * 32-sample / 4-row groups given by w>>5 and h>>2 are processed).
 *
 * src1, src2       input buffers, accessed as s16
 * s_src1, s_src2   row strides of src1 / src2, in s16 elements
 * s_diff           row stride of diff, in s16 elements
 * diff             output buffer
 *
 * NOTE(review): the arithmetic itself lives in SSE_DIFF_16B_8PEL, which is not
 * visible here; presumably it stores src1 - src2 for 8 samples -- confirm. */
static void diff_16b_32nx4n_simd(int w, int h, void * src1, void * src2, int s_src1, int s_src2, int s_diff, s16 * diff)
{
    __m128i m01, m02, m03, m04, m05, m06, m07, m08, m09, m10, m11, m12;
    s16 * p1 = (s16 *)src1;
    s16 * p2 = (s16 *)src2;
    int band, blk, seg;

    for(band = 0; band < (h>>2); band++)
    {
        for(blk = 0; blk < (w>>5); blk++)
        {
            /* one 32-sample block = four 8-sample segments, each over 4 rows */
            for(seg = 0; seg < 4; seg++)
            {
                SSE_DIFF_16B_8PEL(p1, p2, diff, m01, m02, m03);
                SSE_DIFF_16B_8PEL(p1+s_src1, p2+s_src2, diff+s_diff, m04, m05, m06);
                SSE_DIFF_16B_8PEL(p1+s_src1*2, p2+s_src2*2, diff+s_diff*2, m07, m08, m09);
                SSE_DIFF_16B_8PEL(p1+s_src1*3, p2+s_src2*3, diff+s_diff*3, m10, m11, m12);
                p1 += 8;
                p2 += 8;
                diff += 8;
            }
        }
        /* step down four rows and rewind the samples consumed in this band */
        p1   += ((s_src1<<2) - ((w>>5)<<5));
        p2   += ((s_src2<<2) - ((w>>5)<<5));
        diff += ((s_diff<<2) - ((w>>5)<<5));
    }
}

chenyuanxu

关注

5
点赞
踩
21

收藏

觉得还不错? 一键收藏
8
评论
AVS3代码阅读HPM4.0

参考AVS3代码阅读（HPM6.0）（一.整体架构以及CU划分部分代码）版本：HPM4.0核心文件：app_encoder.c 要做的是帧间预测，所以前面的函数先暂时不管了，挖个坑以后再填。直接从enc_pic看起。enc_pic() ...
复制链接

扫一扫

专栏目录