AV1时域滤波相关代码

maryhaocool2023

已于 2022-09-15 10:43:21 修改

阅读量729

点赞数 1

分类专栏： av1 文章标签： java 开发语言

于 2022-09-07 16:54:36 首次发布

本文链接：https://blog.csdn.net/maryhaocoolcool/article/details/126749282

版权

av1 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

本文介绍了AV1编码器的时域滤波算法，包括非局部均值计算块差异、噪声水平估算及八边形运动搜索模式。优化后，在不同分辨率视频上实现了PSNR和SSIM的显著提升。滤波过程涉及多线程处理，关键代码包含在1pass和2pass的编码流程中，以及滤波前对key_frame的参数设置和运动搜索代码段。

摘要由CSDN通过智能技术生成

链接：AV1编码器优化技术_mob604756f6460e的技术博客_51CTO博客

总结，av1时域滤波算法的特点：1.使用一种称为非局部均值的方法来计算块差异，并确定我们要分配给该块的权重，2在帧内有一个噪声水平估计算法，一旦噪声很高时，我们便使用更强的过滤器。3 改为使用八边形运动搜索模式；

结果：对于480p的中分辨率视频，PSNR的增益约为4-5%，SSIM的增益约为9%；对于720和1080p这类较高分辨率的视频内容，PSNR增益约为6.5%，而SSIM的增益约为11％至12％。

滤波函数：

多线程：

prepare_tf_workers->tf_worker_hook->av1_tf_do_filtering_row->

相关代码：

Multi-threads:
  // Perform temporal filtering process.
  if (mt_info->num_workers > 1)
    av1_tf_do_filtering_mt(cpi);
  else
    tf_do_filtering(cpi);

1pass：

av1_encode_strategy->denoise_and_encode->av1_temporal_filter->tf_do_filtering->av1_tf_do_filtering_row->

2pass：

encoder_encode->av1_get_compressed_data->av1_encode_strategy->av1_get_second_pass_params->av1_tf_info_filtering->av1_temporal_filter->tf_do_filtering->av1_tf_do_filtering_row->

滤波前对key_frame 设置相关参数：

void av1_setup_past_independence(AV1_COMMON *cm) {
  // Reset the segment feature data to the default stats:
  // Features disabled, 0, with delta coding (Default state).
  av1_clearall_segfeatures(&cm->seg);

  //base boost1 boost2
  if (cm->cur_frame->seg_map) {
    memset(cm->cur_frame->seg_map, 0,
           (cm->cur_frame->mi_rows * cm->cur_frame->mi_cols));
  }

  // reset mode ref deltas
  av1_set_default_ref_deltas(cm->cur_frame->ref_deltas);
  av1_set_default_mode_deltas(cm->cur_frame->mode_deltas);
  set_default_lf_deltas(&cm->lf);

  av1_default_coef_probs(cm);
  av1_init_mode_probs(cm->fc);
  av1_init_mv_probs(cm);
  cm->fc->initialized = 1;
  av1_setup_frame_contexts(cm);
}

运动搜索代码：

static void tf_motion_search(AV1_COMP *cpi, MACROBLOCK *mb,
                             const YV12_BUFFER_CONFIG *frame_to_filter,
                             const YV12_BUFFER_CONFIG *ref_frame,
                             const BLOCK_SIZE block_size, const int mb_row,
                             const int mb_col, MV *ref_mv, MV *subblock_mvs,
                             int *subblock_mses) {
  // Frame information
  const int min_frame_size = AOMMIN(cpi->common.width, cpi->common.height);

  // Block information (ONLY Y-plane is used for motion search).
  const int mb_height = block_size_high[block_size];
  const int mb_width = block_size_wide[block_size];
  const int mb_pels = mb_height * mb_width;
  const int y_stride = frame_to_filter->y_stride;
  assert(y_stride == ref_frame->y_stride);
  const int y_offset = mb_row * mb_height * y_stride + mb_col * mb_width;

  // Save input state.
  MACROBLOCKD *const mbd = &mb->e_mbd;
  const struct buf_2d ori_src_buf = mb->plane[0].src;
  const struct buf_2d ori_pre_buf = mbd->plane[0].pre[0];

  // Parameters used for motion search.
  FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
  SUBPEL_MOTION_SEARCH_PARAMS ms_params;
  const SEARCH_METHODS search_method = NSTEP;
  const search_site_config *search_site_cfg =
      cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
  const int step_param = av1_init_search_range(
      AOMMAX(frame_to_filter->y_crop_width, frame_to_filter->y_crop_height));
  const SUBPEL_SEARCH_TYPE subpel_search_type = USE_8_TAPS;
  const int force_integer_mv = cpi->common.features.cur_frame_force_integer_mv;
  const MV_COST_TYPE mv_cost_type =
      min_frame_size >= 720
          ? MV_COST_L1_HDRES
          : (min_frame_size >= 480 ? MV_COST_L1_MIDRES : MV_COST_L1_LOWRES);

  // Starting position for motion search.
  FULLPEL_MV start_mv = get_fullmv_from_mv(ref_mv);
  // Baseline position for motion search (used for rate distortion comparison).
  const MV baseline_mv = kZeroMv;

  // Setup.
  mb->plane[0].src.buf = frame_to_filter->y_buffer + y_offset;
  mb->plane[0].src.stride = y_stride;
  mbd->plane[0].pre[0].buf = ref_frame->y_buffer + y_offset;
  mbd->plane[0].pre[0].stride = y_stride;
  // Unused intermediate results for motion search.
  unsigned int sse, error;
  int distortion;
  int cost_list[5];

  // Do motion search.
  int_mv best_mv;  // Searched motion vector.
  int block_mse = INT_MAX;
  MV block_mv = kZeroMv;

  av1_make_default_fullpel_ms_params(&full_ms_params, cpi, mb, block_size,
                                     &baseline_mv, search_site_cfg,
                                     /*fine_search_interval=*/0);
  av1_set_mv_search_method(&full_ms_params, search_site_cfg, search_method);
  full_ms_params.run_mesh_search = 1;
  full_ms_params.mv_cost_params.mv_cost_type = mv_cost_type;

  av1_full_pixel_search(start_mv, &full_ms_params, step_param,
                        cond_cost_list(cpi, cost_list), &best_mv.as_fullmv,
                        NULL);

  if (force_integer_mv == 1) {  // Only do full search on the entire block.
    const int mv_row = best_mv.as_mv.row;
    const int mv_col = best_mv.as_mv.col;
    best_mv.as_mv.row = GET_MV_SUBPEL(mv_row);
    best_mv.as_mv.col = GET_MV_SUBPEL(mv_col);
    const int mv_offset = mv_row * y_stride + mv_col;
    error = cpi->ppi->fn_ptr[block_size].vf(
        ref_frame->y_buffer + y_offset + mv_offset, y_stride,
        frame_to_filter->y_buffer + y_offset, y_stride, &sse);
    block_mse = DIVIDE_AND_ROUND(error, mb_pels);
    block_mv = best_mv.as_mv;
  } else {  // Do fractional search on the entire block and all sub-blocks.
    av1_make_default_subpel_ms_params(&ms_params, cpi, mb, block_size,
                                      &baseline_mv, cost_list);
    ms_params.forced_stop = EIGHTH_PEL;
    ms_params.var_params.subpel_search_type = subpel_search_type;
    // Since we are merely refining the result from full pixel search, we don't
    // need regularization for subpel search
    ms_params.mv_cost_params.mv_cost_type = MV_COST_NONE;

    MV subpel_start_mv = get_mv_from_fullmv(&best_mv.as_fullmv);
    error = cpi->mv_search_params.find_fractional_mv_step(
        &mb->e_mbd, &cpi->common, &ms_params, subpel_start_mv, &best_mv.as_mv,
        &distortion, &sse, NULL);
    block_mse = DIVIDE_AND_ROUND(error, mb_pels);
    block_mv = best_mv.as_mv;
    *ref_mv = best_mv.as_mv;
    // On 4 sub-blocks.
    const BLOCK_SIZE subblock_size = ss_size_lookup[block_size][1][1];
    const int subblock_height = block_size_high[subblock_size];
    const int subblock_width = block_size_wide[subblock_size];
    const int subblock_pels = subblock_height * subblock_width;
    start_mv = get_fullmv_from_mv(ref_mv);

    int subblock_idx = 0;
    for (int i = 0; i < mb_height; i += subblock_height) {
      for (int j = 0; j < mb_width; j += subblock_width) {
        const int offset = i * y_stride + j;
        mb->plane[0].src.buf = frame_to_filter->y_buffer + y_offset + offset;
        mbd->plane[0].pre[0].buf = ref_frame->y_buffer + y_offset + offset;
        av1_make_default_fullpel_ms_params(&full_ms_params, cpi, mb,
                                           subblock_size, &baseline_mv,
                                           search_site_cfg,
                                           /*fine_search_interval=*/0);
        av1_set_mv_search_method(&full_ms_params, search_site_cfg,
                                 search_method);
        full_ms_params.run_mesh_search = 1;
        full_ms_params.mv_cost_params.mv_cost_type = mv_cost_type;

        av1_full_pixel_search(start_mv, &full_ms_params, step_param,
                              cond_cost_list(cpi, cost_list),
                              &best_mv.as_fullmv, NULL);

        av1_make_default_subpel_ms_params(&ms_params, cpi, mb, subblock_size,
                                          &baseline_mv, cost_list);
        ms_params.forced_stop = EIGHTH_PEL;
        ms_params.var_params.subpel_search_type = subpel_search_type;
        // Since we are merely refining the result from full pixel search, we
        // don't need regularization for subpel search
        ms_params.mv_cost_params.mv_cost_type = MV_COST_NONE;

        subpel_start_mv = get_mv_from_fullmv(&best_mv.as_fullmv);
        error = cpi->mv_search_params.find_fractional_mv_step(
            &mb->e_mbd, &cpi->common, &ms_params, subpel_start_mv,
            &best_mv.as_mv, &distortion, &sse, NULL);
        subblock_mses[subblock_idx] = DIVIDE_AND_ROUND(error, subblock_pels);
        subblock_mvs[subblock_idx] = best_mv.as_mv;
        ++subblock_idx;
      }
    }
  }

  // Restore input state.
  mb->plane[0].src = ori_src_buf;
  mbd->plane[0].pre[0] = ori_pre_buf;

  // Make partition decision.
  tf_determine_block_partition(block_mv, block_mse, subblock_mvs,
                               subblock_mses);

  // Do not pass down the reference motion vector if error is too large.
  const int thresh = (min_frame_size >= 720) ? 12 : 3;
  if (block_mse > (thresh << (mbd->bd - 8))) {
    *ref_mv = kZeroMv;
  }
}

滤波函数代码：

void av1_tf_do_filtering_row(AV1_COMP *cpi, ThreadData *td, int mb_row) {
  TemporalFilterCtx *tf_ctx = &cpi->tf_ctx;
  YV12_BUFFER_CONFIG **frames = tf_ctx->frames;
  const int num_frames = tf_ctx->num_frames;
  const int filter_frame_idx = tf_ctx->filter_frame_idx;
  const int compute_frame_diff = tf_ctx->compute_frame_diff;
  const struct scale_factors *scale = &tf_ctx->sf;
  const double *noise_levels = tf_ctx->noise_levels;
  const int num_pels = tf_ctx->num_pels;
  const int q_factor = tf_ctx->q_factor;
  const BLOCK_SIZE block_size = TF_BLOCK_SIZE;
  const YV12_BUFFER_CONFIG *const frame_to_filter = frames[filter_frame_idx];
  MACROBLOCK *const mb = &td->mb;
  MACROBLOCKD *const mbd = &mb->e_mbd;
  TemporalFilterData *const tf_data = &td->tf_data;
  const int mb_height = block_size_high[block_size];
  const int mb_width = block_size_wide[block_size];
  const int mi_h = mi_size_high_log2[block_size];
  const int mi_w = mi_size_wide_log2[block_size];
  const int num_planes = av1_num_planes(&cpi->common);
  uint32_t *accum = tf_data->accum;
  uint16_t *count = tf_data->count;
  uint8_t *pred = tf_data->pred;

  // Factor to control the filering strength.
  const int filter_strength = cpi->oxcf.algo_cfg.arnr_strength;

  // Do filtering.
  FRAME_DIFF *diff = &td->tf_data.diff;
  av1_set_mv_row_limits(&cpi->common.mi_params, &mb->mv_limits,
                        (mb_row << mi_h), (mb_height >> MI_SIZE_LOG2),
                        cpi->oxcf.border_in_pixels);
  for (int mb_col = 0; mb_col < tf_ctx->mb_cols; mb_col++) {
    av1_set_mv_col_limits(&cpi->common.mi_params, &mb->mv_limits,
                          (mb_col << mi_w), (mb_width >> MI_SIZE_LOG2),
                          cpi->oxcf.border_in_pixels);
    memset(accum, 0, num_pels * sizeof(accum[0]));
    memset(count, 0, num_pels * sizeof(count[0]));
    MV ref_mv = kZeroMv;  // Reference motion vector passed down along frames.
                          // Perform temporal filtering frame by frame.
    for (int frame = 0; frame < num_frames; frame++) {
      if (frames[frame] == NULL) continue;

      // Motion search.
      MV subblock_mvs[4] = { kZeroMv, kZeroMv, kZeroMv, kZeroMv };
      int subblock_mses[4] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX };
      if (frame ==
          filter_frame_idx) {  // Frame to be filtered.
                               // Change ref_mv sign for following frames.
        ref_mv.row *= -1;
        ref_mv.col *= -1;
      } else {  // Other reference frames.
        tf_motion_search(cpi, mb, frame_to_filter, frames[frame], block_size,
                         mb_row, mb_col, &ref_mv, subblock_mvs, subblock_mses);
      }

      // Perform weighted averaging.
      if (frame == filter_frame_idx) {  // Frame to be filtered.
        tf_apply_temporal_filter_self(frames[frame], mbd, block_size, mb_row,
                                      mb_col, num_planes, accum, count);
      } else {  // Other reference frames.
        tf_build_predictor(frames[frame], mbd, block_size, mb_row, mb_col,
                           num_planes, scale, subblock_mvs, pred);

        // All variants of av1_apply_temporal_filter() contain floating point
        // operations. Hence, clear the system state.

        // TODO(any): avx2/sse2 version should be changed to align with C
        // function before using. In particular, current avx2/sse2 function
        // only supports 32x32 block size and 5x5 filtering window.
        if (is_frame_high_bitdepth(frame_to_filter)) {  // for high bit-depth
#if CONFIG_AV1_HIGHBITDEPTH
          if (TF_BLOCK_SIZE == BLOCK_32X32 && TF_WINDOW_LENGTH == 5) {
            av1_highbd_apply_temporal_filter(
                frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
                noise_levels, subblock_mvs, subblock_mses, q_factor,
                filter_strength, pred, accum, count);
          } else {
#endif  // CONFIG_AV1_HIGHBITDEPTH
            av1_apply_temporal_filter_c(
                frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
                noise_levels, subblock_mvs, subblock_mses, q_factor,
                filter_strength, pred, accum, count);
#if CONFIG_AV1_HIGHBITDEPTH
          }
#endif            // CONFIG_AV1_HIGHBITDEPTH
        } else {  // for 8-bit
          if (TF_BLOCK_SIZE == BLOCK_32X32 && TF_WINDOW_LENGTH == 5) {
            av1_apply_temporal_filter(frame_to_filter, mbd, block_size, mb_row,
                                      mb_col, num_planes, noise_levels,
                                      subblock_mvs, subblock_mses, q_factor,
                                      filter_strength, pred, accum, count);
          } else {
            av1_apply_temporal_filter_c(
                frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
                noise_levels, subblock_mvs, subblock_mses, q_factor,
                filter_strength, pred, accum, count);
          }
        }
      }
    }
    tf_normalize_filtered_frame(mbd, block_size, mb_row, mb_col, num_planes,
                                accum, count, tf_ctx->output_frame);

    if (compute_frame_diff) {
      const int y_height = mb_height >> mbd->plane[0].subsampling_y;
      const int y_width = mb_width >> mbd->plane[0].subsampling_x;
      const int source_y_stride = frame_to_filter->y_stride;
      const int filter_y_stride = tf_ctx->output_frame->y_stride;
      const int source_offset =
          mb_row * y_height * source_y_stride + mb_col * y_width;
      const int filter_offset =
          mb_row * y_height * filter_y_stride + mb_col * y_width;
      unsigned int sse = 0;
      cpi->ppi->fn_ptr[block_size].vf(
          frame_to_filter->y_buffer + source_offset, source_y_stride,
          tf_ctx->output_frame->y_buffer + filter_offset, filter_y_stride,
          &sse);
      diff->sum += sse;
      diff->sse += sse * (int64_t)sse;
    }
  }
}