链接:AV1编码器优化技术_mob604756f6460e的技术博客_51CTO博客
总结:AV1 时域滤波算法的特点:1. 使用一种称为非局部均值(non-local means)的方法来计算块差异,并据此确定分配给该块的权重;2. 帧内有一个噪声水平估计算法,当噪声较高时使用更强的滤波器;3. 运动搜索改为使用八边形搜索模式。
结果:对于 480p 的中等分辨率视频,PSNR 的增益约为 4%~5%,SSIM 的增益约为 9%;对于 720p 和 1080p 这类较高分辨率的视频内容,PSNR 的增益约为 6.5%,SSIM 的增益约为 11%~12%。
滤波函数:
多线程:
prepare_tf_workers->tf_worker_hook->av1_tf_do_filtering_row->
相关代码:
Multi-threads:
// Run the temporal filtering process: take the multi-threaded entry point when
// more than one worker is configured, otherwise call tf_do_filtering().
if (mt_info->num_workers > 1) {
  av1_tf_do_filtering_mt(cpi);
} else {
  tf_do_filtering(cpi);
}
1pass:
av1_encode_strategy->denoise_and_encode->av1_temporal_filter->tf_do_filtering->av1_tf_do_filtering_row->
2pass:
encoder_encode->av1_get_compressed_data->av1_encode_strategy->av1_get_second_pass_params->av1_tf_info_filtering->av1_temporal_filter->tf_do_filtering->av1_tf_do_filtering_row->
滤波前对key_frame 设置相关参数:
// Puts the frame-level state of `cm` back to defaults: segmentation features,
// the current frame's segmentation map, the ref/mode/loop-filter deltas and
// the probability tables reachable through cm->fc.
void av1_setup_past_independence(AV1_COMMON *cm) {
  // Segmentation features return to their default state: all features
  // disabled, values 0, delta coding.
  av1_clearall_segfeatures(&cm->seg);

  // Zero the current frame's segmentation map when one is present.
  // NOTE(review): the original annotation here read "base boost1 boost2" --
  // presumably the segment ids stored in the map; confirm.
  if (cm->cur_frame->seg_map != NULL) {
    const size_t seg_map_size = cm->cur_frame->mi_rows * cm->cur_frame->mi_cols;
    memset(cm->cur_frame->seg_map, 0, seg_map_size);
  }

  // Default reference / mode deltas for the current frame, then the
  // loop-filter deltas.
  av1_set_default_ref_deltas(cm->cur_frame->ref_deltas);
  av1_set_default_mode_deltas(cm->cur_frame->mode_deltas);
  set_default_lf_deltas(&cm->lf);

  // Default coefficient, mode and MV probabilities; the frame context is
  // flagged as initialized before the frame contexts are set up from it.
  av1_default_coef_probs(cm);
  av1_init_mode_probs(cm->fc);
  av1_init_mv_probs(cm);
  cm->fc->initialized = 1;
  av1_setup_frame_contexts(cm);
}
运动搜索代码:
// Runs block-based motion search for one temporal-filter block of
// `frame_to_filter` against `ref_frame`, using the luma (Y) plane only.
//
// A full-pixel search on the whole block comes first. When
// cur_frame_force_integer_mv is 1 the result is kept at integer precision and
// the block error is measured directly with the variance function. Otherwise
// the result is refined by a sub-pixel search, and the same full-pixel +
// sub-pixel search is repeated on every sub-block. The whole-block and
// sub-block results are then handed to tf_determine_block_partition().
//
// Parameters:
//   cpi             - encoder instance; supplies the search configuration and
//                     the fn_ptr function table.
//   mb              - macroblock context; the plane-0 src/pre buffers are
//                     redirected during the search and restored before return.
//   frame_to_filter - frame being filtered (source of the search).
//   ref_frame       - frame that is searched; its Y stride must equal that of
//                     frame_to_filter (asserted).
//   block_size      - size of the block to search.
//   mb_row, mb_col  - block position, in units of blocks of `block_size`.
//   ref_mv          - in: starting MV of the whole-block search.
//                     out: the whole-block sub-pixel MV (fractional path
//                     only), or zero when the block error exceeds the
//                     threshold at the end of the function.
//   subblock_mvs    - out: one MV per sub-block (written in the fractional
//                     path).
//   subblock_mses   - out: per-sub-block search error divided by the sub-block
//                     pixel count (written in the fractional path).
static void tf_motion_search(AV1_COMP *cpi, MACROBLOCK *mb,
const YV12_BUFFER_CONFIG *frame_to_filter,
const YV12_BUFFER_CONFIG *ref_frame,
const BLOCK_SIZE block_size, const int mb_row,
const int mb_col, MV *ref_mv, MV *subblock_mvs,
int *subblock_mses) {
// Frame information
// The smaller frame dimension selects the MV cost type and the final error
// threshold below.
const int min_frame_size = AOMMIN(cpi->common.width, cpi->common.height);
// Block information (ONLY Y-plane is used for motion search).
const int mb_height = block_size_high[block_size];
const int mb_width = block_size_wide[block_size];
const int mb_pels = mb_height * mb_width;
const int y_stride = frame_to_filter->y_stride;
assert(y_stride == ref_frame->y_stride);
// Offset of the block's top-left pixel inside the Y buffer.
const int y_offset = mb_row * mb_height * y_stride + mb_col * mb_width;
// Save input state.
MACROBLOCKD *const mbd = &mb->e_mbd;
const struct buf_2d ori_src_buf = mb->plane[0].src;
const struct buf_2d ori_pre_buf = mbd->plane[0].pre[0];
// Parameters used for motion search.
FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
SUBPEL_MOTION_SEARCH_PARAMS ms_params;
const SEARCH_METHODS search_method = NSTEP;
const search_site_config *search_site_cfg =
cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
// The search range is derived from the larger cropped luma dimension.
const int step_param = av1_init_search_range(
AOMMAX(frame_to_filter->y_crop_width, frame_to_filter->y_crop_height));
const SUBPEL_SEARCH_TYPE subpel_search_type = USE_8_TAPS;
const int force_integer_mv = cpi->common.features.cur_frame_force_integer_mv;
// MV cost type by resolution: >= 720 uses HDRES, >= 480 uses MIDRES,
// anything smaller uses LOWRES.
const MV_COST_TYPE mv_cost_type =
min_frame_size >= 720
? MV_COST_L1_HDRES
: (min_frame_size >= 480 ? MV_COST_L1_MIDRES : MV_COST_L1_LOWRES);
// Starting position for motion search.
FULLPEL_MV start_mv = get_fullmv_from_mv(ref_mv);
// Baseline position for motion search (used for rate distortion comparison).
const MV baseline_mv = kZeroMv;
// Setup.
// Point the source at the block in the frame to filter and the prediction at
// the co-located block in the reference frame.
mb->plane[0].src.buf = frame_to_filter->y_buffer + y_offset;
mb->plane[0].src.stride = y_stride;
mbd->plane[0].pre[0].buf = ref_frame->y_buffer + y_offset;
mbd->plane[0].pre[0].stride = y_stride;
// Unused intermediate results for motion search.
unsigned int sse, error;
int distortion;
int cost_list[5];
// Do motion search.
int_mv best_mv; // Searched motion vector.
int block_mse = INT_MAX;
MV block_mv = kZeroMv;
// Full-pixel search on the whole block, starting from `ref_mv`.
av1_make_default_fullpel_ms_params(&full_ms_params, cpi, mb, block_size,
&baseline_mv, search_site_cfg,
/*fine_search_interval=*/0);
av1_set_mv_search_method(&full_ms_params, search_site_cfg, search_method);
// Override two of the default parameters: enable run_mesh_search and use the
// resolution-dependent MV cost type chosen above.
full_ms_params.run_mesh_search = 1;
full_ms_params.mv_cost_params.mv_cost_type = mv_cost_type;
av1_full_pixel_search(start_mv, &full_ms_params, step_param,
cond_cost_list(cpi, cost_list), &best_mv.as_fullmv,
NULL);
if (force_integer_mv == 1) { // Only do full search on the entire block.
// best_mv was written through as_fullmv and is read back through as_mv here,
// so mv_row/mv_col are in full-pixel units (presumably int_mv is a union
// whose members alias -- TODO confirm).
const int mv_row = best_mv.as_mv.row;
const int mv_col = best_mv.as_mv.col;
// Convert the stored MV to sub-pixel units.
best_mv.as_mv.row = GET_MV_SUBPEL(mv_row);
best_mv.as_mv.col = GET_MV_SUBPEL(mv_col);
// Measure the error between the displaced reference block and the source
// block; the displacement is applied as a plain pixel offset.
const int mv_offset = mv_row * y_stride + mv_col;
error = cpi->ppi->fn_ptr[block_size].vf(
ref_frame->y_buffer + y_offset + mv_offset, y_stride,
frame_to_filter->y_buffer + y_offset, y_stride, &sse);
block_mse = DIVIDE_AND_ROUND(error, mb_pels);
block_mv = best_mv.as_mv;
// Note: *ref_mv, subblock_mvs and subblock_mses are not written in this
// branch.
} else { // Do fractional search on the entire block and all sub-blocks.
av1_make_default_subpel_ms_params(&ms_params, cpi, mb, block_size,
&baseline_mv, cost_list);
ms_params.forced_stop = EIGHTH_PEL;
ms_params.var_params.subpel_search_type = subpel_search_type;
// Since we are merely refining the result from full pixel search, we don't
// need regularization for subpel search
ms_params.mv_cost_params.mv_cost_type = MV_COST_NONE;
MV subpel_start_mv = get_mv_from_fullmv(&best_mv.as_fullmv);
error = cpi->mv_search_params.find_fractional_mv_step(
&mb->e_mbd, &cpi->common, &ms_params, subpel_start_mv, &best_mv.as_mv,
&distortion, &sse, NULL);
block_mse = DIVIDE_AND_ROUND(error, mb_pels);
block_mv = best_mv.as_mv;
// Pass the refined whole-block MV back to the caller.
*ref_mv = best_mv.as_mv;
// On 4 sub-blocks.
// Sub-block size comes from ss_size_lookup with both subsampling indices set
// to 1 (presumably half the width and half the height -- TODO confirm).
const BLOCK_SIZE subblock_size = ss_size_lookup[block_size][1][1];
const int subblock_height = block_size_high[subblock_size];
const int subblock_width = block_size_wide[subblock_size];
const int subblock_pels = subblock_height * subblock_width;
// Every sub-block search starts from the whole-block MV at full-pixel
// precision.
start_mv = get_fullmv_from_mv(ref_mv);
int subblock_idx = 0;
// Sub-blocks are visited row by row; subblock_idx follows that order.
for (int i = 0; i < mb_height; i += subblock_height) {
for (int j = 0; j < mb_width; j += subblock_width) {
const int offset = i * y_stride + j;
mb->plane[0].src.buf = frame_to_filter->y_buffer + y_offset + offset;
mbd->plane[0].pre[0].buf = ref_frame->y_buffer + y_offset + offset;
// Full-pixel search on the sub-block, configured like the whole-block one.
av1_make_default_fullpel_ms_params(&full_ms_params, cpi, mb,
subblock_size, &baseline_mv,
search_site_cfg,
/*fine_search_interval=*/0);
av1_set_mv_search_method(&full_ms_params, search_site_cfg,
search_method);
full_ms_params.run_mesh_search = 1;
full_ms_params.mv_cost_params.mv_cost_type = mv_cost_type;
av1_full_pixel_search(start_mv, &full_ms_params, step_param,
cond_cost_list(cpi, cost_list),
&best_mv.as_fullmv, NULL);
// Sub-pixel refinement of the sub-block result.
av1_make_default_subpel_ms_params(&ms_params, cpi, mb, subblock_size,
&baseline_mv, cost_list);
ms_params.forced_stop = EIGHTH_PEL;
ms_params.var_params.subpel_search_type = subpel_search_type;
// Since we are merely refining the result from full pixel search, we
// don't need regularization for subpel search
ms_params.mv_cost_params.mv_cost_type = MV_COST_NONE;
subpel_start_mv = get_mv_from_fullmv(&best_mv.as_fullmv);
error = cpi->mv_search_params.find_fractional_mv_step(
&mb->e_mbd, &cpi->common, &ms_params, subpel_start_mv,
&best_mv.as_mv, &distortion, &sse, NULL);
subblock_mses[subblock_idx] = DIVIDE_AND_ROUND(error, subblock_pels);
subblock_mvs[subblock_idx] = best_mv.as_mv;
++subblock_idx;
}
}
}
// Restore input state.
mb->plane[0].src = ori_src_buf;
mbd->plane[0].pre[0] = ori_pre_buf;
// Make partition decision.
tf_determine_block_partition(block_mv, block_mse, subblock_mvs,
subblock_mses);
// Do not pass down the reference motion vector if error is too large.
// The threshold (12 for frames >= 720, else 3) is scaled up by the number of
// bits above 8 in mbd->bd.
const int thresh = (min_frame_size >= 720) ? 12 : 3;
if (block_mse > (thresh << (mbd->bd - 8))) {
*ref_mv = kZeroMv;
}
}
滤波函数代码:
// Temporally filters one row of blocks of the frame selected by
// cpi->tf_ctx.filter_frame_idx.
//
// For every block in row `mb_row`, each non-NULL frame in tf_ctx->frames
// contributes to the accumulators held in td->tf_data: the frame being
// filtered is added through tf_apply_temporal_filter_self(), while every other
// frame is motion-searched, turned into a prediction and blended in by one of
// the av1_apply_temporal_filter*() variants. The accumulated result is then
// normalized into tf_ctx->output_frame. When tf_ctx->compute_frame_diff is
// set, the luma difference between the source block and the filtered block is
// added to td->tf_data.diff.
//
//   cpi    - encoder instance holding the temporal filter context.
//   td     - thread data providing the macroblock context and the
//            accum / count / pred scratch buffers.
//   mb_row - index of the block row to process, in units of TF_BLOCK_SIZE.
void av1_tf_do_filtering_row(AV1_COMP *cpi, ThreadData *td, int mb_row) {
  // Values taken from the temporal filter context.
  TemporalFilterCtx *tf_ctx = &cpi->tf_ctx;
  YV12_BUFFER_CONFIG **frames = tf_ctx->frames;
  const int num_frames = tf_ctx->num_frames;
  const int filter_frame_idx = tf_ctx->filter_frame_idx;
  const int compute_frame_diff = tf_ctx->compute_frame_diff;
  const struct scale_factors *scale = &tf_ctx->sf;
  const double *noise_levels = tf_ctx->noise_levels;
  const int num_pels = tf_ctx->num_pels;
  const int q_factor = tf_ctx->q_factor;
  const YV12_BUFFER_CONFIG *const frame_to_filter = frames[filter_frame_idx];

  // Block geometry.
  const BLOCK_SIZE block_size = TF_BLOCK_SIZE;
  const int mb_height = block_size_high[block_size];
  const int mb_width = block_size_wide[block_size];
  const int mi_rows_log2 = mi_size_high_log2[block_size];
  const int mi_cols_log2 = mi_size_wide_log2[block_size];

  // Working state owned by the thread data.
  MACROBLOCK *const mb = &td->mb;
  MACROBLOCKD *const mbd = &mb->e_mbd;
  TemporalFilterData *const tf_data = &td->tf_data;
  const int num_planes = av1_num_planes(&cpi->common);
  uint32_t *accumulator = tf_data->accum;
  uint16_t *counter = tf_data->count;
  uint8_t *predictor = tf_data->pred;
  FRAME_DIFF *frame_diff = &tf_data->diff;

  // Factor to control the filtering strength.
  const int filter_strength = cpi->oxcf.algo_cfg.arnr_strength;

  // The non-"_c" filter variants are only selected for 32x32 blocks with a
  // 5x5 filtering window.
  // TODO(any): avx2/sse2 version should be changed to align with C function
  // before using.
  const int use_optimized_filter =
      (TF_BLOCK_SIZE == BLOCK_32X32 && TF_WINDOW_LENGTH == 5);

  av1_set_mv_row_limits(&cpi->common.mi_params, &mb->mv_limits,
                        (mb_row << mi_rows_log2), (mb_height >> MI_SIZE_LOG2),
                        cpi->oxcf.border_in_pixels);

  for (int mb_col = 0; mb_col < tf_ctx->mb_cols; mb_col++) {
    av1_set_mv_col_limits(&cpi->common.mi_params, &mb->mv_limits,
                          (mb_col << mi_cols_log2), (mb_width >> MI_SIZE_LOG2),
                          cpi->oxcf.border_in_pixels);

    // Start every block with empty accumulators.
    memset(accumulator, 0, num_pels * sizeof(accumulator[0]));
    memset(counter, 0, num_pels * sizeof(counter[0]));

    // Reference motion vector passed down along frames.
    MV ref_mv = kZeroMv;

    // Perform temporal filtering frame by frame.
    for (int frame_idx = 0; frame_idx < num_frames; frame_idx++) {
      if (frames[frame_idx] == NULL) continue;

      if (frame_idx == filter_frame_idx) {
        // Frame to be filtered: flip the sign of ref_mv for the frames that
        // follow, then add the frame's own pixels.
        ref_mv.row = -ref_mv.row;
        ref_mv.col = -ref_mv.col;
        tf_apply_temporal_filter_self(frames[frame_idx], mbd, block_size,
                                      mb_row, mb_col, num_planes, accumulator,
                                      counter);
        continue;
      }

      // Any other frame: motion search, build the prediction, then blend.
      MV subblock_mvs[4] = { kZeroMv, kZeroMv, kZeroMv, kZeroMv };
      int subblock_mses[4] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX };
      tf_motion_search(cpi, mb, frame_to_filter, frames[frame_idx], block_size,
                       mb_row, mb_col, &ref_mv, subblock_mvs, subblock_mses);
      tf_build_predictor(frames[frame_idx], mbd, block_size, mb_row, mb_col,
                         num_planes, scale, subblock_mvs, predictor);

      if (!is_frame_high_bitdepth(frame_to_filter)) {
        // 8-bit content.
        if (use_optimized_filter) {
          av1_apply_temporal_filter(frame_to_filter, mbd, block_size, mb_row,
                                    mb_col, num_planes, noise_levels,
                                    subblock_mvs, subblock_mses, q_factor,
                                    filter_strength, predictor, accumulator,
                                    counter);
        } else {
          av1_apply_temporal_filter_c(frame_to_filter, mbd, block_size, mb_row,
                                      mb_col, num_planes, noise_levels,
                                      subblock_mvs, subblock_mses, q_factor,
                                      filter_strength, predictor, accumulator,
                                      counter);
        }
      } else {
        // High bit-depth content; without CONFIG_AV1_HIGHBITDEPTH only the C
        // filter is available.
#if CONFIG_AV1_HIGHBITDEPTH
        if (use_optimized_filter) {
          av1_highbd_apply_temporal_filter(
              frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
              noise_levels, subblock_mvs, subblock_mses, q_factor,
              filter_strength, predictor, accumulator, counter);
        } else {
          av1_apply_temporal_filter_c(frame_to_filter, mbd, block_size, mb_row,
                                      mb_col, num_planes, noise_levels,
                                      subblock_mvs, subblock_mses, q_factor,
                                      filter_strength, predictor, accumulator,
                                      counter);
        }
#else
        av1_apply_temporal_filter_c(frame_to_filter, mbd, block_size, mb_row,
                                    mb_col, num_planes, noise_levels,
                                    subblock_mvs, subblock_mses, q_factor,
                                    filter_strength, predictor, accumulator,
                                    counter);
#endif  // CONFIG_AV1_HIGHBITDEPTH
      }
    }

    // Write the normalized block into the output frame.
    tf_normalize_filtered_frame(mbd, block_size, mb_row, mb_col, num_planes,
                                accumulator, counter, tf_ctx->output_frame);

    if (!compute_frame_diff) continue;

    // Accumulate the luma difference between the source block and the
    // filtered block.
    const int luma_h = mb_height >> mbd->plane[0].subsampling_y;
    const int luma_w = mb_width >> mbd->plane[0].subsampling_x;
    const int source_y_stride = frame_to_filter->y_stride;
    const int filter_y_stride = tf_ctx->output_frame->y_stride;
    const int source_offset =
        mb_row * luma_h * source_y_stride + mb_col * luma_w;
    const int filter_offset =
        mb_row * luma_h * filter_y_stride + mb_col * luma_w;
    unsigned int block_sse = 0;
    cpi->ppi->fn_ptr[block_size].vf(
        frame_to_filter->y_buffer + source_offset, source_y_stride,
        tf_ctx->output_frame->y_buffer + filter_offset, filter_y_stride,
        &block_sse);
    frame_diff->sum += block_sse;
    frame_diff->sse += block_sse * (int64_t)block_sse;
  }
}