这篇文章主要分析 AV1 中的 Palette Mode。AV1 的 Palette Mode 与其他标准中的 Palette Mode 差别不大;我的另外一篇关于 Palette Mode 的文章讲了 HEVC 中的 Palette Mode,可以参考一下。
AV1的Palette Mode
AV1 的 palette mode 与其他标准不同的一点在于:在 HEVC 中,palette mode 是作为 HEVC SCC 的一个单独工具存在的,属于 HEVC 的 extension;而 AV1 的 palette mode 直接规定在 specification 中,而且默认是处于开启状态的。(从下文代码中的 assert 可以看到,编码器中是否允许使用 palette 还取决于 allow_screen_content_tools 和块大小。)
AV1 的 palette mode 属于帧内预测编码中的一环,从代码中可以看得出来,其相关函数位于名称形如 `*_intra_sb*` 的函数中。它对 luma 和 chroma 采用不同的 palette predictor,每一个 predictor 可以有 2-8 种颜色,在 libaom 中的定义为:
// Maximum number of colors in a palette.
// av1_rd_pick_palette_intra_sby() asserts that this equals 8: its coarse-search
// lookup tables are declared with PALETTE_MAX_SIZE + 1 entries and initialized
// with exactly nine values.
#define PALETTE_MAX_SIZE 8
// Minimum number of colors in a palette.
// av1_rd_pick_palette_intra_sby() asserts that this equals 2; palette_rd_y()
// rejects any candidate with fewer unique colors than this.
#define PALETTE_MIN_SIZE 2
不同的palette predictor意味着palette predictor预测表所包含的颜色数量可能不一样,同样,块的每一个像素mapping的color index也可能不同。
libaom 中 palette mode 在编码器里的代码入口为 av1_rd_pick_palette_intra_sby 函数,调用处如下:
// Searches palette
// The luma palette search runs only when try_palette is non-zero.
// bmode_costs[DC_PRED] is passed as the dc_mode_cost argument; the search
// reports its result through best_mbmi, best_palette_color_map, best_rd,
// best_model_rd and the rate/distortion/skippable outputs, and raises
// beat_best_rd when a palette candidate lowers best_rd.
if (try_palette)
{
av1_rd_pick_palette_intra_sby(
cpi, x, bsize, bmode_costs[DC_PRED], &best_mbmi, best_palette_color_map,
&best_rd, &best_model_rd, rate, rate_tokenonly, distortion, skippable,
&beat_best_rd, ctx, ctx->blk_skip, ctx->tx_type_map);
}
整体的结构代码
/*!\brief Search for the best luma palette for an intra block
 *
 * Counts the distinct luma values of the block and, when that count is in
 * the range (1, 64], evaluates palette candidates of different sizes built in
 * two ways: from the most frequent values ("top colors") and from k-means
 * clustering. Every candidate is costed by palette_rd_y(), which updates the
 * best_* outputs whenever a candidate has a lower RD cost than *best_rd.
 *
 * \param[in]     cpi                    Encoder instance (speed features and
 *                                       sequence parameters are read from it)
 * \param[in,out] x                      Macroblock; source pixels come from
 *                                       x->plane[0], scratch from
 *                                       x->palette_buffer
 * \param[in]     bsize                  Block size
 * \param[in]     dc_mode_cost           Forwarded to the candidate evaluation
 * \param[in,out] best_mbmi              Best mode info so far; copied into the
 *                                       block's mode info on exit
 * \param[in,out] best_palette_color_map Color index map of the best palette
 * \param[in,out] best_rd                Best RD cost so far
 * \param[in,out] best_model_rd          Forwarded to the candidate evaluation
 * \param[out]    rate                   Forwarded to the candidate evaluation
 * \param[out]    rate_tokenonly         Forwarded to the candidate evaluation
 * \param[out]    distortion             Forwarded to the candidate evaluation
 * \param[out]    skippable              Forwarded to the candidate evaluation
 * \param[out]    beat_best_rd           Forwarded to the candidate evaluation
 * \param[in]     ctx                    Forwarded to the candidate evaluation
 * \param[out]    best_blk_skip          Forwarded to the candidate evaluation
 * \param[out]    tx_type_map            Forwarded to the candidate evaluation
 */
void av1_rd_pick_palette_intra_sby(
const AV1_COMP* cpi,
MACROBLOCK* x,
BLOCK_SIZE bsize,
int dc_mode_cost,
MB_MODE_INFO* best_mbmi,
uint8_t* best_palette_color_map,
int64_t* best_rd,
int64_t* best_model_rd,
int* rate,
int* rate_tokenonly,
int64_t* distortion,
int* skippable,
int* beat_best_rd,
PICK_MODE_CONTEXT* ctx,
uint8_t* best_blk_skip,
uint8_t* tx_type_map)
{
MACROBLOCKD* const xd = &x->e_mbd;
MB_MODE_INFO* const mbmi = xd->mi[0];
// Preconditions: intra block, palette allowed for this block size, and the
// palette size limits match the values the lookup tables below were built for.
assert(!is_inter_block(mbmi));
assert(av1_allow_palette(cpi->common.features.allow_screen_content_tools, bsize));
assert(PALETTE_MAX_SIZE == 8);
assert(PALETTE_MIN_SIZE == 2);
const int src_stride = x->plane[0].src.stride;
const uint8_t* const src = x->plane[0].src.buf;
int block_width, block_height, rows, cols;
// Get the dimensions of the current block.
av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows, &cols);
const SequenceHeader* const seq_params = &cpi->common.seq_params;
const int is_hbd = seq_params->use_highbitdepth;
const int bit_depth = seq_params->bit_depth;
// Sink for the last_n_searched output of searches whose stop point is not needed.
int unused;
int count_buf[1 << 12]; // Maximum (1 << 12) color levels.
int count_buf_8bit[1 << 8]; // Maximum (1 << 8) bins for hbd path.
// colors: number of distinct values in the block. colors_threshold: the value
// tested against the (1, 64] gate below; the high-bitdepth counter reports it
// separately, the 8-bit path sets it equal to colors.
int colors, colors_threshold = 0;
if (is_hbd)
{
av1_count_colors_highbd(src, src_stride, rows, cols, bit_depth, count_buf, count_buf_8bit, &colors_threshold, &colors);
}
else
{
// Count how many distinct colors the current block contains by building a
// histogram.
av1_count_colors(src, src_stride, rows, cols, count_buf, &colors);
colors_threshold = colors;
}
uint8_t* const color_map = xd->plane[0].color_index_map;
// Palette is only searched for blocks with more than 1 and at most 64 colors.
if (colors_threshold > 1 && colors_threshold <= 64)
{
int* const data = x->palette_buffer->kmeans_data_buf;
int centroids[PALETTE_MAX_SIZE];
int lower_bound, upper_bound;
// Fill data[] from the block's luma samples and obtain the smallest
// (lower_bound) and largest (upper_bound) sample value.
// NOTE(review): palette_rd_y() later reads rows * cols entries of data, so it
// presumably holds one value per pixel rather than only the unique values -
// confirm in fill_data_and_get_bounds().
fill_data_and_get_bounds(src, src_stride, rows, cols, is_hbd, data, &lower_bound, &upper_bound);
// Palette candidates are evaluated with the mode set to DC_PRED and
// filter-intra disabled.
mbmi->mode = DC_PRED;
mbmi->filter_intra_mode_info.use_filter_intra = 0;
uint16_t color_cache[2 * PALETTE_MAX_SIZE];
const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
// Find the dominant colors, stored in top_colors[].
// Each pass scans the count_buf histogram for the most frequent remaining
// value, stores it in top_colors[], and zeroes its bin so the next pass finds
// the next most frequent one. At most PALETTE_MAX_SIZE values are collected.
int top_colors[PALETTE_MAX_SIZE] = { 0 };
for (int i = 0; i < AOMMIN(colors, PALETTE_MAX_SIZE); ++i)
{
int max_count = 0;
for (int j = 0; j < (1 << bit_depth); ++j)
{
if (count_buf[j] > max_count)
{
max_count = count_buf[j];
top_colors[i] = j;
}
}
assert(max_count > 0);
count_buf[top_colors[i]] = 0;
}
// TODO(huisu@google.com): Try to avoid duplicate computation in cases
// where the dominant colors and the k-means results are similar.
// Pruning level 1 with more than PALETTE_MIN_SIZE colors: two-stage search
// (coarse sweep with a step, then the winner's neighbors).
if ((cpi->sf.intra_sf.prune_palette_search_level == 1) && (colors > PALETTE_MIN_SIZE))
{
// Start index and step size below are chosen to evaluate unique
// candidates in neighbor search, in case a winner candidate is found in
// coarse search. Example,
// 1) 8 colors (end_n = 8): 2,3,4,5,6,7,8. start_n is chosen as 2 and step
// size is chosen as 3. Therefore, coarse search will evaluate 2, 5 and 8.
// If winner is found at 5, then 4 and 6 are evaluated. Similarly, for 2
// (3) and 8 (7).
// 2) 7 colors (end_n = 7): 2,3,4,5,6,7. If start_n is chosen as 2 (same
// as for 8 colors) then step size should also be 2, to cover all
// candidates. Coarse search will evaluate 2, 4 and 6. If winner is either
// 2 or 4, 3 will be evaluated. Instead, if start_n=3 and step_size=3,
// coarse search will evaluate 3 and 6. For the winner, unique neighbors
// (3: 2,4 or 6: 5,7) would be evaluated.
// Start index for coarse palette search for dominant colors and k-means
const uint8_t start_n_lookup_table[PALETTE_MAX_SIZE + 1] = { 0, 0, 0,
3, 3, 2,
3, 3, 2 };
// Step size for coarse palette search for dominant colors and k-means
const uint8_t step_size_lookup_table[PALETTE_MAX_SIZE + 1] = { 0, 0, 0,
3, 3, 3,
3, 3, 3 };
// Choose the start index and step size for coarse search based on number
// of colors
const int max_n = AOMMIN(colors, PALETTE_MAX_SIZE);
const int min_n = start_n_lookup_table[max_n];
const int step_size = step_size_lookup_table[max_n];
assert(min_n >= PALETTE_MIN_SIZE);
// Perform top color coarse palette search to find the winner candidate
const int top_color_winner = perform_top_color_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, min_n, max_n + 1,
step_size, &unused, color_cache, n_cache, best_mbmi,
best_palette_color_map, best_rd, best_model_rd, rate, rate_tokenonly,
distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map);
// Evaluate neighbors for the winner color (if winner is found) in the
// above coarse search for dominant colors
// The search returns its end_n argument (max_n + 1 here) when no candidate
// improved the best RD cost, so a value <= max_n means a winner exists.
if (top_color_winner <= max_n) {
int stage2_min_n, stage2_max_n, stage2_step_size;
set_stage2_params(&stage2_min_n, &stage2_max_n, &stage2_step_size,
top_color_winner, max_n);
// perform finer search for the winner candidate
perform_top_color_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, stage2_min_n,
stage2_max_n + 1, stage2_step_size, &unused, color_cache, n_cache,
best_mbmi, best_palette_color_map, best_rd, best_model_rd, rate,
rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
best_blk_skip, tx_type_map);
}
// K-means clustering.
// Perform k-means coarse palette search to find the winner candidate
const int k_means_winner = perform_k_means_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, lower_bound, upper_bound,
min_n, max_n + 1, step_size, &unused, color_cache, n_cache, best_mbmi,
best_palette_color_map, best_rd, best_model_rd, rate, rate_tokenonly,
distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map,
color_map, rows * cols);
// Evaluate neighbors for the winner color (if winner is found) in the
// above coarse search for k-means
if (k_means_winner <= max_n) {
int start_n_stage2, end_n_stage2, step_size_stage2;
set_stage2_params(&start_n_stage2, &end_n_stage2, &step_size_stage2, k_means_winner, max_n);
// perform finer search for the winner candidate
perform_k_means_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, lower_bound, upper_bound,
start_n_stage2, end_n_stage2 + 1, step_size_stage2, &unused,
color_cache, n_cache, best_mbmi, best_palette_color_map, best_rd,
best_model_rd, rate, rate_tokenonly, distortion, skippable,
beat_best_rd, ctx, best_blk_skip, tx_type_map, color_map,
rows * cols);
}
}
else {
// All other cases: sweep the palette size downwards from max_n, then
// upwards from min_n over the sizes the downward sweep did not reach.
const int max_n = AOMMIN(colors, PALETTE_MAX_SIZE),
min_n = PALETTE_MIN_SIZE;
// Perform top color palette search in descending order
int last_n_searched = max_n;
perform_top_color_palette_search(cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, max_n, min_n - 1,
-1, &last_n_searched, color_cache, n_cache, best_mbmi, best_palette_color_map, best_rd, best_model_rd, rate, rate_tokenonly, distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map);
// last_n_searched > min_n means the descending sweep stopped before reaching
// min_n (the top-color search returns early at pruning level 2 when a
// candidate fails to improve).
if (last_n_searched > min_n)
{
// Search in ascending order until we get to the previous best
perform_top_color_palette_search( cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, min_n,
last_n_searched, 1, &unused, color_cache, n_cache, best_mbmi, best_palette_color_map, best_rd, best_model_rd, rate, rate_tokenonly, distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map);
}
// K-means clustering.
if (colors == PALETTE_MIN_SIZE)
{
// Special case: These colors automatically become the centroids.
// With exactly two colors the minimum and maximum sample values are the two
// colors, so no clustering is needed. The final NULL is the optional
// beat_best_palette_rd output, which is not needed here.
assert(colors == 2);
centroids[0] = lower_bound;
centroids[1] = upper_bound;
palette_rd_y(cpi, x, mbmi, bsize, dc_mode_cost, data, centroids, colors, color_cache, n_cache, best_mbmi, best_palette_color_map, best_rd, best_model_rd, rate, rate_tokenonly, distortion, skippable,
beat_best_rd, ctx, best_blk_skip, tx_type_map, NULL);
}
else {
// Perform k-means palette search in descending order
last_n_searched = max_n;
perform_k_means_palette_search(cpi, x, mbmi, bsize, dc_mode_cost, data, lower_bound, upper_bound, max_n, min_n - 1, -1, &last_n_searched, color_cache, n_cache,
best_mbmi, best_palette_color_map, best_rd, best_model_rd, rate, rate_tokenonly, distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map, color_map, rows * cols);
if (last_n_searched > min_n) {
// Search in ascending order until we get to the previous best
perform_k_means_palette_search( cpi, x, mbmi, bsize, dc_mode_cost, data, lower_bound, upper_bound, min_n, last_n_searched, 1, &unused, color_cache, n_cache,
best_mbmi, best_palette_color_map, best_rd, best_model_rd, rate, rate_tokenonly, distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map, color_map, rows * cols);
}
}
}
}
// If the best mode uses a luma palette, restore its color index map into the
// block's working map (the search overwrote it while trying candidates).
if (best_mbmi->palette_mode_info.palette_size[0] > 0) {
memcpy(color_map, best_palette_color_map,
block_width * block_height * sizeof(best_palette_color_map[0]));
}
// Leave the block's mode info equal to the best mode found.
*mbmi = *best_mbmi;
}
函数 av1_count_colors 通过计算直方图来统计当前 block 中一共包含多少种颜色。
/*
 * Builds a histogram of the 8-bit samples in a rows x cols region of src and
 * reports how many distinct sample values occur.
 *
 * src        : top-left sample of the region
 * stride     : distance, in samples, between the starts of consecutive rows
 * rows, cols : size of the region
 * val_count  : output histogram; must have room for 256 entries, all of which
 *              are overwritten
 * num_colors : receives the number of histogram bins that are non-zero
 */
void av1_count_colors(const uint8_t *src, int stride, int rows, int cols, int *val_count, int *num_colors)
{
  enum { kNumLevels = 1 << 8 };
  memset(val_count, 0, kNumLevels * sizeof(*val_count));

  int distinct = 0;
  for (int y = 0; y < rows; ++y) {
    const int row_base = y * stride;
    for (int x = 0; x < cols; ++x) {
      const int level = src[row_base + x];
      assert(level < kNumLevels);
      // A bin going from 0 to 1 means this value is seen for the first time.
      if (val_count[level]++ == 0) {
        ++distinct;
      }
    }
  }
  *num_colors = distinct;
}
关键函数 perform_top_color_palette_search 负责用 palette mode 的 2-8 种颜色与当前 block 的每一个像素进行 mapping(这里的代码只处理 luma 分量,即 plane 0),通过 RDO 来确认最佳的颜色数量与每个像素的 color index。
// Count-based palette search. Tries palette sizes n = start_n, start_n +
// step_size, ... for as long as is_iter_over(n, end_n, step_size) is false,
// i.e. over the interval [start_n, end_n); a negative step_size walks
// downwards, in which case end_n is below start_n. For each size the first n
// entries of top_colors[] are used as the palette and costed by palette_rd_y().
// *last_n_searched receives the last size that was evaluated. The return value
// is the last size whose candidate beat the best RD cost, or end_n if none did.
static AOM_INLINE int perform_top_color_palette_search(
    const AV1_COMP* const cpi, MACROBLOCK* x, MB_MODE_INFO* mbmi, BLOCK_SIZE bsize, int dc_mode_cost, const int* data, int* top_colors,
    int start_n, int end_n, int step_size, int* last_n_searched, uint16_t* color_cache, int n_cache, MB_MODE_INFO* best_mbmi, uint8_t* best_palette_color_map, int64_t* best_rd, int64_t* best_model_rd,
    int* rate, int* rate_tokenonly, int64_t* distortion, int* skippable, int* beat_best_rd, PICK_MODE_CONTEXT* ctx, uint8_t* best_blk_skip, uint8_t* tx_type_map)
{
  /* clang-format off */
  assert(IMPLIES(step_size < 0, start_n > end_n));
  /* clang-format on */
  assert(IMPLIES(step_size > 0, start_n < end_n));

  int palette[PALETTE_MAX_SIZE];
  int winner = end_n;  // end_n doubles as the "no winner" marker

  // Walk the candidate sizes (downwards from 8 towards 2, or upwards from 2
  // towards 8, depending on the sign of step_size), cost each one and remember
  // the last size that improved on the best RD cost.
  for (int num_colors = start_n; !is_iter_over(num_colors, end_n, step_size);
       num_colors += step_size) {
    int improved = 0;
    // palette_rd_y() receives a non-const centroid array, so hand it a scratch
    // copy and leave top_colors[] untouched for the next size.
    memcpy(palette, top_colors, num_colors * sizeof(top_colors[0]));
    palette_rd_y(cpi, x, mbmi, bsize, dc_mode_cost, data, palette, num_colors,
                 color_cache, n_cache, best_mbmi, best_palette_color_map,
                 best_rd, best_model_rd, rate, rate_tokenonly, distortion,
                 skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map,
                 &improved);
    *last_n_searched = num_colors;

    if (improved) {
      winner = num_colors;
      continue;
    }
    // At search level 2, stop as soon as a size fails to improve.
    if (cpi->sf.intra_sf.prune_palette_search_level == 2) {
      break;
    }
  }
  return winner;
}
在这个函数里,就是靠函数 palette_rd_y 来计算 rdcost,并通过输出参数 beat_best_palette_rd 指明当前选项(颜色数量 + color index 组合)的 RD cost 是否低于(即优于)当前的 best RD。
/*!\brief Evaluate one luma palette candidate and keep it if it wins
 *
 * \ingroup palette_mode_search
 * \callergraph
 * Turns the n base colors in centroids[] into a luma palette, builds the
 * per-pixel color index map of the block, and computes the RD cost of coding
 * the block with that palette. When the cost is lower than *best_rd, the
 * candidate is recorded in best_mbmi, best_palette_color_map, blk_skip and
 * tx_type_map, *best_rd is lowered, and the optional outputs (rate,
 * rate_tokenonly, distortion, skippable, beat_best_rd, beat_best_palette_rd)
 * are written when their pointers are non-NULL. centroids[] is handed to
 * helpers through a non-const pointer, so callers should treat it as scratch.
 */
static AOM_INLINE void palette_rd_y(
    const AV1_COMP* const cpi,
    MACROBLOCK* x,
    MB_MODE_INFO* mbmi,
    BLOCK_SIZE bsize,
    int dc_mode_cost,
    const int* data,
    int* centroids,
    int n,
    uint16_t* color_cache,
    int n_cache,
    MB_MODE_INFO* best_mbmi,
    uint8_t* best_palette_color_map,
    int64_t* best_rd,
    int64_t* best_model_rd,
    int* rate,
    int* rate_tokenonly,
    int64_t* distortion,
    int* skippable,
    int* beat_best_rd,
    PICK_MODE_CONTEXT* ctx,
    uint8_t* blk_skip,
    uint8_t* tx_type_map,
    int* beat_best_palette_rd)
{
  const int bit_depth = cpi->common.seq_params.bit_depth;

  // Let optimize_palette_colors() adjust the candidates against the color
  // cache, then count how many unique colors remain.
  optimize_palette_colors(color_cache, n_cache, n, 1, centroids, bit_depth);
  const int palette_size = av1_remove_duplicates(centroids, n);
  if (palette_size < PALETTE_MIN_SIZE) {
    // Too few unique colors to create a palette. And DC_PRED will work
    // well for that case anyway. So skip.
    return;
  }

  // Store the candidate palette in the block's mode info, clipping every
  // color with the helper that matches the stream's bit depth.
  PALETTE_MODE_INFO* const palette_info = &mbmi->palette_mode_info;
  const int is_hbd = cpi->common.seq_params.use_highbitdepth;
  for (int k = 0; k < palette_size; ++k) {
    palette_info->palette_colors[k] =
        is_hbd ? clip_pixel_highbd(centroids[k], bit_depth)
               : clip_pixel(centroids[k]);
  }
  palette_info->palette_size[0] = palette_size;

  MACROBLOCKD* const xd = &x->e_mbd;
  uint8_t* const index_map = xd->plane[0].color_index_map;
  int block_width, block_height, rows, cols;
  av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows, &cols);

  // Assign every pixel of the block to one of the centroids, producing the
  // color index map.
  av1_calc_indices(data, centroids, index_map, rows * cols, palette_size, 1);
  // If the block lies on the frame edge, part of it has no color map values;
  // fill the out-of-frame part by replicating the nearest pixels so the map
  // covers the full block_width x block_height area.
  extend_palette_color_map(index_map, cols, rows, block_width, block_height);

  // Cheap model-based check: drop the candidate when it asks for pruning.
  if (model_intra_yrd_and_prune(cpi, x, bsize, best_model_rd)) {
    return;
  }

  RD_STATS luma_rd_stats;
  av1_pick_uniform_tx_size_type_yrd(cpi, x, &luma_rd_stats, bsize, *best_rd);
  if (luma_rd_stats.rate == INT_MAX) {
    return;
  }

  const int mode_info_rate = intra_mode_info_cost_y(cpi, x, mbmi, bsize, dc_mode_cost);
  const int total_rate = luma_rd_stats.rate + mode_info_rate;
  const int64_t candidate_rd = RDCOST(x->rdmult, total_rate, luma_rd_stats.dist);

  // The token-only rate handed back to the caller has the transform-size cost
  // removed when the block signals its transform size. This happens after
  // total_rate and candidate_rd are computed, so they still include it.
  const int tx_size_is_signalled =
      !xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->bsize);
  if (tx_size_is_signalled) {
    luma_rd_stats.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
  }

  // Collect mode stats for multiwinner mode processing
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_DC,
                          index_map, bsize, candidate_rd,
                          cpi->sf.winner_mode_sf.multi_winner_mode_type,
                          /*txfm_search_done=*/1);

  if (candidate_rd >= *best_rd) {
    return;
  }

  // The candidate wins: record it as the new best.
  *best_rd = candidate_rd;
  // Setting beat_best_rd flag because current mode rd is better than best_rd.
  // This flag need to be updated only for palette evaluation in key frames
  if (beat_best_rd) {
    *beat_best_rd = 1;
  }
  memcpy(best_palette_color_map, index_map,
         block_width * block_height * sizeof(index_map[0]));
  *best_mbmi = *mbmi;
  memcpy(blk_skip, x->txfm_search_info.blk_skip,
         sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk);
  av1_copy_array(tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
  if (rate) {
    *rate = total_rate;
  }
  if (rate_tokenonly) {
    *rate_tokenonly = luma_rd_stats.rate;
  }
  if (distortion) {
    *distortion = luma_rd_stats.dist;
  }
  if (skippable) {
    *skippable = luma_rd_stats.skip_txfm;
  }
  if (beat_best_palette_rd) {
    *beat_best_palette_rd = 1;
  }
}
若 palette mode 的 RD cost 低于(即优于)当前的 best RD cost,则会被选为帧内预测方式,相应的 palette predictor 和 color index 会经过与邻居的 predictor 做预测后传输到 decoder。