[AV1] Palette Intra Prediction

最新推荐文章于 2023-06-16 17:00:06 发布

轻舞飞扬SR

最新推荐文章于 2023-06-16 17:00:06 发布

阅读量1.1k

点赞数 3

分类专栏： AV1 文章标签： av1 video codec codec videc intra pred

本文链接：https://blog.csdn.net/starperfection/article/details/111934691

版权

AV1 专栏收录该内容

37 篇文章 53 订阅

订阅专栏

返回AV1专栏目录

这篇文章主要分析AV1中的Palette Mode，但是AV1的Palette Mode也不会跟其他标准中的Palette Mode有多大的差别，我的另外一篇关于Palette Mode的文章讲了HEVC中的Palette Mode，可以参考一下。

AV1的Palette Mode

AV1的palette mode与其他标准的一个不同点在于：在HEVC中，palette mode是作为HEVC SCC的一个单独工具存在的，属于HEVC的extension；而AV1的palette mode直接规定在specification中，而且默认是处于开启状态的。

AV1的palette mode属于帧内预测编码中的一环，从代码中可以看得出来，其函数名形如 `*_intra_sb*`（例如下文的 `av1_rd_pick_palette_intra_sby`）。它对luma和chroma采用不同的palette predictor，每一个predictor可以有2-8种颜色，在libaom中的定义为

// Upper limit on the number of colors a single palette may hold.
#define PALETTE_MAX_SIZE 8
// Lower limit on the number of colors a single palette may hold.
#define PALETTE_MIN_SIZE 2

不同的palette predictor意味着palette predictor预测表所包含的颜色数量可能不一样，同样，块的每一个像素mapping的color index也可能不同。

在libaom（AOM的AV1参考编码器）中，palette mode在编码器里的代码入口为 av1_rd_pick_palette_intra_sby 函数

// Searches palette: the luma palette search runs only when try_palette is set.
// DC_PRED's mode cost is passed in as the base intra mode cost.
if (try_palette)
{
    av1_rd_pick_palette_intra_sby(
            cpi, x, bsize, bmode_costs[DC_PRED], &best_mbmi, best_palette_color_map,
            &best_rd, &best_model_rd, rate, rate_tokenonly, distortion, skippable,
            &beat_best_rd, ctx, ctx->blk_skip, ctx->tx_type_map);
}

整体的结构代码

/*!\brief Search the luma (plane 0) palette for the current block.
 *
 * Counts the colors in the source block and, when the count is in [2, 64],
 * evaluates palette candidates built from (a) the most frequent colors and
 * (b) k-means clustering. Each candidate is evaluated by the search helpers
 * (perform_top_color_palette_search, perform_k_means_palette_search,
 * palette_rd_y), which receive the best_* / rate / distortion pointers and
 * update them.
 *
 * On exit, if best_mbmi carries a luma palette (palette_size[0] > 0), the best
 * color map is copied into plane 0's color_index_map; *best_mbmi is then
 * copied into the current mode info unconditionally.
 *
 * \param[in]     cpi                    Encoder instance; sequence parameters
 *                                       and the prune_palette_search_level
 *                                       speed feature are read from it.
 * \param[in,out] x                      Macroblock; plane 0 source pixels and
 *                                       the k-means data buffer come from it.
 * \param[in]     bsize                  Block size being searched.
 * \param[in]     dc_mode_cost           Forwarded to the search helpers.
 * \param[in,out] best_mbmi              Best mode info so far; copied into the
 *                                       current mode info at the end.
 * \param[in,out] best_palette_color_map Color map belonging to best_mbmi.
 * \param[in,out] best_rd                Forwarded to the search helpers.
 * \param[in,out] best_model_rd          Forwarded to the search helpers.
 * \param[out]    rate, rate_tokenonly, distortion, skippable, beat_best_rd
 *                                       Forwarded to the search helpers.
 * \param[in]     ctx                    Forwarded to the search helpers.
 * \param[out]    best_blk_skip          Forwarded to the search helpers.
 * \param[out]    tx_type_map            Forwarded to the search helpers.
 */
void av1_rd_pick_palette_intra_sby(
    const AV1_COMP* cpi,
    MACROBLOCK* x,
    BLOCK_SIZE bsize,
    int dc_mode_cost,
    MB_MODE_INFO* best_mbmi,
    uint8_t* best_palette_color_map,
    int64_t* best_rd,
    int64_t* best_model_rd,
    int* rate,
    int* rate_tokenonly,
    int64_t* distortion,
    int* skippable,
    int* beat_best_rd,
    PICK_MODE_CONTEXT* ctx,
    uint8_t* best_blk_skip,
    uint8_t* tx_type_map)
{
    MACROBLOCKD* const xd = &x->e_mbd;
    MB_MODE_INFO* const mbmi = xd->mi[0];
    assert(!is_inter_block(mbmi));
    assert(av1_allow_palette(cpi->common.features.allow_screen_content_tools, bsize));
    // The lookup tables and the two-color special case below rely on these
    // exact limits.
    assert(PALETTE_MAX_SIZE == 8);
    assert(PALETTE_MIN_SIZE == 2);

    const int src_stride = x->plane[0].src.stride;
    const uint8_t* const src = x->plane[0].src.buf;
    int block_width, block_height, rows, cols;
    // Fetch the plane-0 dimensions of the current block. rows/cols drive the
    // color counting and the searches; block_width * block_height sizes the
    // color-map copy at the end of the function.
    av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows, &cols);
    const SequenceHeader* const seq_params = &cpi->common.seq_params;
    const int is_hbd = seq_params->use_highbitdepth;
    const int bit_depth = seq_params->bit_depth;
    // Sink for the "last n searched" output where the caller does not need it.
    int unused;

    int count_buf[1 << 12];      // Maximum (1 << 12) color levels.
    int count_buf_8bit[1 << 8];  // Maximum (1 << 8) bins for hbd path.
    int colors, colors_threshold = 0;
    if (is_hbd)
    {
        av1_count_colors_highbd(src, src_stride, rows, cols, bit_depth, count_buf, count_buf_8bit, &colors_threshold, &colors);
    }
    else
    {
        // Build a histogram of the block and count how many distinct colors it
        // contains.
        av1_count_colors(src, src_stride, rows, cols, count_buf, &colors);
        colors_threshold = colors;
    }

    uint8_t* const color_map = xd->plane[0].color_index_map;
    // Palette is only searched when the block has between 2 and 64 colors
    // (as measured by colors_threshold).
    if (colors_threshold > 1 && colors_threshold <= 64)
    {
        int* const data = x->palette_buffer->kmeans_data_buf;
        int centroids[PALETTE_MAX_SIZE];
        int lower_bound, upper_bound;
        // Per the original note, data receives the block's luma values and
        // lower_bound / upper_bound their minimum and maximum.
        // NOTE(review): the original comment says "unique" values, but data is
        // passed on together with a count of rows * cols, so it presumably
        // holds every sample -- confirm against fill_data_and_get_bounds.
        fill_data_and_get_bounds(src, src_stride, rows, cols, is_hbd, data, &lower_bound, &upper_bound);

        mbmi->mode = DC_PRED;
        mbmi->filter_intra_mode_info.use_filter_intra = 0;

        uint16_t color_cache[2 * PALETTE_MAX_SIZE];
        const int n_cache = av1_get_palette_cache(xd, 0, color_cache);

        // Find the dominant colors, stored in top_colors[].
        // Each pass picks the value with the largest remaining count in
        // count_buf, then zeroes that count so the next pass finds the next
        // most frequent value. At most PALETTE_MAX_SIZE colors are collected.
        int top_colors[PALETTE_MAX_SIZE] = { 0 };
        for (int i = 0; i < AOMMIN(colors, PALETTE_MAX_SIZE); ++i)
        {
            int max_count = 0;
            for (int j = 0; j < (1 << bit_depth); ++j)
            {
                if (count_buf[j] > max_count)
                {
                    max_count = count_buf[j];
                    top_colors[i] = j;
                }
            }
            assert(max_count > 0);
            count_buf[top_colors[i]] = 0;
        }

        // TODO(huisu@google.com): Try to avoid duplicate computation in cases
        // where the dominant colors and the k-means results are similar.
        if ((cpi->sf.intra_sf.prune_palette_search_level == 1) && (colors > PALETTE_MIN_SIZE))
        {
            // Start index and step size below are chosen to evaluate unique
            // candidates in neighbor search, in case a winner candidate is found in
            // coarse search. Example,
            // 1) 8 colors (end_n = 8): 2,3,4,5,6,7,8. start_n is chosen as 2 and step
            // size is chosen as 3. Therefore, coarse search will evaluate 2, 5 and 8.
            // If winner is found at 5, then 4 and 6 are evaluated. Similarly, for 2
            // (3) and 8 (7).
            // 2) 7 colors (end_n = 7): 2,3,4,5,6,7. If start_n is chosen as 2 (same
            // as for 8 colors) then step size should also be 2, to cover all
            // candidates. Coarse search will evaluate 2, 4 and 6. If winner is either
            // 2 or 4, 3 will be evaluated. Instead, if start_n=3 and step_size=3,
            // coarse search will evaluate 3 and 6. For the winner, unique neighbors
            // (3: 2,4 or 6: 5,7) would be evaluated.

            // Start index for coarse palette search for dominant colors and k-means
            const uint8_t start_n_lookup_table[PALETTE_MAX_SIZE + 1] = { 0, 0, 0,
                                                                         3, 3, 2,
                                                                         3, 3, 2 };
            // Step size for coarse palette search for dominant colors and k-means
            const uint8_t step_size_lookup_table[PALETTE_MAX_SIZE + 1] = { 0, 0, 0,
                                                                           3, 3, 3,
                                                                           3, 3, 3 };

            // Choose the start index and step size for coarse search based on number
            // of colors
            const int max_n = AOMMIN(colors, PALETTE_MAX_SIZE);
            const int min_n = start_n_lookup_table[max_n];
            const int step_size = step_size_lookup_table[max_n];
            assert(min_n >= PALETTE_MIN_SIZE);

            // Perform top color coarse palette search to find the winner candidate
            const int top_color_winner = perform_top_color_palette_search(
                cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, min_n, max_n + 1,
                step_size, &unused, color_cache, n_cache, best_mbmi,
                best_palette_color_map, best_rd, best_model_rd, rate, rate_tokenonly,
                distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map);
            // Evaluate neighbors for the winner color (if winner is found) in the
            // above coarse search for dominant colors. The search returns its
            // end_n (max_n + 1 here) when no candidate improved on the best RD.
            if (top_color_winner <= max_n) {
                int stage2_min_n, stage2_max_n, stage2_step_size;
                set_stage2_params(&stage2_min_n, &stage2_max_n, &stage2_step_size,
                    top_color_winner, max_n);
                // perform finer search for the winner candidate
                perform_top_color_palette_search(
                    cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, stage2_min_n,
                    stage2_max_n + 1, stage2_step_size, &unused, color_cache, n_cache,
                    best_mbmi, best_palette_color_map, best_rd, best_model_rd, rate,
                    rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
                    best_blk_skip, tx_type_map);
            }
            // K-means clustering.
            // Perform k-means coarse palette search to find the winner candidate
            const int k_means_winner = perform_k_means_palette_search(
                cpi, x, mbmi, bsize, dc_mode_cost, data, lower_bound, upper_bound,
                min_n, max_n + 1, step_size, &unused, color_cache, n_cache, best_mbmi,
                best_palette_color_map, best_rd, best_model_rd, rate, rate_tokenonly,
                distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map,
                color_map, rows * cols);
            // Evaluate neighbors for the winner color (if winner is found) in the
            // above coarse search for k-means
            if (k_means_winner <= max_n) {
                int start_n_stage2, end_n_stage2, step_size_stage2;
                set_stage2_params(&start_n_stage2, &end_n_stage2, &step_size_stage2, k_means_winner, max_n);
                // perform finer search for the winner candidate
                perform_k_means_palette_search(
                    cpi, x, mbmi, bsize, dc_mode_cost, data, lower_bound, upper_bound,
                    start_n_stage2, end_n_stage2 + 1, step_size_stage2, &unused,
                    color_cache, n_cache, best_mbmi, best_palette_color_map, best_rd,
                    best_model_rd, rate, rate_tokenonly, distortion, skippable,
                    beat_best_rd, ctx, best_blk_skip, tx_type_map, color_map,
                    rows * cols);
            }
        }
        else {
            const int max_n = AOMMIN(colors, PALETTE_MAX_SIZE),
                      min_n = PALETTE_MIN_SIZE;
            // Perform top color palette search in descending order
            int last_n_searched = max_n;
            perform_top_color_palette_search(cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, max_n, min_n - 1,
                -1, &last_n_searched, color_cache, n_cache, best_mbmi, best_palette_color_map, best_rd, best_model_rd, rate, rate_tokenonly, distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map);

            // The descending pass stopped before reaching min_n, so the sizes
            // in [min_n, last_n_searched) have not been tried yet.
            if (last_n_searched > min_n) 
            {
                // Search in ascending order until we get to the previous best
                perform_top_color_palette_search( cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, min_n,
                    last_n_searched, 1, &unused, color_cache, n_cache, best_mbmi, best_palette_color_map, best_rd, best_model_rd, rate, rate_tokenonly, distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map);
            }
            // K-means clustering.
            if (colors == PALETTE_MIN_SIZE)
            {
                // Special case: These colors automatically become the centroids.
                assert(colors == 2);
                centroids[0] = lower_bound;
                centroids[1] = upper_bound;
                palette_rd_y(cpi, x, mbmi, bsize, dc_mode_cost, data, centroids, colors, color_cache, n_cache, best_mbmi, best_palette_color_map, best_rd, best_model_rd, rate, rate_tokenonly, distortion, skippable, 
                    beat_best_rd, ctx, best_blk_skip, tx_type_map, NULL);
            }
            else {
                // Perform k-means palette search in descending order
                last_n_searched = max_n;
                perform_k_means_palette_search(cpi, x, mbmi, bsize, dc_mode_cost, data, lower_bound, upper_bound, max_n, min_n - 1, -1, &last_n_searched, color_cache, n_cache,
                    best_mbmi, best_palette_color_map, best_rd, best_model_rd, rate, rate_tokenonly, distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map, color_map, rows * cols);
                if (last_n_searched > min_n) {
                    // Search in ascending order until we get to the previous best
                    perform_k_means_palette_search( cpi, x, mbmi, bsize, dc_mode_cost, data, lower_bound, upper_bound, min_n, last_n_searched, 1, &unused, color_cache, n_cache,
                        best_mbmi, best_palette_color_map, best_rd, best_model_rd, rate, rate_tokenonly, distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map, color_map, rows * cols);
                }
            }
        }
    }

    // Restore the winning color map (if the best mode uses a luma palette) and
    // make the best mode info the current one.
    if (best_mbmi->palette_mode_info.palette_size[0] > 0) {
        memcpy(color_map, best_palette_color_map,
            block_width * block_height * sizeof(best_palette_color_map[0]));
    }
    *mbmi = *best_mbmi;
}

函数 av1_count_colors是通过计算直方图统计当前block中一共包含多少种颜色。

// Builds a histogram of the 8-bit samples in a rows x cols block and reports
// how many distinct sample values occur in it.
//
//   src, stride  top-left sample of the block and the distance, in samples,
//                between the starts of consecutive rows.
//   rows, cols   block dimensions to scan.
//   val_count    caller-provided array with at least 256 entries; it is
//                cleared first and then holds the occurrence count per value.
//   num_colors   receives the number of values whose count is non-zero.
void av1_count_colors(const uint8_t *src, int stride, int rows, int cols, int *val_count, int *num_colors) 
{
    const int max_pix_val = 1 << 8;
    memset(val_count, 0, max_pix_val * sizeof(val_count[0]));

    // Pass 1: accumulate the histogram row by row.
    for (int row = 0; row < rows; ++row) 
    {
        const int row_offset = row * stride;
        for (int col = 0; col < cols; ++col) 
        {
            const int sample = src[row_offset + col];
            assert(sample < max_pix_val);
            val_count[sample] += 1;
        }
    }

    // Pass 2: every non-empty bin is one distinct color.
    int distinct = 0;
    for (int level = 0; level < max_pix_val; ++level) 
    {
        distinct += (val_count[level] != 0) ? 1 : 0;
    }
    *num_colors = distinct;
}

关键函数 perform_top_color_palette_search 负责用出现次数最多的2-8种颜色构造候选palette，并把当前block的每一个luma像素mapping到其中的一种颜色（这里展示的代码只处理plane 0，即luma），再通过RDO来确定最佳的颜色数量以及每个像素的color index。

// Count-based (dominant color) palette search.
//
// Tries palettes made of the first n entries of top_colors[] for every n in
// the interval [start_n, end_n), advancing by step_size. A negative step_size
// walks downwards, in which case end_n is below start_n. Each candidate is
// evaluated by palette_rd_y(), which updates the best_* / rate / distortion
// outputs when the candidate beats the current best RD cost.
//
// *last_n_searched is overwritten with every n that gets evaluated, so on
// return it holds the last one tried. The return value is the last n that
// improved on the best RD cost, or end_n when no candidate did.
static AOM_INLINE int perform_top_color_palette_search(
    const AV1_COMP* const cpi, MACROBLOCK* x, MB_MODE_INFO* mbmi, BLOCK_SIZE bsize, int dc_mode_cost, const int* data, int* top_colors,
    int start_n, int end_n, int step_size, int* last_n_searched, uint16_t* color_cache, int n_cache, MB_MODE_INFO* best_mbmi, uint8_t* best_palette_color_map, int64_t* best_rd, int64_t* best_model_rd, 
    int* rate, int* rate_tokenonly, int64_t* distortion, int* skippable, int* beat_best_rd, PICK_MODE_CONTEXT* ctx, uint8_t* best_blk_skip, uint8_t* tx_type_map) 
{
    /* clang-format off */
    assert(IMPLIES(step_size < 0, start_n > end_n));
    /* clang-format on */
    assert(IMPLIES(step_size > 0, start_n < end_n));

    int candidate[PALETTE_MAX_SIZE];
    int winner = end_n;  // end_n doubles as the "no winner" marker.

    // Walk the requested palette sizes (descending or ascending depending on
    // the sign of step_size) and keep the one with the best RD cost.
    for (int num_colors = start_n; !is_iter_over(num_colors, end_n, step_size); num_colors += step_size) 
    {
        int improved = 0;
        // palette_rd_y() may rewrite the colors it is given, so hand it a copy.
        memcpy(candidate, top_colors, num_colors * sizeof(top_colors[0]));
        palette_rd_y(cpi, x, mbmi, bsize, dc_mode_cost, data, candidate, num_colors, color_cache, n_cache, best_mbmi, best_palette_color_map, best_rd, best_model_rd, rate, rate_tokenonly, distortion, 
            skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map, &improved);
        *last_n_searched = num_colors;

        if (improved) 
        {
            winner = num_colors;
            continue;
        }
        // At search level 2, stop as soon as a candidate fails to improve.
        if (cpi->sf.intra_sf.prune_palette_search_level == 2) 
        {
            break;
        }
    }
    return winner;
}

在这个函数里，rdcost是靠函数 palette_rd_y 来计算的；它通过输出参数 beat_best_palette_rd 指明当前选项（颜色数量 + color index 组合）的RD cost是否小于（即优于）当前的 best RD。

/*!\brief Calculate the luma palette cost from a given color palette
 *
 * \ingroup palette_mode_search
 * \callergraph
 * Given the base colors as specified in centroids[], calculate the RD cost
 * of palette mode.
 *
 * The candidate colors are first passed through optimize_palette_colors() and
 * de-duplicated; the candidate is dropped if fewer than PALETTE_MIN_SIZE
 * colors remain. The surviving colors are written to mbmi's palette info, a
 * color index map is built for plane 0, and the RD cost is computed. When that
 * cost is lower than *best_rd, the best_* outputs and the optional rate /
 * distortion outputs are updated.
 *
 * \param[in]     cpi                    Encoder instance.
 * \param[in,out] x                      Macroblock being coded.
 * \param[in,out] mbmi                   Mode info that receives the candidate
 *                                       palette colors and size.
 * \param[in]     bsize                  Block size.
 * \param[in]     dc_mode_cost           Passed to intra_mode_info_cost_y().
 * \param[in]     data                   Sample values that are mapped onto the
 *                                       palette (rows * cols entries are used).
 * \param[in,out] centroids              Candidate palette colors; modified in
 *                                       place by the optimize/de-dup steps.
 * \param[in]     n                      Number of entries in centroids.
 * \param[in]     color_cache            Palette color cache.
 * \param[in]     n_cache                Number of entries in color_cache.
 * \param[out]    best_mbmi              Receives *mbmi on improvement.
 * \param[out]    best_palette_color_map Receives the color map on improvement.
 * \param[in,out] best_rd                Best RD cost so far; lowered on
 *                                       improvement.
 * \param[in,out] best_model_rd          Passed to model_intra_yrd_and_prune().
 * \param[out]    rate, rate_tokenonly, distortion, skippable
 *                                       Optional (may be NULL); written on
 *                                       improvement.
 * \param[out]    beat_best_rd           Optional; set to 1 on improvement.
 * \param[in]     ctx                    Supplies num_4x4_blk, the entry count
 *                                       for blk_skip and tx_type_map.
 * \param[out]    blk_skip               Receives the block skip flags on
 *                                       improvement.
 * \param[out]    tx_type_map            Receives the tx type map on
 *                                       improvement.
 * \param[out]    beat_best_palette_rd   Optional; set to 1 on improvement and
 *                                       left untouched otherwise.
 */
static AOM_INLINE void palette_rd_y(
    const AV1_COMP* const cpi, 
    MACROBLOCK* x, 
    MB_MODE_INFO* mbmi,
    BLOCK_SIZE bsize, 
    int dc_mode_cost, 
    const int* data, 
    int* centroids, 
    int n,
    uint16_t* color_cache, 
    int n_cache, 
    MB_MODE_INFO* best_mbmi,
    uint8_t* best_palette_color_map, 
    int64_t* best_rd, 
    int64_t* best_model_rd,
    int* rate, 
    int* rate_tokenonly, 
    int64_t* distortion, 
    int* skippable,
    int* beat_best_rd, 
    PICK_MODE_CONTEXT* ctx, 
    uint8_t* blk_skip,
    uint8_t* tx_type_map, 
    int* beat_best_palette_rd) 
{
    optimize_palette_colors(color_cache, n_cache, n, 1, centroids, cpi->common.seq_params.bit_depth);
    const int num_unique_colors = av1_remove_duplicates(centroids, n);
    if (num_unique_colors < PALETTE_MIN_SIZE) 
    {
        // Too few unique colors to create a palette. And DC_PRED will work
        // well for that case anyway. So skip.
        return;
    }
    // Store the candidate palette in the mode info, clipped to the valid
    // pixel range for the sequence's bit depth.
    PALETTE_MODE_INFO* const pmi = &mbmi->palette_mode_info;
    if (cpi->common.seq_params.use_highbitdepth) 
    {
        for (int i = 0; i < num_unique_colors; ++i) 
        {
            pmi->palette_colors[i] = clip_pixel_highbd((int)centroids[i], cpi->common.seq_params.bit_depth);
        }
    }
    else {
        for (int i = 0; i < num_unique_colors; ++i) 
        {
            pmi->palette_colors[i] = clip_pixel(centroids[i]);
        }
    }
    pmi->palette_size[0] = num_unique_colors;
    MACROBLOCKD* const xd = &x->e_mbd;
    uint8_t* const color_map = xd->plane[0].color_index_map;
    int block_width, block_height, rows, cols;
    av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows, &cols);
    
    // Assign each of the rows * cols samples in data to one of the centroids,
    // producing the color index map.
    av1_calc_indices(data, centroids, color_map, rows * cols, num_unique_colors, 1);
    
    // Per the original note: when the block sits on the picture edge, part of
    // it has no color map value, so the map is extended from cols x rows to
    // the full block_width x block_height by replicating the nearest entries.
    extend_palette_color_map(color_map, cols, rows, block_width, block_height);

    // Give up on this candidate if the model-based check asks to prune it.
    if (model_intra_yrd_and_prune(cpi, x, bsize, best_model_rd)) 
    {
        return;
    }

    RD_STATS tokenonly_rd_stats;
    av1_pick_uniform_tx_size_type_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
    // A rate of INT_MAX marks the candidate as not usable.
    if (tokenonly_rd_stats.rate == INT_MAX) return;
    const int palette_mode_cost = intra_mode_info_cost_y(cpi, x, mbmi, bsize, dc_mode_cost);
    int this_rate = tokenonly_rd_stats.rate + palette_mode_cost;
    int64_t this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
    // this_rate / this_rd were computed above with the tx-size cost included;
    // only the token-only rate reported to the caller has it removed.
    if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->bsize)) 
    {
        tokenonly_rd_stats.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
    }
    // Collect mode stats for multiwinner mode processing
    const int txfm_search_done = 1;
    store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_DC, color_map, bsize, this_rd, cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
    if (this_rd < *best_rd) 
    {
        *best_rd = this_rd;
        // Setting beat_best_rd flag because current mode rd is better than best_rd.
        // This flag need to be updated only for palette evaluation in key frames
        if (beat_best_rd) *beat_best_rd = 1;
        memcpy(best_palette_color_map, color_map,
            block_width * block_height * sizeof(color_map[0]));
        *best_mbmi = *mbmi;
        memcpy(blk_skip, x->txfm_search_info.blk_skip,
            sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk);
        av1_copy_array(tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
        if (rate) *rate = this_rate;
        if (rate_tokenonly) *rate_tokenonly = tokenonly_rd_stats.rate;
        if (distortion) *distortion = tokenonly_rd_stats.dist;
        if (skippable) *skippable = tokenonly_rd_stats.skip_txfm;
        if (beat_best_palette_rd) *beat_best_palette_rd = 1;
    }
}