FFmpeg HEVC 多线程解码解析

说明:

HEVC解码并行分3个级别:frame并行,slice并行和wpp。FFmpeg默认提供frame并行和slice并行的框架。针对HEVC,FFmpeg实现的帧内并行是wpp。

1)关于thread_type,也就是并行模式,其实分两种:slice并行和frame+slice并行(注意这句话:Frame thread:Restrictions with slice threading also apply)。所以openHEVC在frame thread init中也会slice thread init;优先判断frame thread;参数命名上,也是默认参数 for slice,特别注明的才是 for frame。

2)FFmpeg中并行解码部分稍显混乱,264与265共用了部分上层框架,但264的并行解码又有若干问题,间接影响了265。

3)openHEVC的并行解码代码就清晰不少,FFmpeg中并行解码部分大概同步到openHEVC 2013年10月提交,后面可能因为框架原因,没有再同步。

 

 

1 validate_thread_parameters

 

设置active_thread_type 对应并行级别

 

/**
 * Decide which threading model is actually used and store the result in
 * avctx->active_thread_type.
 *
 * Any threading needs more than one thread.
 * Frame threading needs whole frames to be handed to the codec and adds
 * decoding delay, which is why it is refused together with low_delay.
 *
 * @param avctx The context.
 */
static void validate_thread_parameters(AVCodecContext *avctx)
{
    const AVCodec *codec = avctx->codec;
    int frame_ok = 0;

    /* Frame-level threading is usable only if the codec advertises it and
     * none of the flags that rule it out is set. */
    if (codec->capabilities & AV_CODEC_CAP_FRAME_THREADS) {
        const int blocked = (avctx->flags  & AV_CODEC_FLAG_TRUNCATED) ||
                            (avctx->flags  & AV_CODEC_FLAG_LOW_DELAY) ||
                            (avctx->flags2 & AV_CODEC_FLAG2_CHUNKS);
        frame_ok = !blocked;
    }

    if (avctx->thread_count == 1) {
        /* A single thread: no threading model at all. */
        avctx->active_thread_type = 0;
    } else if (frame_ok && (avctx->thread_type & FF_THREAD_FRAME)) {
        /* Frame threading is considered first when the caller asked for it. */
        avctx->active_thread_type = FF_THREAD_FRAME;
    } else {
        const int slice_ok = (codec->capabilities & AV_CODEC_CAP_SLICE_THREADS) &&
                             (avctx->thread_type & FF_THREAD_SLICE);

        if (slice_ok) {
            /* Slice-level threading. */
            avctx->active_thread_type = FF_THREAD_SLICE;
        } else if (!(codec->capabilities & AV_CODEC_CAP_AUTO_THREADS)) {
            /* Nothing applicable: fall back to one thread. */
            avctx->thread_count       = 1;
            avctx->active_thread_type = 0;
        }
        /* Otherwise the codec has AV_CODEC_CAP_AUTO_THREADS; thread_count and
         * active_thread_type are left as they are. */
    }

    if (avctx->thread_count > MAX_AUTO_THREADS) {
        av_log(avctx, AV_LOG_WARNING,
               "Application has requested %d threads. Using a thread count greater than %d is not recommended.\n",
               avctx->thread_count, MAX_AUTO_THREADS);
    }
}

 

.capabilities := AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS

 

12322 = 1<< 1 | 1<< 5 | 1<< 13 | 1<< 12

 

 

 

2

3

4

 

ff_thread_init   pthread.c

/**
 * Initialise threading for a codec context.
 *
 * validate_thread_parameters() first fills in avctx->active_thread_type
 * (see section 1); the matching initialiser is then called.
 *
 * @param avctx The context.
 * @return the result of the selected initialiser, or 0 when neither slice
 *         nor frame threading is active.
 */
int ff_thread_init(AVCodecContext *avctx)
{
    validate_thread_parameters(avctx);

    /* The slice bit is tested before the frame bit. */
    if (avctx->active_thread_type & FF_THREAD_SLICE) {
        return ff_slice_thread_init(avctx);     /* see section 7 */
    }

    if (avctx->active_thread_type & FF_THREAD_FRAME) {
        return ff_frame_thread_init(avctx);     /* see section 6 */
    }

    return 0;
}

avcodec_open2

-ff_thread_init
--ff_slice_thread_init    (active_thread_type & FF_THREAD_SLICE)
--ff_frame_thread_init    (active_thread_type & FF_THREAD_FRAME)


从pthread.c中分离出frame, slice级别的代码,独立为pthread_frame.c和pthread_slice.c。

 

 

 

6 ff_frame_thread_init   pthread_frame.c

 

/**
 * Set up frame-level threading for a codec context.
 *
 * Allocates one FrameThreadContext plus thread_count PerThreadContext
 * entries. Every entry gets its own AVFrame, a private copy of the
 * AVCodecContext and of its AVCodecInternal, and a worker thread running
 * frame_worker_thread. The first copy is initialised with codec->init();
 * the remaining copies get a memcpy of the first copy's priv_data and are
 * initialised with codec->init_thread_copy().
 *
 * @param avctx The user-facing codec context.
 * @return 0 on success or when threading ends up disabled
 *         (thread_count <= 1); otherwise the error produced by the failing
 *         step (AVERROR(ENOMEM), a codec init error, or the wrapped
 *         pthread_create() result).
 */
int ff_frame_thread_init(AVCodecContext *avctx)
{
    int thread_count = avctx->thread_count;
    const AVCodec *codec = avctx->codec;
    AVCodecContext *src = avctx;
    FrameThreadContext *fctx;
    int i, err = 0;

#if HAVE_W32THREADS
    w32thread_init();
#endif

    if (!thread_count) {         ///< thread_count was left unset (or set to 0) at codec init: derive it from the CPU count
        int nb_cpus = av_cpu_count();
        // the visual-debug flags and debug_mv force the automatic count down to a single CPU
        if ((avctx->debug & (FF_DEBUG_VIS_QP | FF_DEBUG_VIS_MB_TYPE)) || avctx->debug_mv)
            nb_cpus = 1;
        // use number of cores + 1 as thread count if there is more than one
        if (nb_cpus > 1)
            thread_count = avctx->thread_count = FFMIN(nb_cpus + 1, MAX_AUTO_THREADS);
        else
            thread_count = avctx->thread_count = 1;
    }

    if (thread_count <= 1) {			///< no parallel decoding
        avctx->active_thread_type = 0;
        return 0;
    }

    avctx->internal->thread_ctx = fctx = av_mallocz(sizeof(FrameThreadContext));///< frame thread context
    if (!fctx)
        return AVERROR(ENOMEM);

    fctx->threads = av_mallocz_array(thread_count, sizeof(PerThreadContext));	///< zero-initialised array of thread_count per-thread contexts
    if (!fctx->threads) {
        av_freep(&avctx->internal->thread_ctx);
        return AVERROR(ENOMEM);
    }

    pthread_mutex_init(&fctx->buffer_mutex, NULL);
    fctx->delaying = 1;

    for (i = 0; i < thread_count; i++) {
        AVCodecContext *copy = av_malloc(sizeof(AVCodecContext));
        PerThreadContext *p  = &fctx->threads[i];

        // synchronisation objects are created before anything that can fail
        pthread_mutex_init(&p->mutex, NULL);
        pthread_mutex_init(&p->progress_mutex, NULL);
        pthread_cond_init(&p->input_cond, NULL);
        pthread_cond_init(&p->progress_cond, NULL);
        pthread_cond_init(&p->output_cond, NULL);

        p->frame = av_frame_alloc();
        if (!p->frame) {
            av_freep(&copy);
            err = AVERROR(ENOMEM);
            goto error;
        }

        p->parent = fctx;
        p->avctx  = copy;

        // copy is only checked for NULL here; it has not been dereferenced before this point
        if (!copy) {
            err = AVERROR(ENOMEM);
            goto error;
        }

        // struct copy: pointer members of *copy (priv_data, internal, ...) still alias those of *src
        *copy = *src;

        copy->internal = av_malloc(sizeof(AVCodecInternal));
        if (!copy->internal) {
            // NOTE(review): priv_data still aliases src's here; clearing it presumably keeps the
            // cleanup path from freeing the shared pointer - confirm against ff_frame_thread_free
            copy->priv_data = NULL;
            err = AVERROR(ENOMEM);
            goto error;
        }
        *copy->internal = *src->internal;
        copy->internal->thread_ctx = p;
        copy->internal->pkt = &p->avpkt;

        if (!i) {
            // from now on the first thread's context is the template for the remaining copies
            src = copy;

            if (codec->init)
                err = codec->init(copy);

            // called even when codec->init() failed; err is only examined after this if/else.
            // NOTE(review): presumably syncs state between the first thread's context and avctx -
            // update_context_from_thread() is not shown here, confirm direction and semantics
            update_context_from_thread(avctx, copy, 1);
        } else {
            // later threads get their own priv_data, seeded from the first thread's
            copy->priv_data = av_malloc(codec->priv_data_size);
            if (!copy->priv_data) {
                err = AVERROR(ENOMEM);
                goto error;
            }
            memcpy(copy->priv_data, src->priv_data, codec->priv_data_size);
            copy->internal->is_copy = 1;

            if (codec->init_thread_copy)
                err = codec->init_thread_copy(copy);
        }

        if (err) goto error;

        err = AVERROR(pthread_create(&p->thread, NULL, frame_worker_thread, p));///< start the frame-level decoding thread
        p->thread_init= !err;
        if(!p->thread_init)
            goto error;
    }

    return 0;

error:
    // i + 1 per-thread contexts have been touched so far, including the one that failed
    ff_frame_thread_free(avctx, i+1);

    return err;
}

 

 

7 ff_slice_thread_init   pthread_slice.c

 

 

 

/**
 * Set up slice-level threading for a codec context.
 *
 * Picks a thread count (automatically when avctx->thread_count is 0),
 * allocates the SliceThreadContext and its worker array, starts the worker
 * threads and installs thread_execute / thread_execute2 as the context's
 * execute callbacks.
 *
 * @param avctx The codec context.
 * @return 0 on success or when threading ends up disabled
 *         (thread_count <= 1), -1 if an allocation or a pthread_create()
 *         call fails.
 */
int ff_slice_thread_init(AVCodecContext *avctx)
{
    int i;
    SliceThreadContext *c;
    int thread_count = avctx->thread_count;

#if HAVE_W32THREADS
    w32thread_init();
#endif

    // We cannot do this in the encoder init as the threads are created before
    if (av_codec_is_encoder(avctx->codec) &&
        avctx->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
        avctx->height > 2800)
        thread_count = avctx->thread_count = 1;

    if (!thread_count) {            ///< thread_count is 0: derive it from the CPU count
        int nb_cpus = av_cpu_count();
        // never use more CPUs than there are 16-line rows in the picture
        if  (avctx->height)
            nb_cpus = FFMIN(nb_cpus, (avctx->height+15)/16);
        // use number of cores + 1 as thread count if there is more than one
        if (nb_cpus > 1)            ///< cores + 1, capped at MAX_AUTO_THREADS (the article gives 16, a limit inherited from H.264)
            thread_count = avctx->thread_count = FFMIN(nb_cpus + 1, MAX_AUTO_THREADS);
        else
            thread_count = avctx->thread_count = 1;
    }

    if (thread_count <= 1) {        ///< no parallel decoding
        avctx->active_thread_type = 0;
        return 0;
    }

    c = av_mallocz(sizeof(SliceThreadContext));    ///< slice thread context
    if (!c)
        return -1;

    c->workers = av_mallocz_array(thread_count, sizeof(pthread_t));
    if (!c->workers) {
        av_free(c);
        return -1;
    }

    avctx->internal->thread_ctx = c;
    c->current_job = 0;
    c->job_count = 0;
    c->job_size = 0;
    c->done = 0;
    pthread_cond_init(&c->current_job_cond, NULL);
    pthread_cond_init(&c->last_job_cond, NULL);
    pthread_mutex_init(&c->current_job_lock, NULL);
    // the job lock is held while the workers are being created
    pthread_mutex_lock(&c->current_job_lock);
    for (i=0; i<thread_count; i++) {
        if(pthread_create(&c->workers[i], NULL, worker, avctx)) {    ///< create worker thread i
           // record how many workers were actually started before tearing down
           avctx->thread_count = i;
           pthread_mutex_unlock(&c->current_job_lock);
           ff_thread_free(avctx);
           return -1;
        }
    }

    // NOTE(review): presumably waits for the thread_count workers to go idle and
    // releases current_job_lock - thread_park_workers() is not shown here, confirm
    thread_park_workers(c, thread_count);

    avctx->execute = thread_execute;
    avctx->execute2 = thread_execute2;
    return 0;
}


openHEVC中,frame,slice thread 参数分开,更清晰!

 

 

 

 

 

 

8 submit_packet

主线程将packet submit给解码线程 frame worker thread。线程的异步操作,用到条件变量和互斥量。

 

//! Asynchronously hand packet avpkt to frame thread p for decoding; the PerThreadContext* holds that thread's context.
//! Returns 0 on success or when there is nothing to submit, otherwise the
//! error returned by update_context_from_thread().
static int submit_packet(PerThreadContext *p, AVPacket *avpkt)
{
    FrameThreadContext *fctx = p->parent;
    PerThreadContext *prev_thread = fctx->prev_thread;
    const AVCodec *codec = p->avctx->codec;

    // an empty packet is only forwarded to codecs that have AV_CODEC_CAP_DELAY
    if (!avpkt->size && !(codec->capabilities & AV_CODEC_CAP_DELAY))
        return 0;

    pthread_mutex_lock(&p->mutex);

    release_delayed_buffers(p);

    if (prev_thread) {
        int err;
        // block until the previously submitted thread has left STATE_SETTING_UP
        if (prev_thread->state == STATE_SETTING_UP) {
            pthread_mutex_lock(&prev_thread->progress_mutex);
            while (prev_thread->state == STATE_SETTING_UP)
                pthread_cond_wait(&prev_thread->progress_cond, &prev_thread->progress_mutex);
            pthread_mutex_unlock(&prev_thread->progress_mutex);
        }

        err = update_context_from_thread(p->avctx, prev_thread->avctx, 0);  ///< prev_thread has finished its setup: update this thread's context from it
        if (err) {
            pthread_mutex_unlock(&p->mutex);
            return err;
        }
    }

    // replace the thread's stored packet with a new reference to avpkt
    // NOTE(review): the return value of av_packet_ref() is ignored here
    av_packet_unref(&p->avpkt);
    av_packet_ref(&p->avpkt, avpkt);

    p->state = STATE_SETTING_UP;
    pthread_cond_signal(&p->input_cond);    ///< tell the frame decoding thread that its input packet is ready
    pthread_mutex_unlock(&p->mutex);

    /*
     * If the client doesn't have a thread-safe get_buffer(),
     * then decoding threads call back to the main thread,
     * and it calls back to the client here.
     */

    if (!p->avctx->thread_safe_callbacks && (
         p->avctx->get_format != avcodec_default_get_format ||
         p->avctx->get_buffer2 != avcodec_default_get_buffer2)) {
        // keep serving requests until the thread reports setup finished or is ready for input again
        while (p->state != STATE_SETUP_FINISHED && p->state != STATE_INPUT_READY) {
            int call_done = 1;
            pthread_mutex_lock(&p->progress_mutex);
            while (p->state == STATE_SETTING_UP)
                pthread_cond_wait(&p->progress_cond, &p->progress_mutex);   ///< wait for the decoding thread to signal progress_cond
                                                                            ///< the request is then served on this thread via ff_get_buffer() / ff_get_format()
            switch (p->state) {
            case STATE_GET_BUFFER:
                p->result = ff_get_buffer(p->avctx, p->requested_frame, p->requested_flags);
                break;
            case STATE_GET_FORMAT:
                p->result_format = ff_get_format(p->avctx, p->available_formats);
                break;
            default:
                call_done = 0;
                break;
            }
            // a request was served: put the thread back into STATE_SETTING_UP and wake it
            if (call_done) {
                p->state  = STATE_SETTING_UP;
                pthread_cond_signal(&p->progress_cond);
            }
            pthread_mutex_unlock(&p->progress_mutex);
        }
    }

    // remember p as the most recently submitted thread for the next call
    fctx->prev_thread = p;
    fctx->next_decoding++;

    return 0;
}

 

 

 

 

 

 

  • 0
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值