AV1代码学习：encoder_encode函数

最新推荐文章于 2025-04-15 14:40:25 发布

涵小呆

最新推荐文章于 2025-04-15 14:40:25 发布

阅读量2.4k

点赞数

分类专栏： AV1 文章标签：视频编码 AV1

本文链接：https://blog.csdn.net/BigDream123/article/details/109497488

版权

AV1 专栏收录该内容

24 篇文章

订阅专栏

从aom_codec_encode函数进入encoder_encode函数。

该函数的主要流程如下：

检查当前帧，计算编码数据尺寸ctx->cx_data_sz（编码数据保存在ctx->cx_data）
根据之前解析的配置设置编码信息（av1_change_config函数），在编码时，可以通过访问ctx->cpi的AV1_COMP结构体，访问到许多具体编码信息。
应用编码标志（av1_apply_encoding_flags）（这部分没有明白什么含义）
对一系列结构体和buffer进行初始化，包括lookahead结构体以及参考帧列表
调用av1_get_compressed_data函数进行编码，获得编码后的数据；返回-1指的是没有帧被编码（因为AV1有lag-in-frame，默认是19，也就是到第19帧才会开始编码第0帧）
如果存在编码数据（frame_size不为零），则将帧的头信息写入码流的头信息中

注意：

AV1中默认是两次编码过程，第一次编码的时候只是用来统计编码信息，并未真正进行编码，即不会将信息写入码流中；对于第二次编码才是真正的编码。

代码及注释如下：

static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
                                      const aom_image_t *img,
                                      aom_codec_pts_t pts,
                                      unsigned long duration,
                                      aom_enc_frame_flags_t enc_flags) {
  const size_t kMinCompressedSize = 8192;
  volatile aom_codec_err_t res = AOM_CODEC_OK;
  AV1_COMP *const cpi = ctx->cpi;//包含编码配置信息
  const aom_rational64_t *const timestamp_ratio = &ctx->timestamp_ratio;
  volatile aom_codec_pts_t ptsvol = pts;
  // LAP context
  AV1_COMP *cpi_lap = ctx->cpi_lap;

  if (cpi == NULL) return AOM_CODEC_INVALID_PARAM;

  if (cpi->lap_enabled && cpi_lap == NULL && cpi->oxcf.pass == 0)
    return AOM_CODEC_INVALID_PARAM;

  if (img != NULL) { //如果图片不为空
    res = validate_img(ctx, img);
    // TODO(jzern) the checks related to cpi's validity should be treated as a
    // failure condition, encoder setup is done fully in init() currently.
	// TODO（jzern）与cpi的有效性相关的检查应该被视为一个失败的条件，编码器的设置目前完全在init()中完成。
    if (res == AOM_CODEC_OK) {
      size_t data_sz = ALIGN_POWER_OF_TWO(ctx->cfg.g_w, 5) *
                       ALIGN_POWER_OF_TWO(ctx->cfg.g_h, 5) * get_image_bps(img);//数据尺寸
      if (data_sz < kMinCompressedSize) data_sz = kMinCompressedSize;//最小的数据尺寸
      if (ctx->cx_data == NULL || ctx->cx_data_sz < data_sz) {
        ctx->cx_data_sz = data_sz;
        free(ctx->cx_data);
        ctx->cx_data = (unsigned char *)malloc(ctx->cx_data_sz);
        if (ctx->cx_data == NULL) {
          return AOM_CODEC_MEM_ERROR;
        }
      }
    }
  }
  if (ctx->oxcf.mode != GOOD && ctx->oxcf.mode != REALTIME) {
    ctx->oxcf.mode = GOOD;
    av1_change_config(ctx->cpi, &ctx->oxcf); //设置编码配置
  }

  if (!ctx->pts_offset_initialized) {
    ctx->pts_offset = ptsvol;
    ctx->pts_offset_initialized = 1;
  }
  ptsvol -= ctx->pts_offset;

  aom_codec_pkt_list_init(&ctx->pkt_list);

  volatile aom_enc_frame_flags_t flags = enc_flags;

  // The jmp_buf is valid only for the duration of the function that calls
  // setjmp(). Therefore, this function must reset the 'setjmp' field to 0
  // before it returns.
  if (setjmp(cpi->common.error.jmp)) {
    cpi->common.error.setjmp = 0;
    res = update_error_state(ctx, &cpi->common.error);
    aom_clear_system_state();
    return res;
  }
  cpi->common.error.setjmp = 1;
  if (cpi_lap != NULL) {
    if (setjmp(cpi_lap->common.error.jmp)) {
      cpi_lap->common.error.setjmp = 0;
      res = update_error_state(ctx, &cpi_lap->common.error);
      aom_clear_system_state();
      return res;
    }
    cpi_lap->common.error.setjmp = 1;
  }

  // Note(yunqing): While applying encoding flags, always start from enabling
  // all, and then modifying according to the flags. Previous frame's flags are
  // overwritten.
  // 注意（yunqing）：在应用编码标志时，总是从启用all开始，然后根据标志进行修改。上一帧的标志被覆盖。
  av1_apply_encoding_flags(cpi, flags);
  if (cpi_lap != NULL) {
    av1_apply_encoding_flags(cpi_lap, flags);
  }

  // Handle fixed keyframe intervals 处理固定的关键帧间隔
  if (is_stat_generation_stage(cpi)) {
    if (ctx->cfg.kf_mode == AOM_KF_AUTO &&
        ctx->cfg.kf_min_dist == ctx->cfg.kf_max_dist) {
      if (cpi->common.spatial_layer_id == 0 &&
          ++ctx->fixed_kf_cntr > ctx->cfg.kf_min_dist) {
        flags |= AOM_EFLAG_FORCE_KF;
        ctx->fixed_kf_cntr = 1;
      }
    }
  }

  if (res == AOM_CODEC_OK) {
    int64_t dst_time_stamp = timebase_units_to_ticks(timestamp_ratio, ptsvol);
    int64_t dst_end_time_stamp =
        timebase_units_to_ticks(timestamp_ratio, ptsvol + duration);

    // Set up internal flags
    if (ctx->base.init_flags & AOM_CODEC_USE_PSNR) cpi->b_calculate_psnr = 1;//使用PSNR

    if (img != NULL) {
      YV12_BUFFER_CONFIG sd;
      int use_highbitdepth, subsampling_x, subsampling_y;
      res = image2yuvconfig(img, &sd);//根据图片获得YUV配置
      use_highbitdepth = (sd.flags & YV12_FLAG_HIGHBITDEPTH) != 0;
      subsampling_x = sd.subsampling_x;
      subsampling_y = sd.subsampling_y;

      if (!cpi->lookahead) {
		// 我们开始编码之前有多少帧延迟
        int lag_in_frames = cpi_lap != NULL ? cpi_lap->oxcf.lag_in_frames
                                            : cpi->oxcf.lag_in_frames;
		//初始化lookahead阶段
		//lookahead阶段是一个帧缓冲区队列，当缓冲区排队时，可以对其进行一些分析。
        cpi->lookahead = av1_lookahead_init(
            cpi->oxcf.width, cpi->oxcf.height, subsampling_x, subsampling_y,
            use_highbitdepth, lag_in_frames, cpi->oxcf.border_in_pixels,
            cpi->common.features.byte_alignment, ctx->num_lap_buffers);
      }
      if (!cpi->lookahead)
        aom_internal_error(&cpi->common.error, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate lag buffers");

      av1_check_initial_width(cpi, use_highbitdepth, subsampling_x,
                              subsampling_y);
      if (cpi_lap != NULL) {
        cpi_lap->lookahead = cpi->lookahead;
        av1_check_initial_width(cpi_lap, use_highbitdepth, subsampling_x,
                                subsampling_y);
      }

      // Store the original flags in to the frame buffer. Will extract the
      // key frame flag when we actually encode this frame.
	  // 将原始标志存储到帧缓冲区。将在实际编码此帧时提取关键帧标志。
      if (av1_receive_raw_frame(cpi, flags | ctx->next_frame_flags, &sd,
                                dst_time_stamp, dst_end_time_stamp)) {
        res = update_error_state(ctx, &cpi->common.error);
      }
      ctx->next_frame_flags = 0;
    }//img != NULL

    unsigned char *cx_data = ctx->cx_data;
    size_t cx_data_sz = ctx->cx_data_sz;

    assert(!(cx_data == NULL && cx_data_sz != 0));

    /* Any pending invisible frames?任何挂起的不可见帧？ */
    if (ctx->pending_cx_data) {
      memmove(cx_data, ctx->pending_cx_data, ctx->pending_cx_data_sz);
      ctx->pending_cx_data = cx_data;
      cx_data += ctx->pending_cx_data_sz;
      cx_data_sz -= ctx->pending_cx_data_sz;

      /* TODO: this is a minimal check, the underlying codec doesn't respect
       * the buffer size anyway.
	   * 这是一个最小的检查，底层编解码器无论如何都不考虑缓冲区大小。
       */
      if (cx_data_sz < ctx->cx_data_sz / 2) {
        aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR,
                           "Compressed data buffer too small");
      }
    }

    size_t frame_size = 0;
    unsigned int lib_flags = 0;
    int is_frame_visible = 0;
    int index_size = 0;
    int has_fwd_keyframe = 0;

    // Call for LAP stage
    if (cpi_lap != NULL) {
      int status;
      aom_rational64_t timestamp_ratio_la = *timestamp_ratio;
      int64_t dst_time_stamp_la = dst_time_stamp;
      int64_t dst_end_time_stamp_la = dst_end_time_stamp;
      status = av1_get_compressed_data(
          cpi_lap, &lib_flags, &frame_size, NULL, &dst_time_stamp_la,
          &dst_end_time_stamp_la, !img, &timestamp_ratio_la);//获得编码后的数据
      if (status != -1) {
        if (status != AOM_CODEC_OK) {
          aom_internal_error(&cpi_lap->common.error, AOM_CODEC_ERROR, NULL);
        }
        cpi_lap->seq_params_locked = 1;
      }
      lib_flags = 0;
      frame_size = 0;
    }

    // invisible frames get packed with the next visible frame 
	// 不可见的帧与下一个可见的帧打包在一起
    while (cx_data_sz - index_size >= ctx->cx_data_sz / 2 &&
           !is_frame_visible) {
      const int status = av1_get_compressed_data(
          cpi, &lib_flags, &frame_size, cx_data, &dst_time_stamp,
          &dst_end_time_stamp, !img, timestamp_ratio);//获得编码后的数据
      if (status == -1) break;
      if (status != AOM_CODEC_OK) {
        aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
      }

      cpi->seq_params_locked = 1;//或已发送或未锁定参数。一旦锁定av1_change_config就无法更改seq参数。
      if (frame_size) {
        if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data;

        const int write_temporal_delimiter =
            !cpi->common.spatial_layer_id && !ctx->pending_frame_count;

        if (write_temporal_delimiter) {
          uint32_t obu_header_size = 1;
          const uint32_t obu_payload_size = 0;
          const size_t length_field_size =
              aom_uleb_size_in_bytes(obu_payload_size);

          if (ctx->pending_cx_data) {
            const size_t move_offset = length_field_size + 1;
            memmove(ctx->pending_cx_data + move_offset, ctx->pending_cx_data,
                    frame_size);
          }
          const uint32_t obu_header_offset = 0;
          obu_header_size = av1_write_obu_header(
              &cpi->level_params, OBU_TEMPORAL_DELIMITER, 0,
              (uint8_t *)(ctx->pending_cx_data + obu_header_offset));

          // OBUs are preceded/succeeded by an unsigned leb128 coded integer.
          if (av1_write_uleb_obu_size(obu_header_size, obu_payload_size,
                                      ctx->pending_cx_data) != AOM_CODEC_OK) {
            aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
          }

          frame_size += obu_header_size + obu_payload_size + length_field_size;
        }

        if (ctx->oxcf.save_as_annexb) {
          size_t curr_frame_size = frame_size;
          if (av1_convert_sect5obus_to_annexb(cx_data, &curr_frame_size) !=
              AOM_CODEC_OK) {
            aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
          }
          frame_size = curr_frame_size;

          // B_PRIME (add frame size)
          const size_t length_field_size = aom_uleb_size_in_bytes(frame_size);
          if (ctx->pending_cx_data) {
            const size_t move_offset = length_field_size;
            memmove(cx_data + move_offset, cx_data, frame_size);
          }
          if (av1_write_uleb_obu_size(0, (uint32_t)frame_size, cx_data) !=
              AOM_CODEC_OK) {
            aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
          }
          frame_size += length_field_size;
        }

        ctx->pending_frame_sizes[ctx->pending_frame_count++] = frame_size;
        ctx->pending_cx_data_sz += frame_size;

        cx_data += frame_size;
        cx_data_sz -= frame_size;

        index_size = MAG_SIZE * (ctx->pending_frame_count - 1) + 2;

        is_frame_visible = cpi->common.show_frame;

        has_fwd_keyframe |= (!is_frame_visible &&
                             cpi->common.current_frame.frame_type == KEY_FRAME);
      }
    }

    if (is_frame_visible) {
      // Add the frame packet to the list of returned packets.
	  // 将帧数据包添加到返回的数据包列表中。
      aom_codec_cx_pkt_t pkt;

      if (ctx->oxcf.save_as_annexb) {
        //  B_PRIME (add TU size)
        size_t tu_size = ctx->pending_cx_data_sz;
        const size_t length_field_size = aom_uleb_size_in_bytes(tu_size);
        if (ctx->pending_cx_data) {
          const size_t move_offset = length_field_size;
          memmove(ctx->pending_cx_data + move_offset, ctx->pending_cx_data,
                  tu_size);
        }
        if (av1_write_uleb_obu_size(0, (uint32_t)tu_size,
                                    ctx->pending_cx_data) != AOM_CODEC_OK) {
          aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
        }
        ctx->pending_cx_data_sz += length_field_size;
      }

      pkt.kind = AOM_CODEC_CX_FRAME_PKT;

      pkt.data.frame.buf = ctx->pending_cx_data;
      pkt.data.frame.sz = ctx->pending_cx_data_sz;
      pkt.data.frame.partition_id = -1;
      pkt.data.frame.vis_frame_size = frame_size;

      pkt.data.frame.pts =
          ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) +
          ctx->pts_offset;
      pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
      if (has_fwd_keyframe) {
        // If one of the invisible frames in the packet is a keyframe, set
        // the delayed random access point flag.
		// 如果包中的一个不可见帧是关键帧，则设置延迟随机访问点标志。
        pkt.data.frame.flags |= AOM_FRAME_IS_DELAYED_RANDOM_ACCESS_POINT;
      }
      pkt.data.frame.duration = (uint32_t)ticks_to_timebase_units(
          timestamp_ratio, dst_end_time_stamp - dst_time_stamp);

      aom_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);

      ctx->pending_cx_data = NULL;
      ctx->pending_cx_data_sz = 0;
      ctx->pending_frame_count = 0;
    }
  } //if(res)

  cpi->common.error.setjmp = 0;
  return res;
}