【AV1代码研究一】万事开头难——aomenc main函数分析

【AV1代码研究一】万事开头难——aomenc main函数分析


俗话说万事开头难,终于要开始正经研究AV1的代码了。本文从main函数开始,以代码注释的形式进行代码分析。如果有错漏,欢迎大家不吝指出,先谢过。

重要的结构体

AvxInputContext

描述输入文件特征的结构体。

/* Describes the input file that is being encoded. */
struct AvxInputContext {
  /* Input file name; main() takes it from the first non-option argument. */
  const char *filename;
  /* Open handle of the input file (main() queries its position with
   * ftello() for the ETA estimate). */
  FILE *file;
  /* Non-zero when the input length is known; main() only computes an ETA
   * in that case. Presumably the file size in bytes -- TODO confirm. */
  int64_t length;
  /* Buffer holding input-file data used for file type detection. */
  struct FileTypeDetectionBuffer detect;
  /* Enumerated container type: FILE_TYPE_OBU, FILE_TYPE_RAW, FILE_TYPE_IVF,
   * FILE_TYPE_Y4M or FILE_TYPE_WEBM. */
  enum VideoFileType file_type;
  /* Frame dimensions; when the file does not provide them main() falls back
   * to the first stream configuration that specifies both. */
  uint32_t width;
  uint32_t height;
  /* Rational (numerator/denominator) value; judging by its name, the pixel
   * aspect ratio. */
  struct AvxRational pixel_aspect_ratio;
                                      
  /* Enumerated image (pixel) format. */
  aom_img_fmt_t fmt;
  /* Enumerated sample bit depth: AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12.
   * main() initialises it to 0, meaning "not specified by the input". */
  aom_bit_depth_t bit_depth;

  /* Set to 1 by default in main() and cleared for AV1, which supports chroma
   * subsamplings other than 4:2:0. */
  int only_i420;
  uint32_t fourcc;
  /* Rational (numerator/denominator) value; judging by its name, the frame
   * rate. main() defaults it to 30/1. */
  struct AvxRational framerate;
#if CONFIG_AV1_ENCODER
  /* Y4M input state (main() reads its dst_c_dec_h / dst_c_dec_v members).
   * NOTE(review): the original annotation wonders whether this is used for
   * transcoding -- unconfirmed. */
  y4m_input y4m;
#endif
};

AvxEncoderConfig

该结构体大部分成员用于存放命令行参数,命令行参数详见《aomenc.exe命令行参数简析》。

/* Configuration elements common to all streams.
 *
 * Most members hold values parsed from the command line.
 */
struct AvxEncoderConfig {
  /* Codec interface descriptor; main() reads its `fourcc` and `name`
   * members. This is a pointer to const, so the pointed-to AvxInterface
   * cannot be modified through it. */
  const struct AvxInterface *codec;
  /* Total number of passes, and the single pass to run (0 = run all of
   * them, as main() starts at `pass - 1` only when `pass` is non-zero). */
  int passes;
  int pass;
  int usage;
  /* Selects the input image format in main() (I420, I422, I444, YV12). */
  ColorInputType color_type;
  /* Non-zero suppresses progress and warning output in main(). */
  int quiet;
  int verbose;
  /* Maximum number of frames to read; 0 means no limit. */
  int limit;
  /* Number of leading input frames that are read but not encoded. */
  int skip_frames;
  int show_psnr;
  enum TestDecodeFatality test_decode;
  /* Non-zero when a frame rate was given on the command line; otherwise
   * main() copies the input file's frame rate into `framerate`. */
  int have_framerate;
  struct aom_rational framerate;
  int debug;
  int show_q_hist_buckets;
  int show_rate_hist_buckets;
  int disable_warnings;
  int disable_warning_prompt;
  int experimental_bitstream;
  aom_chroma_sample_position_t csp;
  cfg_options_t encoder_config;
};

stream_state

单链表节点结构体:每个节点保存一路码流(stream)的编码配置和编码状态,并通过next指针指向下一个节点。

/* One node of a singly linked list of streams; each node holds the encoder
 * configuration and the running state of one stream. */
struct stream_state {
  int index;
  /* Next stream in the list, or NULL for the last one. */
  struct stream_state *next;
  /* Configuration of this stream; main() reaches the encoder settings
   * through config.cfg. */
  struct stream_config config;
  FILE *file;
  struct rate_hist *rate_hist;
  struct WebmOutputContext webm_ctx;
  uint64_t psnr_sse_total;
  uint64_t psnr_samples_total;
  double psnr_totals[4];
  int psnr_count;
  /* Passed to show_q_histogram() by main(). */
  int counts[64];
  /* Encoder instance of this stream. */
  aom_codec_ctx_t encoder;
  unsigned int frames_out;
  uint64_t cx_time;
  size_t nbytes;
  stats_io_t stats;
  struct aom_image *img;
  /* Decoder instance of this stream; main() destroys it only when decode
   * testing is enabled. */
  aom_codec_ctx_t decoder;
  /* Non-zero once a mismatch was seen; main() prints it as the frame number
   * of the first mismatch. */
  int mismatch_seen;
  unsigned int chroma_subsampling_x;
  unsigned int chroma_subsampling_y;
};

代码注释

/*
 * aomenc entry point.
 *
 * Parses the command line into one global configuration plus one or more
 * per-stream configurations, then, for every pass, reads the input frame by
 * frame and hands each frame to the encoder of every stream.
 *
 * argc, argv_: the program arguments; argv_[0] is recorded as the
 *              executable name.
 *
 * Returns EXIT_FAILURE when decode testing is configured as fatal and at
 * least one stream saw a mismatch, EXIT_SUCCESS otherwise. Configuration
 * errors are reported through die()/fatal()/usage_exit(), which presumably
 * terminate the process -- TODO confirm.
 */
int main(int argc, const char **argv_) {
  int pass;
  aom_image_t raw;
  aom_image_t raw_shift;
  int allocated_raw_shift = 0;
  int use_16bit_internal = 0;
  int input_shift = 0;
  int frame_avail, got_data;

  /* Properties of the input file. */
  struct AvxInputContext input;
  /* Settings shared by all streams. */
  struct AvxEncoderConfig global;
  /* Head of a singly linked list; every node carries its own encoder
   * configuration and output state (presumably one node per output
   * stream -- TODO confirm). */
  struct stream_state *streams = NULL;
  char **argv, **argi;
  uint64_t cx_time = 0;
  int stream_cnt = 0;
  int res = 0;
  int profile_updated = 0;

  memset(&input, 0, sizeof(input));
  exec_name = argv_[0];

  /* Setup default input stream settings */
  input.framerate.numerator = 30;
  input.framerate.denominator = 1;
  input.only_i420 = 1;
  input.bit_depth = 0;

  /* First parse the global configuration values, because we want to apply
   * other parameters on top of the default configuration provided by the
   * codec.
   */
  /* Work on a copy of the arguments (program name excluded). */
  argv = argv_dup(argc - 1, argv_ + 1);
  /* Fill `global` from the arguments. Per the original annotation,
   * parse_global_config() first applies a few defaults, then lets the
   * command line or a single cfg file override them, and finally validates
   * the result -- not visible here, TODO confirm. */
  parse_global_config(&global, &argv);

  if (argc < 2) usage_exit();

  switch (global.color_type) {
    case I420: input.fmt = AOM_IMG_FMT_I420; break;
    case I422: input.fmt = AOM_IMG_FMT_I422; break;
    case I444: input.fmt = AOM_IMG_FMT_I444; break;
    case YV12: input.fmt = AOM_IMG_FMT_YV12; break;
  }

  {
    /* Now parse each stream's parameters. Using a local scope here
     * due to the use of 'stream' as loop variable in FOREACH_STREAM
     * loops
     */
    struct stream_state *stream = NULL;

    /* Create one stream per group of stream parameters. `global` supplies
     * the common settings; the remaining arguments refine each stream.
     * Per the original annotation, "--" marks the end of one stream's
     * parameters -- TODO confirm against parse_stream_params(). */
    do {
      stream = new_stream(&global, stream);
      stream_cnt++;
      if (!streams) streams = stream;
    } while (parse_stream_params(&global, stream, argv));
  }

  /* Check for unrecognized options */
  for (argi = argv; *argi; argi++)
    if (argi[0][0] == '-' && argi[0][1])
      die("Error: Unrecognized option %s\n", *argi);

  /* Validate every stream's configuration. */
  FOREACH_STREAM(stream, streams) {
    check_encoder_config(global.disable_warning_prompt, &global,
                         &stream->config.cfg);

    // If large_scale_tile = 1, only support to output to ivf format.
    if (stream->config.cfg.large_scale_tile && !stream->config.write_ivf)
      die("only support ivf output format while large-scale-tile=1\n");
  }

  /* Handle non-option arguments */
  input.filename = argv[0];

  if (!input.filename) {
    fprintf(stderr, "No input file specified!\n");
    usage_exit();
  }

  /* Decide if other chroma subsamplings than 4:2:0 are supported */
  if (global.codec->fourcc == AV1_FOURCC) input.only_i420 = 0;

  /* One iteration per pass. Each iteration first finalises the
   * configuration (dimensions, bit depth, profile, frame rate, frame
   * limit, ...) and then encodes the whole input for every stream. */
  for (pass = global.pass ? global.pass - 1 : 0; pass < global.passes; pass++) {
    int frames_in = 0, seen_frames = 0;
    int64_t estimated_time_left = -1;
    int64_t average_rate = -1;
    int64_t lagged_count = 0;

    open_input_file(&input, global.csp);

    /* If the input file doesn't specify its w/h (raw files), try to get
     * the data from the first stream's configuration.
     */
    if (!input.width || !input.height) {
      FOREACH_STREAM(stream, streams) {
        if (stream->config.cfg.g_w && stream->config.cfg.g_h) {
          input.width = stream->config.cfg.g_w;
          input.height = stream->config.cfg.g_h;
          break;
        }
      };
    }

    /* Update stream configurations from the input file's parameters */
    if (!input.width || !input.height)
      fatal(
          "Specify stream dimensions with --width (-w) "
          " and --height (-h)");

    /* If input file does not specify bit-depth but input-bit-depth parameter
     * exists, assume that to be the input bit-depth. However, if the
     * input-bit-depth paramter does not exist, assume the input bit-depth
     * to be the same as the codec bit-depth.
     */
    if (!input.bit_depth) {
      FOREACH_STREAM(stream, streams) {
        if (stream->config.cfg.g_input_bit_depth)
          input.bit_depth = stream->config.cfg.g_input_bit_depth;
        else
          input.bit_depth = stream->config.cfg.g_input_bit_depth =
              (int)stream->config.cfg.g_bit_depth;
      }
      if (input.bit_depth > 8) input.fmt |= AOM_IMG_FMT_HIGHBITDEPTH;
    } else {
      FOREACH_STREAM(stream, streams) {
        stream->config.cfg.g_input_bit_depth = input.bit_depth;
      }
    }

    FOREACH_STREAM(stream, streams) {
      if (input.fmt != AOM_IMG_FMT_I420 && input.fmt != AOM_IMG_FMT_I42016) {
        /* Automatically upgrade if input is non-4:2:0 but a 4:2:0 profile
           was selected. */
        switch (stream->config.cfg.g_profile) {
          case 0:
            if (input.bit_depth < 12 && (input.fmt == AOM_IMG_FMT_I444 ||
                                         input.fmt == AOM_IMG_FMT_I44416)) {
              if (!stream->config.cfg.monochrome) {
                stream->config.cfg.g_profile = 1;
                profile_updated = 1;
              }
            } else if (input.bit_depth == 12 || input.fmt == AOM_IMG_FMT_I422 ||
                       input.fmt == AOM_IMG_FMT_I42216) {
              stream->config.cfg.g_profile = 2;
              profile_updated = 1;
            }
            break;
          case 1:
            if (input.bit_depth == 12 || input.fmt == AOM_IMG_FMT_I422 ||
                input.fmt == AOM_IMG_FMT_I42216) {
              stream->config.cfg.g_profile = 2;
              profile_updated = 1;
            } else if (input.bit_depth < 12 &&
                       (input.fmt == AOM_IMG_FMT_I420 ||
                        input.fmt == AOM_IMG_FMT_I42016)) {
              stream->config.cfg.g_profile = 0;
              profile_updated = 1;
            }
            break;
          case 2:
            if (input.bit_depth < 12 && (input.fmt == AOM_IMG_FMT_I444 ||
                                         input.fmt == AOM_IMG_FMT_I44416)) {
              stream->config.cfg.g_profile = 1;
              profile_updated = 1;
            } else if (input.bit_depth < 12 &&
                       (input.fmt == AOM_IMG_FMT_I420 ||
                        input.fmt == AOM_IMG_FMT_I42016)) {
              stream->config.cfg.g_profile = 0;
              profile_updated = 1;
            } else if (input.bit_depth == 12 &&
                       input.file_type == FILE_TYPE_Y4M) {
              // Note that here the input file values for chroma subsampling
              // are used instead of those from the command line.
              aom_codec_control(&stream->encoder, AV1E_SET_CHROMA_SUBSAMPLING_X,
                                input.y4m.dst_c_dec_h >> 1);
              aom_codec_control(&stream->encoder, AV1E_SET_CHROMA_SUBSAMPLING_Y,
                                input.y4m.dst_c_dec_v >> 1);
            } else if (input.bit_depth == 12 &&
                       input.file_type == FILE_TYPE_RAW) {
              aom_codec_control(&stream->encoder, AV1E_SET_CHROMA_SUBSAMPLING_X,
                                stream->chroma_subsampling_x);
              aom_codec_control(&stream->encoder, AV1E_SET_CHROMA_SUBSAMPLING_Y,
                                stream->chroma_subsampling_y);
            }
            break;
          default: break;
        }
      }
      /* Automatically set the codec bit depth to match the input bit depth.
       * Upgrade the profile if required. */
      if (stream->config.cfg.g_input_bit_depth >
          (unsigned int)stream->config.cfg.g_bit_depth) {
        stream->config.cfg.g_bit_depth = stream->config.cfg.g_input_bit_depth;
        if (!global.quiet) {
          fprintf(stderr,
                  "Warning: automatically updating bit depth to %d to "
                  "match input format.\n",
                  stream->config.cfg.g_input_bit_depth);
        }
      }
      if (stream->config.cfg.g_bit_depth > 10) {
        switch (stream->config.cfg.g_profile) {
          case 0:
          case 1:
            stream->config.cfg.g_profile = 2;
            profile_updated = 1;
            break;
          default: break;
        }
      }
      if (stream->config.cfg.g_bit_depth > 8) {
        stream->config.use_16bit_internal = 1;
      }
      if (profile_updated && !global.quiet) {
        fprintf(stderr,
                "Warning: automatically updating to profile %d to "
                "match input format.\n",
                stream->config.cfg.g_profile);
      }
      /* Set limit */
      stream->config.cfg.g_limit = global.limit;
    }

    FOREACH_STREAM(stream, streams) {
      set_stream_dimensions(stream, input.width, input.height);
    }
    FOREACH_STREAM(stream, streams) { validate_stream_config(stream, &global); }

    /* Ensure that --passes and --pass are consistent. If --pass is set and
     * --passes=2, ensure --fpf was set.
     */
    if (global.pass && global.passes == 2) {
      FOREACH_STREAM(stream, streams) {
        if (!stream->config.stats_fn)
          die("Stream %d: Must specify --fpf when --pass=%d"
              " and --passes=2\n",
              stream->index, global.pass);
      }
    }

#if !CONFIG_WEBM_IO
    FOREACH_STREAM(stream, streams) {
      if (stream->config.write_webm) {
        stream->config.write_webm = 0;
        stream->config.write_ivf = 0;
        warn("aomenc compiled w/o WebM support. Writing OBU stream.");
      }
    }
#endif

    /* Use the frame rate from the file only if none was specified
     * on the command-line.
     */
    if (!global.have_framerate) {
      global.framerate.num = input.framerate.numerator;
      global.framerate.den = input.framerate.denominator;
    }
    FOREACH_STREAM(stream, streams) {
      /* The numerator/denominator swap is intentional: the timebase is the
       * duration of one frame, i.e. the reciprocal of the frame rate. */
      stream->config.cfg.g_timebase.den = global.framerate.num;
      stream->config.cfg.g_timebase.num = global.framerate.den;
    }
    /* Show configuration */
    if (global.verbose && pass == 0) {
      FOREACH_STREAM(stream, streams) {
        show_stream_config(stream, &global, &input);
      }
    }

    /* Only on the first pass that is actually run. */
    if (pass == (global.pass ? global.pass - 1 : 0)) {
      if (input.file_type == FILE_TYPE_Y4M) {
        /*The Y4M reader does its own allocation.
          Just initialize this here to avoid problems if we never read any
          frames.*/
        memset(&raw, 0, sizeof(raw));
      } else if (!aom_img_alloc(&raw, input.fmt, input.width, input.height,
                                32)) {
        /* aom_img_alloc() returns NULL on failure; do not go on with an
         * uninitialized image descriptor. */
        fatal("Failed to allocate the input image buffer");
      }

      FOREACH_STREAM(stream, streams) {
        stream->rate_hist =
            init_rate_histogram(&stream->config.cfg, &global.framerate);
      }
    }

    FOREACH_STREAM(stream, streams) { setup_pass(stream, &global, pass); }
    FOREACH_STREAM(stream, streams) { initialize_encoder(stream, &global); }
    FOREACH_STREAM(stream, streams) {
      open_output_file(stream, &global, &input.pixel_aspect_ratio);
    }

    if (strcmp(global.codec->name, "av1") == 0) {
      // Check to see if at least one stream uses 16 bit internal.
      // Currently assume that the bit_depths for all streams using
      // highbitdepth are the same.
      FOREACH_STREAM(stream, streams) {
        if (stream->config.use_16bit_internal) {
          use_16bit_internal = 1;
        }
        input_shift = (int)stream->config.cfg.g_bit_depth -
                      stream->config.cfg.g_input_bit_depth;
      };
    }

    frame_avail = 1;
    got_data = 0;

    /* Main encode loop: handles the input one frame at a time and keeps
     * running after the input is exhausted for as long as the encoders
     * still return data. */
    while (frame_avail || got_data) {
      struct aom_usec_timer timer;

      if (!global.limit || frames_in < global.limit) {
        /* Read the next input frame (Y4M or raw YUV input, per the original
         * annotation). */
        frame_avail = read_frame(&input, &raw);

        if (frame_avail) frames_in++;
        seen_frames =
            frames_in > global.skip_frames ? frames_in - global.skip_frames : 0;

        /* Progress line on stderr. The leading '\r' moves the cursor back to
         * the start of the line so that every update overwrites the previous
         * one in place; replacing it with '\n' would print each update on a
         * line of its own. Switching the stream to stdout does not change
         * this behaviour (observation from the original annotation). */
        if (!global.quiet) {
          float fps = usec_to_fps(cx_time, seen_frames);
          fprintf(stderr, "\rPass %d/%d ", pass + 1, global.passes);

          if (stream_cnt == 1)
            fprintf(stderr, "frame %4d/%-4d %7" PRId64 "B ", frames_in,
                    streams->frames_out, (int64_t)streams->nbytes);
          else
            fprintf(stderr, "frame %4d ", frames_in);

          fprintf(stderr, "%7" PRId64 " %s %.2f %s ",
                  cx_time > 9999999 ? cx_time / 1000 : cx_time,
                  cx_time > 9999999 ? "ms" : "us", fps >= 1.0 ? fps : fps * 60,
                  fps >= 1.0 ? "fps" : "fpm");
          print_time("ETA", estimated_time_left);
        }

      } else {
        /* The frame limit has been reached: stop reading input. */
        frame_avail = 0;
      }

      /* Encoding starts only once the frames to skip have been read. */
      if (frames_in > global.skip_frames) {
        aom_image_t *frame_to_encode;
        if (input_shift || (use_16bit_internal && input.bit_depth == 8)) {
          assert(use_16bit_internal);
          // Input bit depth and stream bit depth do not match, so up
          // shift frame to stream bit depth
          if (!allocated_raw_shift) {
            if (!aom_img_alloc(&raw_shift, raw.fmt | AOM_IMG_FMT_HIGHBITDEPTH,
                               input.width, input.height, 32)) {
              /* Same reasoning as for `raw`: never upshift into an image
               * that could not be allocated. */
              fatal("Failed to allocate the bit-depth shifted image buffer");
            }
            allocated_raw_shift = 1;
          }
          aom_img_upshift(&raw_shift, &raw, input_shift);
          frame_to_encode = &raw_shift;
        } else {
          frame_to_encode = &raw;
        }
        aom_usec_timer_start(&timer);

        /* encode_frame() performs the actual encoding. The current frame is
         * encoded for every stream before the next frame is read. A NULL
         * image is passed once no more input is available.
         *
         * When 16-bit internal processing is in use, every stream must be
         * configured for it; otherwise the assertion below fires. */
        if (use_16bit_internal) {
          assert(frame_to_encode->fmt & AOM_IMG_FMT_HIGHBITDEPTH);
          FOREACH_STREAM(stream, streams) {
            if (stream->config.use_16bit_internal)
              encode_frame(stream, &global,
                           frame_avail ? frame_to_encode : NULL, frames_in);
            else
              assert(0);
          };
        } else {
          assert((frame_to_encode->fmt & AOM_IMG_FMT_HIGHBITDEPTH) == 0);
          FOREACH_STREAM(stream, streams) {
            encode_frame(stream, &global, frame_avail ? frame_to_encode : NULL,
                         frames_in);
          }
        }
        aom_usec_timer_mark(&timer);
        cx_time += aom_usec_timer_elapsed(&timer);

        /* Judging by its name, updates the per-stream quantizer histogram;
         * the original annotation wonders whether this relates to rate
         * control -- unconfirmed. */
        FOREACH_STREAM(stream, streams) { update_quantizer_histogram(stream); }

        got_data = 0;
        FOREACH_STREAM(stream, streams) {
          get_cx_data(stream, &global, &got_data);
        }

        /* ETA estimation: while the first stream has produced no output
         * yet, remember how much input was consumed; afterwards derive a
         * smoothed processing rate and the remaining time from it. */
        if (!got_data && input.length && streams != NULL &&
            !streams->frames_out) {
          lagged_count = global.limit ? seen_frames : ftello(input.file);
        } else if (input.length) {
          int64_t remaining;
          int64_t rate;

          if (global.limit) {
            const int64_t frame_in_lagged = (seen_frames - lagged_count) * 1000;

            rate = cx_time ? frame_in_lagged * (int64_t)1000000 / cx_time : 0;
            remaining = 1000 * (global.limit - global.skip_frames -
                                seen_frames + lagged_count);
          } else {
            const int64_t input_pos = ftello(input.file);
            const int64_t input_pos_lagged = input_pos - lagged_count;
            const int64_t input_limit = input.length;

            rate = cx_time ? input_pos_lagged * (int64_t)1000000 / cx_time : 0;
            remaining = input_limit - input_pos + lagged_count;
          }

          average_rate =
              (average_rate <= 0) ? rate : (average_rate * 7 + rate) / 8;
          estimated_time_left = average_rate ? remaining / average_rate : -1;
        }

        /* When requested, decode the data just produced to check that the
         * encoder and decoder agree. */
        if (got_data && global.test_decode != TEST_DECODE_OFF) {
          FOREACH_STREAM(stream, streams) {
            test_decode(stream, global.test_decode);
          }
        }
      }

      fflush(stdout);
      /* ANSI "erase to end of line": clears leftovers of a longer previous
       * progress line. */
      if (!global.quiet) fprintf(stderr, "\033[K");
    }  // end of while (frame_avail || got_data)

    if (stream_cnt > 1) fprintf(stderr, "\n");

    /* Per-stream summary for this pass: sizes, bit rates, timing. */
    if (!global.quiet) {
      FOREACH_STREAM(stream, streams) {
        const int64_t bpf =
            seen_frames ? (int64_t)(stream->nbytes * 8 / seen_frames) : 0;
        /* Guard the division the same way the PSNR branch below does. */
        const int64_t bps =
            global.framerate.den
                ? bpf * global.framerate.num / global.framerate.den
                : 0;
        fprintf(stderr,
                "\rPass %d/%d frame %4d/%-4d %7" PRId64 "B %7" PRId64
                "b/f %7" PRId64
                "b/s"
                " %7" PRId64 " %s (%.2f fps)\033[K\n",
                pass + 1, global.passes, frames_in, stream->frames_out,
                (int64_t)stream->nbytes, bpf, bps,
                stream->cx_time > 9999999 ? stream->cx_time / 1000
                                          : stream->cx_time,
                stream->cx_time > 9999999 ? "ms" : "us",
                usec_to_fps(stream->cx_time, seen_frames));
      }
    }

    if (global.show_psnr) {
      if (global.codec->fourcc == AV1_FOURCC) {
        FOREACH_STREAM(stream, streams) {
          int64_t bps = 0;
          if (stream->psnr_count && seen_frames && global.framerate.den) {
            bps = (int64_t)stream->nbytes * 8 * (int64_t)global.framerate.num /
                  global.framerate.den / seen_frames;
          }
          show_psnr(stream, (1 << stream->config.cfg.g_input_bit_depth) - 1,
                    bps);
        }
      } else {
        FOREACH_STREAM(stream, streams) { show_psnr(stream, 255.0, 0); }
      }
    }

    FOREACH_STREAM(stream, streams) { aom_codec_destroy(&stream->encoder); }

    if (global.test_decode != TEST_DECODE_OFF) {
      FOREACH_STREAM(stream, streams) { aom_codec_destroy(&stream->decoder); }
    }

    close_input_file(&input);

    if (global.test_decode == TEST_DECODE_FATAL) {
      FOREACH_STREAM(stream, streams) { res |= stream->mismatch_seen; }
    }
    FOREACH_STREAM(stream, streams) {
      close_output_file(stream, global.codec->fourcc);
    }

    FOREACH_STREAM(stream, streams) {
      stats_close(&stream->stats, global.passes - 1);
    }

    /* When a single pass was requested explicitly, run only that one. */
    if (global.pass) break;
  }  // end of for (pass = ...; pass < global.passes; pass++)

  if (global.show_q_hist_buckets) {
    FOREACH_STREAM(stream, streams) {
      show_q_histogram(stream->counts, global.show_q_hist_buckets);
    }
  }

  if (global.show_rate_hist_buckets) {
    FOREACH_STREAM(stream, streams) {
      show_rate_histogram(stream->rate_hist, &stream->config.cfg,
                          global.show_rate_hist_buckets);
    }
  }
  FOREACH_STREAM(stream, streams) { destroy_rate_histogram(stream->rate_hist); }

#if CONFIG_INTERNAL_STATS
  /* TODO(jkoleszar): This doesn't belong in this executable. Do it for now,
   * to match some existing utilities.
   */
  if (!(global.pass == 1 && global.passes == 2)) {
    FOREACH_STREAM(stream, streams) {
      FILE *f = fopen("opsnr.stt", "a");
      if (f) {
        if (stream->mismatch_seen) {
          fprintf(f, "First mismatch occurred in frame %d\n",
                  stream->mismatch_seen);
        } else {
          fprintf(f, "No mismatch detected in recon buffers\n");
        }
        fclose(f);
      } else {
        /* The statistics file is best effort: report the failure instead
         * of handing a null stream to fprintf()/fclose(). */
        fprintf(stderr, "Warning: failed to open opsnr.stt for appending\n");
      }
    }
  }
#endif

  if (allocated_raw_shift) aom_img_free(&raw_shift);
  aom_img_free(&raw);
  free(argv);
  /* NOTE(review): only the head node is released here; the nodes reachable
   * through ->next are left to process exit. */
  free(streams);
  return res ? EXIT_FAILURE : EXIT_SUCCESS;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值