1 背景
在基于IJKPLAYER做二次开发时,为了降低直播时延,使能了ffmpeg的nobuffer选项,该选项也是ffplay所支持的。自测发现,该选项对直播时延的确有一定帮助,但会增加首开时间。
本文记录下此次踩坑经历。
2 问题现象
- IJKPLAYER首开时延不稳定,好则1秒内,坏则若干秒;
- 目标是优化到秒开。
3 原因分析
3.1 分析过程
以下是原因分析过程:
- 通过打印日志分析,发现耗时主要集中在avformat_open_input,avformat_find_stream_info耗时比较理想,多次测试均在几十ms内,而avformat_open_input则耗时短则300ms长则1秒+,具体原因是SRS3.0版本在信令交互后推流数据时有不稳定延时,此文暂不做此分析,专注分析nobuffer选项;
- av_read_frame读取的首个video AVPacket不是IDR帧;
- 而ffprobe -show_frames rtmp://ip/live/test发现每次过来的首个video帧一定是IDR帧;
- 用ffmpeg的api写了一个简易的拉流程序,不设置任何选项,断点调试发现过来的首个video的AVPacket必是IDR帧;
由此,得出一个结论:
- ffmpeg内部(具体是avformat模块)一定存在丢包,而且很可能是由于设置了某个option选项导致的。
而后,经过逐一注释掉option选项,发现罪魁祸首就是使能了ffmpeg的nobuffer选项:
// 原代码使能了nobuffer选项,查明原因后注释掉此行代码
//[options setFormatOptionValue:@"nobuffer" forKey:@"fflags"];
在注释了以上这行代码之后,断点调试发现av_read_frame读取到的首个video的AVPacket必是IDR帧。 nobuffer选项导致的首开时间增加问题得以解决。
3.2 源码剖析
在找到了原因之后,我们再来看看ffmpeg的源码,查看下若使能了nobuffer选项,底层到底干了些什么事情。
3.2.1 avformat_open_input
ffmpeg的nobuffer选项是在调用avformat_open_input接口时通过最后1个options参数传入:
int avformat_open_input(AVFormatContext **ps, const char *filename,
const AVInputFormat *fmt, AVDictionary **options)
展开avformat_open_input方法看看主要实现:
/*
 * Abridged excerpt of avformat_open_input(); parts unrelated to option
 * handling are replaced by "......" lines.
 *
 * What the visible lines show:
 *  - when *ps is NULL a fresh context is obtained from avformat_alloc_context();
 *  - the dictionary `tmp` is applied to the context with av_opt_set_dict().
 *
 * NOTE(review): `tmp` is initialised to NULL here and the code that fills it is
 * elided; presumably it is a copy of *options -- confirm against the full source.
 */
int avformat_open_input(AVFormatContext **ps, const char *filename,
const AVInputFormat *fmt, AVDictionary **options)
{
AVFormatContext *s = *ps;
FFFormatContext *si;
AVDictionary *tmp = NULL;
ID3v2ExtraMeta *id3v2_extra_meta = NULL;
int ret = 0;
// Allocate an AVFormatContext when the caller did not supply one; fail with
// ENOMEM if the allocation does not succeed.
if (!s && !(s = avformat_alloc_context()))
return AVERROR(ENOMEM);
// Unrelated code omitted here
......
// Apply the option dictionary to the AVFormatContext. Per the article, this is
// the step through which "fflags=nobuffer" ends up in AVFormatContext.flags.
if ((ret = av_opt_set_dict(s, &tmp)) < 0)
goto fail;
// Unrelated code omitted here
......
}
在avformat_alloc_context方法里注册av_format_context_class:
/*
 * Abridged excerpt of avformat_alloc_context().
 *
 * Allocates a zero-initialised FFFormatContext, exposes its embedded public
 * AVFormatContext (`si->pub`) and points av_class at av_format_context_class.
 * Returns NULL when the allocation fails. Further initialisation is elided
 * ("......").
 */
AVFormatContext *avformat_alloc_context(void)
{
FFFormatContext *const si = av_mallocz(sizeof(*si));
AVFormatContext *s;
if (!si)
return NULL;
// The public context is a member of the internal FFFormatContext.
s = &si->pub;
// Attach av_format_context_class to the new context.
s->av_class = &av_format_context_class;
......
return s;
}
而av_format_context_class的定义:
/*
 * AVClass descriptor that avformat_alloc_context() attaches to every
 * AVFormatContext. Its `.option` member points at the avformat_options table.
 * NOTE(review): presumably this is how the av_opt_* calls find the "fflags"
 * entry -- confirm against the AVOption API documentation.
 */
static const AVClass av_format_context_class = {
.class_name = "AVFormatContext",
.item_name = format_to_name,
// Option table for this class.
.option = avformat_options,
.version = LIBAVUTIL_VERSION_INT,
.child_next = format_child_next,
.child_class_iterate = format_child_class_iterate,
.category = AV_CLASS_CATEGORY_MUXER,
.get_category = get_category,
};
可以看到,av_format_context_class的option选项被赋值为avformat_options了,以下是avformat_options的详细定义:
// Byte offset of member x inside AVFormatContext
#define OFFSET(x) offsetof(AVFormatContext,x)
#define DEFAULT 0 //should be NAN but it does not work as it is not a constant in glibc as required by ANSI/ISO C
//these names are too long to be readable
#define E AV_OPT_FLAG_ENCODING_PARAM
#define D AV_OPT_FLAG_DECODING_PARAM
/*
 * Option table referenced by av_format_context_class.option.
 * Entries of type AV_OPT_TYPE_CONST carry offset 0 and a value in .i64; their
 * last string field matches that of the option they belong to (e.g. "fflags").
 * The table ends with a {NULL} entry.
 */
static const AVOption avformat_options[] = {
{"avioflags", NULL, OFFSET(avio_flags), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT }, INT_MIN, INT_MAX, D|E, "avioflags"},
{"direct", "reduce buffering", 0, AV_OPT_TYPE_CONST, {.i64 = AVIO_FLAG_DIRECT }, INT_MIN, INT_MAX, D|E, "avioflags"},
{"probesize", "set probing size", OFFSET(probesize), AV_OPT_TYPE_INT64, {.i64 = 5000000 }, 32, INT64_MAX, D},
{"formatprobesize", "number of bytes to probe file format", OFFSET(format_probesize), AV_OPT_TYPE_INT, {.i64 = PROBE_BUF_MAX}, 0, INT_MAX-1, D},
{"packetsize", "set packet size", OFFSET(packet_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, E},
// The "fflags" option is backed by the AVFormatContext.flags member, located via OFFSET(flags)
{"fflags", NULL, OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = AVFMT_FLAG_AUTO_BSF }, INT_MIN, INT_MAX, D|E, "fflags"},
{"flush_packets", "reduce the latency by flushing out packets immediately", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_FLUSH_PACKETS }, INT_MIN, INT_MAX, E, "fflags"},
{"ignidx", "ignore index", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_IGNIDX }, INT_MIN, INT_MAX, D, "fflags"},
{"genpts", "generate pts", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_GENPTS }, INT_MIN, INT_MAX, D, "fflags"},
{"nofillin", "do not fill in missing values that can be exactly calculated", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_NOFILLIN }, INT_MIN, INT_MAX, D, "fflags"},
{"noparse", "disable AVParsers, this needs nofillin too", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_NOPARSE }, INT_MIN, INT_MAX, D, "fflags"},
{"igndts", "ignore dts", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_IGNDTS }, INT_MIN, INT_MAX, D, "fflags"},
{"discardcorrupt", "discard corrupted frames", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_DISCARD_CORRUPT }, INT_MIN, INT_MAX, D, "fflags"},
{"sortdts", "try to interleave outputted packets by dts", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_SORT_DTS }, INT_MIN, INT_MAX, D, "fflags"},
{"fastseek", "fast but inaccurate seeks", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_FAST_SEEK }, INT_MIN, INT_MAX, D, "fflags"},
// "nobuffer": named constant in the "fflags" group whose value is AVFMT_FLAG_NOBUFFER (decoding side only, D)
{"nobuffer", "reduce the latency introduced by optional buffering", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_NOBUFFER }, 0, INT_MAX, D, "fflags"},
{"bitexact", "do not write random/volatile data", 0, AV_OPT_TYPE_CONST, { .i64 = AVFMT_FLAG_BITEXACT }, 0, 0, E, "fflags" },
#if FF_API_LAVF_SHORTEST
{"shortest", "stop muxing with the shortest stream", 0, AV_OPT_TYPE_CONST, { .i64 = AVFMT_FLAG_SHORTEST }, 0, 0, E | AV_OPT_FLAG_DEPRECATED, "fflags" },
#endif
{"autobsf", "add needed bsfs automatically", 0, AV_OPT_TYPE_CONST, { .i64 = AVFMT_FLAG_AUTO_BSF }, 0, 0, E, "fflags" },
{"seek2any", "allow seeking to non-keyframes on demuxer level when supported", OFFSET(seek2any), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, D},
{"analyzeduration", "specify how many microseconds are analyzed to probe the input", OFFSET(max_analyze_duration), AV_OPT_TYPE_INT64, {.i64 = 0 }, 0, INT64_MAX, D},
{"cryptokey", "decryption key", OFFSET(key), AV_OPT_TYPE_BINARY, {.dbl = 0}, 0, 0, D},
{"indexmem", "max memory used for timestamp index (per stream)", OFFSET(max_index_size), AV_OPT_TYPE_INT, {.i64 = 1<<20 }, 0, INT_MAX, D},
{"rtbufsize", "max memory used for buffering real-time frames", OFFSET(max_picture_buffer), AV_OPT_TYPE_INT, {.i64 = 3041280 }, 0, INT_MAX, D}, /* defaults to 1s of 15fps 352x288 YUYV422 video */
{"fdebug", "print specific debug info", OFFSET(debug), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT }, 0, INT_MAX, E|D, "fdebug"},
{"ts", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_FDEBUG_TS }, INT_MIN, INT_MAX, E|D, "fdebug"},
{"max_delay", "maximum muxing or demuxing delay in microseconds", OFFSET(max_delay), AV_OPT_TYPE_INT, {.i64 = -1 }, -1, INT_MAX, E|D},
{"start_time_realtime", "wall-clock time when stream begins (PTS==0)", OFFSET(start_time_realtime), AV_OPT_TYPE_INT64, {.i64 = AV_NOPTS_VALUE}, INT64_MIN, INT64_MAX, E},
{"fpsprobesize", "number of frames used to probe fps", OFFSET(fps_probe_size), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX-1, D},
{"audio_preload", "microseconds by which audio packets should be interleaved earlier", OFFSET(audio_preload), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX-1, E},
{"chunk_duration", "microseconds for each chunk", OFFSET(max_chunk_duration), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX-1, E},
{"chunk_size", "size in bytes for each chunk", OFFSET(max_chunk_size), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX-1, E},
/* this is a crutch for avconv, since it cannot deal with identically named options in different contexts.
* to be removed when avconv is fixed */
{"f_err_detect", "set error detection flags (deprecated; use err_detect, save via avconv)", OFFSET(error_recognition), AV_OPT_TYPE_FLAGS, {.i64 = AV_EF_CRCCHECK }, INT_MIN, INT_MAX, D, "err_detect"},
{"err_detect", "set error detection flags", OFFSET(error_recognition), AV_OPT_TYPE_FLAGS, {.i64 = AV_EF_CRCCHECK }, INT_MIN, INT_MAX, D, "err_detect"},
{"crccheck", "verify embedded CRCs", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_CRCCHECK }, INT_MIN, INT_MAX, D, "err_detect"},
{"bitstream", "detect bitstream specification deviations", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_BITSTREAM }, INT_MIN, INT_MAX, D, "err_detect"},
{"buffer", "detect improper bitstream length", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_BUFFER }, INT_MIN, INT_MAX, D, "err_detect"},
{"explode", "abort decoding on minor error detection", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_EXPLODE }, INT_MIN, INT_MAX, D, "err_detect"},
{"ignore_err", "ignore errors", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_IGNORE_ERR }, INT_MIN, INT_MAX, D, "err_detect"},
{"careful", "consider things that violate the spec, are fast to check and have not been seen in the wild as errors", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_CAREFUL }, INT_MIN, INT_MAX, D, "err_detect"},
{"compliant", "consider all spec non compliancies as errors", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_COMPLIANT | AV_EF_CAREFUL }, INT_MIN, INT_MAX, D, "err_detect"},
{"aggressive", "consider things that a sane encoder shouldn't do as an error", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_AGGRESSIVE | AV_EF_COMPLIANT | AV_EF_CAREFUL}, INT_MIN, INT_MAX, D, "err_detect"},
{"use_wallclock_as_timestamps", "use wallclock as timestamps", OFFSET(use_wallclock_as_timestamps), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, D},
{"skip_initial_bytes", "set number of bytes to skip before reading header and frames", OFFSET(skip_initial_bytes), AV_OPT_TYPE_INT64, {.i64 = 0}, 0, INT64_MAX-1, D},
{"correct_ts_overflow", "correct single timestamp overflows", OFFSET(correct_ts_overflow), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, D},
{"flush_packets", "enable flushing of the I/O context after each packet", OFFSET(flush_packets), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 1, E},
{"metadata_header_padding", "set number of bytes to be written as padding in a metadata header", OFFSET(metadata_header_padding), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, E},
{"output_ts_offset", "set output timestamp offset", OFFSET(output_ts_offset), AV_OPT_TYPE_DURATION, {.i64 = 0}, -INT64_MAX, INT64_MAX, E},
{"max_interleave_delta", "maximum buffering duration for interleaving", OFFSET(max_interleave_delta), AV_OPT_TYPE_INT64, { .i64 = 10000000 }, 0, INT64_MAX, E },
{"f_strict", "how strictly to follow the standards (deprecated; use strict, save via avconv)", OFFSET(strict_std_compliance), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, D|E, "strict"},
{"strict", "how strictly to follow the standards", OFFSET(strict_std_compliance), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, D|E, "strict"},
{"very", "strictly conform to a older more strict version of the spec or reference software", 0, AV_OPT_TYPE_CONST, {.i64 = FF_COMPLIANCE_VERY_STRICT }, INT_MIN, INT_MAX, D|E, "strict"},
{"strict", "strictly conform to all the things in the spec no matter what the consequences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_COMPLIANCE_STRICT }, INT_MIN, INT_MAX, D|E, "strict"},
{"normal", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_COMPLIANCE_NORMAL }, INT_MIN, INT_MAX, D|E, "strict"},
{"unofficial", "allow unofficial extensions", 0, AV_OPT_TYPE_CONST, {.i64 = FF_COMPLIANCE_UNOFFICIAL }, INT_MIN, INT_MAX, D|E, "strict"},
{"experimental", "allow non-standardized experimental variants", 0, AV_OPT_TYPE_CONST, {.i64 = FF_COMPLIANCE_EXPERIMENTAL }, INT_MIN, INT_MAX, D|E, "strict"},
{"max_ts_probe", "maximum number of packets to read while waiting for the first timestamp", OFFSET(max_ts_probe), AV_OPT_TYPE_INT, { .i64 = 50 }, 0, INT_MAX, D },
{"avoid_negative_ts", "shift timestamps so they start at 0", OFFSET(avoid_negative_ts), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 2, E, "avoid_negative_ts"},
{"auto", "enabled when required by target format", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_AVOID_NEG_TS_AUTO }, INT_MIN, INT_MAX, E, "avoid_negative_ts"},
{"disabled", "do not change timestamps", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_AVOID_NEG_TS_DISABLED }, INT_MIN, INT_MAX, E, "avoid_negative_ts"},
{"make_non_negative", "shift timestamps so they are non negative", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_AVOID_NEG_TS_MAKE_NON_NEGATIVE }, INT_MIN, INT_MAX, E, "avoid_negative_ts"},
{"make_zero", "shift timestamps so they start at 0", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_AVOID_NEG_TS_MAKE_ZERO }, INT_MIN, INT_MAX, E, "avoid_negative_ts"},
{"dump_separator", "set information dump field separator", OFFSET(dump_separator), AV_OPT_TYPE_STRING, {.str = ", "}, 0, 0, D|E},
{"codec_whitelist", "List of decoders that are allowed to be used", OFFSET(codec_whitelist), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, D },
{"format_whitelist", "List of demuxers that are allowed to be used", OFFSET(format_whitelist), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, D },
{"protocol_whitelist", "List of protocols that are allowed to be used", OFFSET(protocol_whitelist), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, D },
{"protocol_blacklist", "List of protocols that are not allowed to be used", OFFSET(protocol_blacklist), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, D },
{"max_streams", "maximum number of streams", OFFSET(max_streams), AV_OPT_TYPE_INT, { .i64 = 1000 }, 0, INT_MAX, D },
{"skip_estimate_duration_from_pts", "skip duration calculation in estimate_timings_from_pts", OFFSET(skip_estimate_duration_from_pts), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, D},
{"max_probe_packets", "Maximum number of packets to probe a codec", OFFSET(max_probe_packets), AV_OPT_TYPE_INT, { .i64 = 2500 }, 0, INT_MAX, D },
{NULL},
};
AVFormatContext的定义如下:
/**
* Format I/O context, quoted from libavformat.
*
* For this article the member of interest is `flags`: the "fflags" AVOption is
* backed by it (OFFSET(flags) in avformat_options), and the "nobuffer" constant
* maps to the AVFMT_FLAG_NOBUFFER bit (0x0040) defined next to it.
*/
typedef struct AVFormatContext {
/**
* A class for logging and @ref avoptions. Set by avformat_alloc_context().
* Exports (de)muxer private options if they exist.
*/
const AVClass *av_class;
/**
* The input container format.
*
* Demuxing only, set by avformat_open_input().
*/
const struct AVInputFormat *iformat;
/**
* The output container format.
*
* Muxing only, must be set by the caller before avformat_write_header().
*/
const struct AVOutputFormat *oformat;
/**
* Format private data. This is an AVOptions-enabled struct
* if and only if iformat/oformat.priv_class is not NULL.
*
* - muxing: set by avformat_write_header()
* - demuxing: set by avformat_open_input()
*/
void *priv_data;
/**
* I/O context.
*
* - demuxing: either set by the user before avformat_open_input() (then
* the user must close it manually) or set by avformat_open_input().
* - muxing: set by the user before avformat_write_header(). The caller must
* take care of closing / freeing the IO context.
*
* Do NOT set this field if AVFMT_NOFILE flag is set in
* iformat/oformat.flags. In such a case, the (de)muxer will handle
* I/O in some other way and this field will be NULL.
*/
AVIOContext *pb;
/* stream info */
/**
* Flags signalling stream properties. A combination of AVFMTCTX_*.
* Set by libavformat.
*/
int ctx_flags;
/**
* Number of elements in AVFormatContext.streams.
*
* Set by avformat_new_stream(), must not be modified by any other code.
*/
unsigned int nb_streams;
/**
* A list of all streams in the file. New streams are created with
* avformat_new_stream().
*
* - demuxing: streams are created by libavformat in avformat_open_input().
* If AVFMTCTX_NOHEADER is set in ctx_flags, then new streams may also
* appear in av_read_frame().
* - muxing: streams are created by the user before avformat_write_header().
*
* Freed by libavformat in avformat_free_context().
*/
AVStream **streams;
/**
* input or output URL. Unlike the old filename field, this field has no
* length restriction.
*
* - demuxing: set by avformat_open_input(), initialized to an empty
* string if url parameter was NULL in avformat_open_input().
* - muxing: may be set by the caller before calling avformat_write_header()
* (or avformat_init_output() if that is called first) to a string
* which is freeable by av_free(). Set to an empty string if it
* was NULL in avformat_init_output().
*
* Freed by libavformat in avformat_free_context().
*/
char *url;
/**
* Position of the first frame of the component, in
* AV_TIME_BASE fractional seconds. NEVER set this value directly:
* It is deduced from the AVStream values.
*
* Demuxing only, set by libavformat.
*/
int64_t start_time;
/**
* Duration of the stream, in AV_TIME_BASE fractional
* seconds. Only set this value if you know none of the individual stream
* durations and also do not set any of them. This is deduced from the
* AVStream values if not set.
*
* Demuxing only, set by libavformat.
*/
int64_t duration;
/**
* Total stream bitrate in bit/s, 0 if not
* available. Never set it directly if the file_size and the
* duration are known as FFmpeg can compute it automatically.
*/
int64_t bit_rate;
/**
* Packet size; backs the "packetsize" AVOption ("set packet size").
*/
unsigned int packet_size;
/**
* Maximum muxing or demuxing delay in microseconds; backs the "max_delay"
* AVOption.
*/
int max_delay;
/**
* Flags modifying the (de)muxer behaviour. A combination of AVFMT_FLAG_*.
* Set by the user before avformat_open_input() / avformat_write_header().
*/
int flags;
#define AVFMT_FLAG_GENPTS 0x0001 ///< Generate missing pts even if it requires parsing future frames.
#define AVFMT_FLAG_IGNIDX 0x0002 ///< Ignore index.
#define AVFMT_FLAG_NONBLOCK 0x0004 ///< Do not block when reading packets from input.
#define AVFMT_FLAG_IGNDTS 0x0008 ///< Ignore DTS on frames that contain both DTS & PTS
#define AVFMT_FLAG_NOFILLIN 0x0010 ///< Do not infer any values from other values, just return what is stored in the container
#define AVFMT_FLAG_NOPARSE 0x0020 ///< Do not use AVParsers, you also must set AVFMT_FLAG_NOFILLIN as the fillin code works on frames and no parsing -> no frames. Also seeking to frames can not work if parsing to find frame boundaries has been disabled
#define AVFMT_FLAG_NOBUFFER 0x0040 ///< Do not buffer frames when possible
#define AVFMT_FLAG_CUSTOM_IO 0x0080 ///< The caller has supplied a custom AVIOContext, don't avio_close() it.
#define AVFMT_FLAG_DISCARD_CORRUPT 0x0100 ///< Discard frames marked corrupted
#define AVFMT_FLAG_FLUSH_PACKETS 0x0200 ///< Flush the AVIOContext every packet.
/**
* When muxing, try to avoid writing any random/volatile data to the output.
* This includes any random IDs, real-time timestamps/dates, muxer version, etc.
*
* This flag is mainly intended for testing.
*/
#define AVFMT_FLAG_BITEXACT 0x0400
#define AVFMT_FLAG_SORT_DTS 0x10000 ///< try to interleave outputted packets by dts (using this flag can slow demuxing down)
#define AVFMT_FLAG_FAST_SEEK 0x80000 ///< Enable fast, but inaccurate seeks for some formats
#if FF_API_LAVF_SHORTEST
#define AVFMT_FLAG_SHORTEST 0x100000 ///< Stop muxing when the shortest stream stops.
#endif
#define AVFMT_FLAG_AUTO_BSF 0x200000 ///< Add bitstream filters as requested by the muxer
/**
* Maximum number of bytes read from input in order to determine stream
* properties. Used when reading the global header and in
* avformat_find_stream_info().
*
* Demuxing only, set by the caller before avformat_open_input().
*
* @note this is \e not used for determining the \ref AVInputFormat
* "input format"
* @sa format_probesize
*/
int64_t probesize;
/**
* Maximum duration (in AV_TIME_BASE units) of the data read
* from input in avformat_find_stream_info().
* Demuxing only, set by the caller before avformat_find_stream_info().
* Can be set to 0 to let avformat choose using a heuristic.
*/
int64_t max_analyze_duration;
/**
* Decryption key; backs the "cryptokey" AVOption.
* NOTE(review): keylen is presumably the length of key in bytes -- confirm.
*/
const uint8_t *key;
int keylen;
/* NOTE(review): presumably the number of entries in programs -- confirm. */
unsigned int nb_programs;
AVProgram **programs;
/**
* Forced video codec_id.
* Demuxing: Set by user.
*/
enum AVCodecID video_codec_id;
/**
* Forced audio codec_id.
* Demuxing: Set by user.
*/
enum AVCodecID audio_codec_id;
/**
* Forced subtitle codec_id.
* Demuxing: Set by user.
*/
enum AVCodecID subtitle_codec_id;
/**
* Maximum amount of memory in bytes to use for the index of each stream.
* If the index exceeds this size, entries will be discarded as
* needed to maintain a smaller size. This can lead to slower or less
* accurate seeking (depends on demuxer).
* Demuxers for which a full in-memory index is mandatory will ignore
* this.
* - muxing: unused
* - demuxing: set by user
*/
unsigned int max_index_size;
/**
* Maximum amount of memory in bytes to use for buffering frames
* obtained from realtime capture devices.
*/
unsigned int max_picture_buffer;
/**
* Number of chapters in AVChapter array.
* When muxing, chapters are normally written in the file header,
* so nb_chapters should normally be initialized before write_header
* is called. Some muxers (e.g. mov and mkv) can also write chapters
* in the trailer. To write chapters in the trailer, nb_chapters
* must be zero when write_header is called and non-zero when
* write_trailer is called.
* - muxing: set by user
* - demuxing: set by libavformat
*/
unsigned int nb_chapters;
AVChapter **chapters;
/**
* Metadata that applies to the whole file.
*
* - demuxing: set by libavformat in avformat_open_input()
* - muxing: may be set by the caller before avformat_write_header()
*
* Freed by libavformat in avformat_free_context().
*/
AVDictionary *metadata;
/**
* Start time of the stream in real world time, in microseconds
* since the Unix epoch (00:00 1st January 1970). That is, pts=0 in the
* stream was captured at this real world time.
* - muxing: Set by the caller before avformat_write_header(). If set to
* either 0 or AV_NOPTS_VALUE, then the current wall-time will
* be used.
* - demuxing: Set by libavformat. AV_NOPTS_VALUE if unknown. Note that
* the value may become known after some number of frames
* have been received.
*/
int64_t start_time_realtime;
/**
* The number of frames used for determining the framerate in
* avformat_find_stream_info().
* Demuxing only, set by the caller before avformat_find_stream_info().
*/
int fps_probe_size;
/**
* Error recognition; higher values will detect more errors but may
* misdetect some more or less valid parts as errors.
* Demuxing only, set by the caller before avformat_open_input().
*/
int error_recognition;
/**
* Custom interrupt callbacks for the I/O layer.
*
* demuxing: set by the user before avformat_open_input().
* muxing: set by the user before avformat_write_header()
* (mainly useful for AVFMT_NOFILE formats). The callback
* should also be passed to avio_open2() if it's used to
* open the file.
*/
AVIOInterruptCB interrupt_callback;
/**
* Flags to enable debugging.
*/
int debug;
#define FF_FDEBUG_TS 0x0001
/**
* Maximum buffering duration for interleaving.
*
* To ensure all the streams are interleaved correctly,
* av_interleaved_write_frame() will wait until it has at least one packet
* for each stream before actually writing any packets to the output file.
* When some streams are "sparse" (i.e. there are large gaps between
* successive packets), this can result in excessive buffering.
*
* This field specifies the maximum difference between the timestamps of the
* first and the last packet in the muxing queue, above which libavformat
* will output a packet regardless of whether it has queued a packet for all
* the streams.
*
* Muxing only, set by the caller before avformat_write_header().
*/
int64_t max_interleave_delta;
/**
* Allow non-standard and experimental extension
* @see AVCodecContext.strict_std_compliance
*/
int strict_std_compliance;
/**
* Flags indicating events happening on the file, a combination of
* AVFMT_EVENT_FLAG_*.
*
* - demuxing: may be set by the demuxer in avformat_open_input(),
* avformat_find_stream_info() and av_read_frame(). Flags must be cleared
* by the user once the event has been handled.
* - muxing: may be set by the user after avformat_write_header() to
* indicate a user-triggered event. The muxer will clear the flags for
* events it has handled in av_[interleaved]_write_frame().
*/
int event_flags;
/**
* - demuxing: the demuxer read new metadata from the file and updated
* AVFormatContext.metadata accordingly
* - muxing: the user updated AVFormatContext.metadata and wishes the muxer to
* write it into the file
*/
#define AVFMT_EVENT_FLAG_METADATA_UPDATED 0x0001
/**
* Maximum number of packets to read while waiting for the first timestamp.
* Decoding only.
*/
int max_ts_probe;
/**
* Avoid negative timestamps during muxing.
* Any value of the AVFMT_AVOID_NEG_TS_* constants.
* Note, this works better when using av_interleaved_write_frame().
* - muxing: Set by user
* - demuxing: unused
*/
int avoid_negative_ts;
#define AVFMT_AVOID_NEG_TS_AUTO -1 ///< Enabled when required by target format
#define AVFMT_AVOID_NEG_TS_DISABLED 0 ///< Do not shift timestamps even when they are negative.
#define AVFMT_AVOID_NEG_TS_MAKE_NON_NEGATIVE 1 ///< Shift timestamps so they are non negative
#define AVFMT_AVOID_NEG_TS_MAKE_ZERO 2 ///< Shift timestamps so that they start at 0
/**
* Transport stream id.
* This will be moved into demuxer private options. Thus no API/ABI compatibility
*/
int ts_id;
/**
* Audio preload in microseconds.
* Note, not all formats support this and unpredictable things may happen if it is used when not supported.
* - encoding: Set by user
* - decoding: unused
*/
int audio_preload;
/**
* Max chunk time in microseconds.
* Note, not all formats support this and unpredictable things may happen if it is used when not supported.
* - encoding: Set by user
* - decoding: unused
*/
int max_chunk_duration;
/**
* Max chunk size in bytes
* Note, not all formats support this and unpredictable things may happen if it is used when not supported.
* - encoding: Set by user
* - decoding: unused
*/
int max_chunk_size;
/**
* forces the use of wallclock timestamps as pts/dts of packets
* This has undefined results in the presence of B frames.
* - encoding: unused
* - decoding: Set by user
*/
int use_wallclock_as_timestamps;
/**
* avio flags, used to force AVIO_FLAG_DIRECT.
* - encoding: unused
* - decoding: Set by user
*/
int avio_flags;
/**
* The duration field can be estimated through various ways, and this field can be used
* to know how the duration was estimated.
* - encoding: unused
* - decoding: Read by user
*/
enum AVDurationEstimationMethod duration_estimation_method;
/**
* Skip initial bytes when opening stream
* - encoding: unused
* - decoding: Set by user
*/
int64_t skip_initial_bytes;
/**
* Correct single timestamp overflows
* - encoding: unused
* - decoding: Set by user
*/
unsigned int correct_ts_overflow;
/**
* Force seeking to any (also non key) frames.
* - encoding: unused
* - decoding: Set by user
*/
int seek2any;
/**
* Flush the I/O context after each packet.
* - encoding: Set by user
* - decoding: unused
*/
int flush_packets;
/**
* format probing score.
* The maximal score is AVPROBE_SCORE_MAX, its set when the demuxer probes
* the format.
* - encoding: unused
* - decoding: set by avformat, read by user
*/
int probe_score;
/**
* Maximum number of bytes read from input in order to identify the
* \ref AVInputFormat "input format". Only used when the format is not set
* explicitly by the caller.
*
* Demuxing only, set by the caller before avformat_open_input().
*
* @sa probesize
*/
int format_probesize;
/**
* ',' separated list of allowed decoders.
* If NULL then all are allowed
* - encoding: unused
* - decoding: set by user
*/
char *codec_whitelist;
/**
* ',' separated list of allowed demuxers.
* If NULL then all are allowed
* - encoding: unused
* - decoding: set by user
*/
char *format_whitelist;
/**
* IO repositioned flag.
* This is set by avformat when the underlying IO context read pointer
* is repositioned, for example when doing byte based seeking.
* Demuxers can use the flag to detect such changes.
*/
int io_repositioned;
/**
* Forced video codec.
* This allows forcing a specific decoder, even when there are multiple with
* the same codec_id.
* Demuxing: Set by user
*/
const struct AVCodec *video_codec;
/**
* Forced audio codec.
* This allows forcing a specific decoder, even when there are multiple with
* the same codec_id.
* Demuxing: Set by user
*/
const struct AVCodec *audio_codec;
/**
* Forced subtitle codec.
* This allows forcing a specific decoder, even when there are multiple with
* the same codec_id.
* Demuxing: Set by user
*/
const struct AVCodec *subtitle_codec;
/**
* Forced data codec.
* This allows forcing a specific decoder, even when there are multiple with
* the same codec_id.
* Demuxing: Set by user
*/
const struct AVCodec *data_codec;
/**
* Number of bytes to be written as padding in a metadata header.
* Demuxing: Unused.
* Muxing: Set by user via av_format_set_metadata_header_padding.
*/
int metadata_header_padding;
/**
* User data.
* This is a place for some private data of the user.
*/
void *opaque;
/**
* Callback used by devices to communicate with application.
*/
av_format_control_message control_message_cb;
/**
* Output timestamp offset, in microseconds.
* Muxing: set by user
*/
int64_t output_ts_offset;
/**
* dump format separator.
* can be ", " or "\n " or anything else
* - muxing: Set by user.
* - demuxing: Set by user.
*/
uint8_t *dump_separator;
/**
* Forced Data codec_id.
* Demuxing: Set by user.
*/
enum AVCodecID data_codec_id;
/**
* ',' separated list of allowed protocols.
* - encoding: unused
* - decoding: set by user
*/
char *protocol_whitelist;
/**
* A callback for opening new IO streams.
*
* Whenever a muxer or a demuxer needs to open an IO stream (typically from
* avformat_open_input() for demuxers, but for certain formats can happen at
* other times as well), it will call this callback to obtain an IO context.
*
* @param s the format context
* @param pb on success, the newly opened IO context should be returned here
* @param url the url to open
* @param flags a combination of AVIO_FLAG_*
* @param options a dictionary of additional options, with the same
* semantics as in avio_open2()
* @return 0 on success, a negative AVERROR code on failure
*
* @note Certain muxers and demuxers do nesting, i.e. they open one or more
* additional internal format contexts. Thus the AVFormatContext pointer
* passed to this callback may be different from the one facing the caller.
* It will, however, have the same 'opaque' field.
*/
int (*io_open)(struct AVFormatContext *s, AVIOContext **pb, const char *url,
int flags, AVDictionary **options);
#if FF_API_AVFORMAT_IO_CLOSE
/**
* A callback for closing the streams opened with AVFormatContext.io_open().
*
* @deprecated use io_close2
*/
attribute_deprecated
void (*io_close)(struct AVFormatContext *s, AVIOContext *pb);
#endif
/**
* ',' separated list of disallowed protocols.
* - encoding: unused
* - decoding: set by user
*/
char *protocol_blacklist;
/**
* The maximum number of streams.
* - encoding: unused
* - decoding: set by user
*/
int max_streams;
/**
* Skip duration calculation in estimate_timings_from_pts.
* - encoding: unused
* - decoding: set by user
*/
int skip_estimate_duration_from_pts;
/**
* Maximum number of packets that can be probed
* - encoding: unused
* - decoding: set by user
*/
int max_probe_packets;
/**
* A callback for closing the streams opened with AVFormatContext.io_open().
*
* Using this is preferred over io_close, because this can return an error.
* Therefore this callback is used instead of io_close by the generic
* libavformat code if io_close is NULL or the default.
*
* @param s the format context
* @param pb IO context to be closed and freed
* @return 0 on success, a negative AVERROR code on failure
*/
int (*io_close2)(struct AVFormatContext *s, AVIOContext *pb);
} AVFormatContext;
最后,在以下代码里完成对AVFormatContext的flags的赋值:
- target_obj指针实际便是外围传入的AVFormatContext实例;
- dst指针所指向的内容即是flags的值,为64,也即#define AVFMT_FLAG_NOBUFFER 0x0040 ///< Do not buffer frames when possible;
- o变量的类型是AVOption,在此便是fflags选项的实例,o->offset便是flags变量在AVFormatContext中的偏移量;
- 于是,avformat_open_input的options传参便完成了对AVFormatContext的flags的赋值,由nobuffer字符串转为flags的整型位操作;
拿到AVFormatContext的实例target_obj,并计算出fflags选项所对应的flags变量在AVFormatContext中的偏移量:
拿到nobuffer选项AVOption实例:
并给AVFormatContext中的flags变量赋值,为#define AVFMT_FLAG_NOBUFFER 0x0040,即64:
3.2.2 avformat_find_stream_info
最后在avformat_find_stream_info中读包探测音视频及字幕流信息时,若使能了nobuffer选项(即flags中置位了AVFMT_FLAG_NOBUFFER,0x0040),则探测阶段读到的包不会被缓存到packet_buffer中,用完即丢弃,其中就包括首个IDR帧:
/*
 * Abridged excerpt of avformat_find_stream_info(); the surrounding read loop
 * and everything unrelated to packet buffering is replaced by "......".
 *
 * Visible behaviour: each packet obtained from read_frame_internal() is
 * appended to si->packet_buffer only when AVFMT_FLAG_NOBUFFER is NOT set in
 * ic->flags; otherwise the function works on pkt1 directly.
 */
int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options) {
......
ret = read_frame_internal(ic, pkt1);
// EAGAIN: nothing available yet, try again.
if (ret == AVERROR(EAGAIN))
continue;
if (ret < 0) {
/* EOF or error*/
eof_reached = 1;
break;
}
if (!(ic->flags & AVFMT_FLAG_NOBUFFER)) {
// nobuffer not set: append the packet to FFFormatContext.packet_buffer and
// keep working on the queued copy (the list tail).
ret = avpriv_packet_list_put(&si->packet_buffer,
pkt1, NULL, 0);
if (ret < 0)
goto unref_then_goto_end;
pkt = &si->packet_buffer.tail->pkt;
} else {
// nobuffer set: the packet is not queued; pkt merely aliases pkt1.
// NOTE(review): per the article, this means packets read during probing
// (key frames included) are not available to a later av_read_frame();
// the code that releases pkt1 is elided here -- confirm.
pkt = pkt1;
}
......
}
3.2.3 av_read_frame
而av_read_frame读取AVPacket时:
/*
 * Excerpt of av_read_frame() (pts/dts handling in the genpts loop is elided).
 *
 * Without AVFMT_FLAG_GENPTS: return the head of si->packet_buffer if the queue
 * is non-empty, otherwise read a new packet via read_frame_internal().
 * With AVFMT_FLAG_GENPTS: loop, inspecting the queue head, reading further
 * packets and appending them to si->packet_buffer.
 *
 * Returns a negative value from the underlying call on failure; on the
 * return_packet path it returns `ret` after adjusting relative timestamps.
 */
int av_read_frame(AVFormatContext *s, AVPacket *pkt)
{
FFFormatContext *const si = ffformatcontext(s);
const int genpts = s->flags & AVFMT_FLAG_GENPTS;
int eof = 0;
int ret;
AVStream *st;
if (!genpts) {
// Packets already queued in packet_buffer are served before anything new
// is read.
ret = si->packet_buffer.head
? avpriv_packet_list_get(&si->packet_buffer, pkt)
: read_frame_internal(s, pkt);
if (ret < 0)
return ret;
goto return_packet;
}
for (;;) {
// Look at the head of FFFormatContext.packet_buffer first
PacketListEntry *pktl = si->packet_buffer.head;
if (pktl) {
AVPacket *next_pkt = &pktl->pkt;
// pts/dts handling omitted here
......
}
// Read a further packet via read_frame_internal().
// NOTE(review): the article states that read_frame_internal() first consults
// FFFormatContext.raw_packet_buffer and otherwise reads from the network through
// AVInputFormat => AVIOContext => URLContext => URLProtocol; that is not visible
// in this excerpt -- confirm.
ret = read_frame_internal(s, pkt);
if (ret < 0) {
if (pktl && ret != AVERROR(EAGAIN)) {
eof = 1;
continue;
} else
return ret;
}
// Append the packet just read to the tail of FFFormatContext.packet_buffer
ret = avpriv_packet_list_put(&si->packet_buffer,
pkt, NULL, 0);
if (ret < 0) {
av_packet_unref(pkt);
return ret;
}
}
return_packet:
st = s->streams[pkt->stream_index];
// For formats flagged AVFMT_GENERIC_INDEX, record key-frame packets in the index.
if ((s->iformat->flags & AVFMT_GENERIC_INDEX) && pkt->flags & AV_PKT_FLAG_KEY) {
ff_reduce_index(s, st->index);
av_add_index_entry(st, pkt->pos, pkt->dts, 0, 0, AVINDEX_KEYFRAME);
}
// Strip RELATIVE_TS_BASE from timestamps that is_relative() reports as relative.
if (is_relative(pkt->dts))
pkt->dts -= RELATIVE_TS_BASE;
if (is_relative(pkt->pts))
pkt->pts -= RELATIVE_TS_BASE;
return ret;
}
于是,便出现了后续用av_read_frame读取video的AVPacket时,首个video包不是IDR帧的情况。而解码器必须从IDR帧开始才能正确解码出图,首个IDR帧被丢弃后只能等到下一个IDR帧到来,最坏情况下要多等接近一个GOP的时长,这正是首开时间增加的原因。
4 解决办法
找到了原因,则解决办法就简单了,通过注释掉nobuffer选项代码即可:
// ffmpeg的nobuffer选项虽可降低视频的时延,但会增加首开时延,要合理使用!
//[options setFormatOptionValue:@"nobuffer" forKey:@"fflags"];