FFmpeg源码剖析-解码：av_read_frame()

最新推荐文章于 2024-08-09 00:00:00 发布

北雨南萍

最新推荐文章于 2024-08-09 00:00:00 发布

阅读量3.9k

点赞数

分类专栏：流媒体: FFmpeg专项

本文链接：https://blog.csdn.net/fireroll/article/details/78025281

版权

流媒体: FFmpeg专项专栏收录该内容

62 篇文章 23 订阅

订阅专栏

av_read_frame()函数位于libavformat/utils.c

1. 函数概述

读取输入流的一帧数据；

2. 函数调用图

3. 伪码分析

4. 代码分析

av_read_frame()它的声明(libavformat/avformat.h)/**
* Return the next frame of a stream.
* This function returns what is stored in the file, and does not validate
* that what is there are valid frames for the decoder. It will split what is
* stored in the file into frames and return one for each call. It will not
* omit invalid data between valid frames so as to give the decoder the maximum
* information possible for decoding.
*
* If pkt->buf is NULL, then the packet is valid until the next
* av_read_frame() or until avformat_close_input(). Otherwise the packet
* is valid indefinitely. In both cases the packet must be freed with
* av_packet_unref when it is no longer needed. For video, the packet contains
* exactly one frame. For audio, it contains an integer number of frames if each
* frame has a known fixed size (e.g. PCM or ADPCM data). If the audio frames
* have a variable size (e.g. MPEG audio), then it contains one frame.
*
* pkt->pts, pkt->dts and pkt->duration are always set to correct
* values in AVStream.time_base units (and guessed if the format cannot
* provide them). pkt->pts can be AV_NOPTS_VALUE if the video format
* has B-frames, so it is better to rely on pkt->dts if you do not
* decompress the payload.
*
* @return 0 if OK, < 0 on error or end of file
*/
int av_read_frame(AVFormatContext *s, AVPacket *pkt);

函数功能: 返回输入流的下一帧。
因为它是为解码器提供尽可能多的信息，所以它返回的是存储在文件中的下一帧，
而不会验证这一帧对解码器是否有效。

参数解析:
. pkt->buf，
如果它的值为NULL，那这个Packet数据有效，直到下一个av_read_frame()或 avformat_close_input()。
否则，这个packet数据不确定是否有效。
在这两种情况下，当数据不再有效时，需要用av_packet_unref来释放。
对于视频来说，这个packet包含一个完整的视频帧；
对于音频来说，如果音频帧的大小是固定的(如PCM或ADPCM数据)，则包含整数帧；
如果音频帧的大小是可变的(如MPEG音频)，则包含一帧。
. pkt->pts, pkt->dts和pkt->duration是以AVStream.time_base为单位表示的。
pkt->pts，值可以是AV_NOPTS_VALUE,如果视频中包含B-frames。
所以当不进行数据解码时，pkt->dts是更可靠的。

返回值:
返回值为 0 ，则正确；
返回值小于0，则表示出错或到了文件尾；

它的定义(libavformat/utils.c):
#define AVFMT_FLAG_GENPTS 0x0001 ///< Generate missing pts even if it requires parsing future frames.

int av_read_frame(AVFormatContext *s, AVPacket *pkt)
{
const int genpts = s->flags & AVFMT_FLAG_GENPTS;
int eof = 0;
int ret;
AVStream *st;

if (!genpts) {
/* 如果包的链表中有数据，则直接从链表中取一个packet;
* 如果没有，则要读文件获得一个packet.
*/
ret = s->internal->packet_buffer
? read_from_packet_buffer(&s->internal->packet_buffer,
&s->internal->packet_buffer_end, pkt)
: read_frame_internal(s, pkt);
if (ret < 0)
return ret;
goto return_packet;
}

for (;;) {
AVPacketList *pktl = s->internal->packet_buffer;

if (pktl) {
AVPacket *next_pkt = &pktl->pkt;

if (next_pkt->dts != AV_NOPTS_VALUE) {
int wrap_bits = s->streams[next_pkt->stream_index]->pts_wrap_bits;

// last dts seen for this stream. if any of packets following
// current one had no dts, we will set this to AV_NOPTS_VALUE.
int64_t last_dts = next_pkt->dts;
while (pktl && next_pkt->pts == AV_NOPTS_VALUE) {
if (pktl->pkt.stream_index == next_pkt->stream_index &&
(av_compare_mod(next_pkt->dts, pktl->pkt.dts, 2LL << (wrap_bits - 1)) < 0)) {
if (av_compare_mod(pktl->pkt.pts, pktl->pkt.dts, 2LL << (wrap_bits - 1))) {
// not B-frame
next_pkt->pts = pktl->pkt.dts;
}
if (last_dts != AV_NOPTS_VALUE) {
// Once last dts was set to AV_NOPTS_VALUE, we don't change it.
last_dts = pktl->pkt.dts;
}
}
pktl = pktl->next;
}
if (eof && next_pkt->pts == AV_NOPTS_VALUE && last_dts != AV_NOPTS_VALUE) {
// Fixing the last reference frame had none pts issue (For MXF etc).
// We only do this when
// 1. eof.
// 2. we are not able to resolve a pts value for current packet.
// 3. the packets for this stream at the end of the files had valid dts.
next_pkt->pts = last_dts + next_pkt->duration;
}
pktl = s->internal->packet_buffer;
}

/* read packet from packet buffer, if there is data */
st = s->streams[next_pkt->stream_index];
if (!(next_pkt->pts == AV_NOPTS_VALUE && st->discard < AVDISCARD_ALL &&
next_pkt->dts != AV_NOPTS_VALUE && !eof)) {
ret = read_from_packet_buffer(&s->internal->packet_buffer,
&s->internal->packet_buffer_end, pkt);
goto return_packet;
}
}

ret = read_frame_internal(s, pkt);
if (ret < 0) {
if (pktl && ret != AVERROR(EAGAIN)) {
eof = 1;
continue;
} else
return ret;
}

ret = add_to_pktbuf(&s->internal->packet_buffer, pkt,
&s->internal->packet_buffer_end, 1);
av_packet_unref(pkt);
if (ret < 0)
return ret;
}

return_packet:
st = s->streams[pkt->stream_index];
if ((s->iformat->flags & AVFMT_GENERIC_INDEX) && pkt->flags & AV_PKT_FLAG_KEY) {
ff_reduce_index(s, st->index);
av_add_index_entry(st, pkt->pos, pkt->dts, 0, 0, AVINDEX_KEYFRAME);
}

if (is_relative(pkt->dts))
pkt->dts -= RELATIVE_TS_BASE;
if (is_relative(pkt->pts))
pkt->pts -= RELATIVE_TS_BASE;

return ret;
}

4.1 read_from_packet_buffer

static int read_from_packet_buffer(AVPacketList **pkt_buffer,
AVPacketList **pkt_buffer_end,
AVPacket *pkt)
{
AVPacketList *pktl;
av_assert0(*pkt_buffer);

pktl = *pkt_buffer;
*pkt = pktl->pkt;
*pkt_buffer = pktl->next;
if (!pktl->next)
*pkt_buffer_end = NULL;
av_freep(&pktl);
return 0;
}

这个函数很简单，
就是从packet链表的头起取一个packet，然后将链表pkt_buffer指针向下移动一个。

4.2 read_frame_internal()

read_frame_internal()位于 libavformat/utils.c
static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
{
int ret = 0, i, got_packet = 0;
AVDictionary *metadata = NULL;

/* AVPacket结构体的初始化 */
av_init_packet(pkt);

while (!got_packet && !s->internal->parse_queue) {
AVStream *st;
AVPacket cur_pkt;

/* read next packet */
ret = ff_read_packet(s, &cur_pkt);
if (ret < 0) { ... }
ret = 0;
st = s->streams[cur_pkt.stream_index]; //从全局变量中获得当前包所在流的数据信息

/* update context if required */
if (st->internal->need_context_update) {
...
}

if (!st->need_parsing || !st->parser) {
/* no parsing needed: we just output the packet as is */
*pkt = cur_pkt;
compute_pkt_fields(s, st, NULL, pkt, AV_NOPTS_VALUE, AV_NOPTS_VALUE);
if ((s->iformat->flags & AVFMT_GENERIC_INDEX) &&
(pkt->flags & AV_PKT_FLAG_KEY) && pkt->dts != AV_NOPTS_VALUE) {
ff_reduce_index(s, st->index);
av_add_index_entry(st, pkt->pos, pkt->dts,
0, 0, AVINDEX_KEYFRAME);
}
got_packet = 1;
} else if (st->discard < AVDISCARD_ALL) {
/* 解析当前的packet，并将所有解析出的数据填充到结构体中 */
if ((ret = parse_packet(s, &cur_pkt, cur_pkt.stream_index)) < 0)
return ret;
st->codecpar->sample_rate = st->internal->avctx->sample_rate;
st->codecpar->bit_rate = st->internal->avctx->bit_rate;
st->codecpar->channels = st->internal->avctx->channels;
st->codecpar->channel_layout = st->internal->avctx->channel_layout;
st->codecpar->codec_id = st->internal->avctx->codec_id;
} else {
/* free packet */
av_packet_unref(&cur_pkt);
}
if (pkt->flags & AV_PKT_FLAG_KEY)
st->skip_to_keyframe = 0;
if (st->skip_to_keyframe) {
av_packet_unref(&cur_pkt);
if (got_packet) {
*pkt = cur_pkt;
}
got_packet = 0;
}
}

/* 再从上面已读取新包到包链表中取出一个包 */
if (!got_packet && s->internal->parse_queue)
ret = read_from_packet_buffer(&s->internal->parse_queue, &s->internal->parse_queue_end, pkt);

if (ret >= 0) {
AVStream *st = s->streams[pkt->stream_index];
int discard_padding = 0;
if (st->first_discard_sample && pkt->pts != AV_NOPTS_VALUE) {
int64_t pts = pkt->pts - (is_relative(pkt->pts) ? RELATIVE_TS_BASE : 0);
int64_t sample = ts_to_samples(st, pts);
int duration = ts_to_samples(st, pkt->duration);
int64_t end_sample = sample + duration;
if (duration > 0 && end_sample >= st->first_discard_sample &&
sample < st->last_discard_sample)
discard_padding = FFMIN(end_sample - st->first_discard_sample, duration);
}
if (st->start_skip_samples && (pkt->pts == 0 || pkt->pts == RELATIVE_TS_BASE))
st->skip_samples = st->start_skip_samples;
if (st->skip_samples || discard_padding) {
uint8_t *p = av_packet_new_side_data(pkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
if (p) {
AV_WL32(p, st->skip_samples);
AV_WL32(p + 4, discard_padding);
av_log(s, AV_LOG_DEBUG, "demuxer injecting skip %d / discard %d\n", st->skip_samples, discard_padding);
}
st->skip_samples = 0;
}

if (st->inject_global_side_data) {
for (i = 0; i < st->nb_side_data; i++) {
AVPacketSideData *src_sd = &st->side_data[i];
uint8_t *dst_data;

if (av_packet_get_side_data(pkt, src_sd->type, NULL))
continue;

dst_data = av_packet_new_side_data(pkt, src_sd->type, src_sd->size);
if (!dst_data) {
av_log(s, AV_LOG_WARNING, "Could not inject global side data\n");
continue;
}

memcpy(dst_data, src_sd->data, src_sd->size);
}
st->inject_global_side_data = 0;
}

if (!(s->flags & AVFMT_FLAG_KEEP_SIDE_DATA))
av_packet_merge_side_data(pkt);
}

av_opt_get_dict_val(s, "metadata", AV_OPT_SEARCH_CHILDREN, &metadata);
if (metadata) {
s->event_flags |= AVFMT_EVENT_FLAG_METADATA_UPDATED;
av_dict_copy(&s->metadata, metadata, 0);
av_dict_free(&metadata);
av_opt_set_dict_val(s, "metadata", NULL, AV_OPT_SEARCH_CHILDREN);
}

#if FF_API_LAVF_AVCTX
update_stream_avctx(s);
#endif

if (s->debug & FF_FDEBUG_TS)
av_log(s, AV_LOG_DEBUG,
"read_frame_internal stream=%d, pts=%s, dts=%s, "
"size=%d, duration=%"PRId64", flags=%d\n",
pkt->stream_index,
av_ts2str(pkt->pts),
av_ts2str(pkt->dts),
pkt->size, pkt->duration, pkt->flags);

return ret;
}

4.3 parse_packet()

libavformat/utils.c
/**
* Parse a packet, add all split parts to parse_queue.
*
* @param pkt Packet to parse, NULL when flushing the parser at end of stream.
*/
static int parse_packet(AVFormatContext *s, AVPacket *pkt, int stream_index)
{
AVPacket out_pkt = { 0 }, flush_pkt = { 0 };
AVStream *st = s->streams[stream_index];
uint8_t *data = pkt ? pkt->data : NULL;
int size = pkt ? pkt->size : 0;
int ret = 0, got_output = 0;

if (!pkt) {
av_init_packet(&flush_pkt);
pkt = &flush_pkt;
got_output = 1;
} else if (!size && st->parser->flags & PARSER_FLAG_COMPLETE_FRAMES) {
// preserve 0-size sync packets
compute_pkt_fields(s, st, st->parser, pkt, AV_NOPTS_VALUE, AV_NOPTS_VALUE);
}

while (size > 0 || (pkt == &flush_pkt && got_output)) {
int len;
int64_t next_pts = pkt->pts;
int64_t next_dts = pkt->dts;

av_init_packet(&out_pkt);
/* 调用视频或音频解析器进行对应的解析 */
len = av_parser_parse2(st->parser, st->internal->avctx,
&out_pkt.data, &out_pkt.size, data, size,
pkt->pts, pkt->dts, pkt->pos);

pkt->pts = pkt->dts = AV_NOPTS_VALUE;
pkt->pos = -1;
/* increment read pointer */
data += len;
size -= len;

got_output = !!out_pkt.size;

if (!out_pkt.size)
continue;

if (pkt->side_data) {
out_pkt.side_data = pkt->side_data;
out_pkt.side_data_elems = pkt->side_data_elems;
pkt->side_data = NULL;
pkt->side_data_elems = 0;
}

/* set the duration */
out_pkt.duration = (st->parser->flags & PARSER_FLAG_COMPLETE_FRAMES) ? pkt->duration : 0;
if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
if (st->internal->avctx->sample_rate > 0) {
out_pkt.duration =
av_rescale_q_rnd(st->parser->duration,
(AVRational) { 1, st->internal->avctx->sample_rate },
st->time_base,
AV_ROUND_DOWN);
}
}

out_pkt.stream_index = st->index;
out_pkt.pts = st->parser->pts;
out_pkt.dts = st->parser->dts;
out_pkt.pos = st->parser->pos;

if (st->need_parsing == AVSTREAM_PARSE_FULL_RAW)
out_pkt.pos = st->parser->frame_offset;

if (st->parser->key_frame == 1 ||
(st->parser->key_frame == -1 &&
st->parser->pict_type == AV_PICTURE_TYPE_I))
out_pkt.flags |= AV_PKT_FLAG_KEY;

if (st->parser->key_frame == -1 && st->parser->pict_type ==AV_PICTURE_TYPE_NONE && (pkt->flags&AV_PKT_FLAG_KEY))
out_pkt.flags |= AV_PKT_FLAG_KEY;

/* 计算当前包的pts,dts,duration三个值 */
compute_pkt_fields(s, st, st->parser, &out_pkt, next_dts, next_pts);

/* 将当前包添加到包链表中 */
ret = add_to_pktbuf(&s->internal->parse_queue, &out_pkt,
&s->internal->parse_queue_end, 1);
av_packet_unref(&out_pkt);
if (ret < 0)
goto fail;
}

/* end of the stream => close and free the parser */
if (pkt == &flush_pkt) {
av_parser_close(st->parser);
st->parser = NULL;
}

fail:
av_packet_unref(pkt);
return ret;
}

4.4 av_parser_parse2()

libavformat/avcodec.h
/**
* Parse a packet.
*
* @param s parser context.
* @param avctx codec context.
* @param poutbuf set to pointer to parsed buffer or NULL if not yet finished.
* @param poutbuf_size set to size of parsed buffer or zero if not yet finished.
* @param buf input buffer.
* @param buf_size buffer size in bytes without the padding. I.e. the full buffer
size is assumed to be buf_size + AV_INPUT_BUFFER_PADDING_SIZE.
To signal EOF, this should be 0 (so that the last frame
can be output).
* @param pts input presentation timestamp.
* @param dts input decoding timestamp.
* @param pos input byte position in stream.
* @return the number of bytes of the input bitstream used.
*
* Example:
* @code
* while(in_len){
* len = av_parser_parse2(myparser, AVCodecContext, &data, &size,
* in_data, in_len,
* pts, dts, pos);
* in_data += len;
* in_len -= len;
*
* if(size)
* decode_frame(data, size);
* }
* @endcode
*/
int av_parser_parse2(AVCodecParserContext *s,
AVCodecContext *avctx,
uint8_t **poutbuf, int *poutbuf_size,
const uint8_t *buf, int buf_size,
int64_t pts, int64_t dts,
int64_t pos);
解析一个packet。

libavcodec/parser.c
int av_parser_parse2(AVCodecParserContext *s, AVCodecContext *avctx,
uint8_t **poutbuf, int *poutbuf_size,
const uint8_t *buf, int buf_size,
int64_t pts, int64_t dts, int64_t pos)
{
int index, i;
uint8_t dummy_buf[AV_INPUT_BUFFER_PADDING_SIZE];

av_assert1(avctx->codec_id != AV_CODEC_ID_NONE);

/* Parsers only work for the specified codec ids. */
av_assert1(avctx->codec_id == s->parser->codec_ids[0] ||
avctx->codec_id == s->parser->codec_ids[1] ||
avctx->codec_id == s->parser->codec_ids[2] ||
avctx->codec_id == s->parser->codec_ids[3] ||
avctx->codec_id == s->parser->codec_ids[4]);

if (!(s->flags & PARSER_FLAG_FETCHED_OFFSET)) {
s->next_frame_offset =
s->cur_offset = pos;
s->flags |= PARSER_FLAG_FETCHED_OFFSET;
}

if (buf_size == 0) {
/* padding is always necessary even if EOF, so we add it here */
memset(dummy_buf, 0, sizeof(dummy_buf));
buf = dummy_buf;
} else if (s->cur_offset + buf_size != s->cur_frame_end[s->cur_frame_start_index]) { /* skip remainder packets */
/* add a new packet descriptor */
i = (s->cur_frame_start_index + 1) & (AV_PARSER_PTS_NB - 1);
s->cur_frame_start_index = i;
s->cur_frame_offset[i] = s->cur_offset;
s->cur_frame_end[i] = s->cur_offset + buf_size;
s->cur_frame_pts[i] = pts;
s->cur_frame_dts[i] = dts;
s->cur_frame_pos[i] = pos;
}

if (s->fetch_timestamp) {
s->fetch_timestamp = 0;
s->last_pts = s->pts;
s->last_dts = s->dts;
s->last_pos = s->pos;
ff_fetch_timestamp(s, 0, 0, 0);
}
/* WARNING: the returned index can be negative */
/* 此处函数指针将指向当前包所对应流的解析器，
* 如H.264的视频流，则调用libavcodec/h264_parser.c:h264_parse()函数，
* 它将分解析H.264的NAL单元，从而获得这个包的完整信息
*/
index = s->parser->parser_parse(s, avctx, (const uint8_t **) poutbuf,
poutbuf_size, buf, buf_size);
av_assert0(index > -0x20000000); // The API does not allow returning AVERROR codes
#define FILL(name) if(s->name > 0 && avctx->name <= 0) avctx->name = s->name
if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
FILL(field_order);
}

/* update the file pointer */
if (*poutbuf_size) {
/* fill the data for the current frame */
s->frame_offset = s->next_frame_offset;

/* offset of the next frame */
s->next_frame_offset = s->cur_offset + index;
s->fetch_timestamp = 1;
}
if (index < 0)
index = 0;
s->cur_offset += index;
return index;
}