FFmpeg在libavformat模块提供音视频的muxer封装与demuxer解封装。其中muxer封装文件的流程包括avformat_write_header()、av_write_frame()和av_write_trailer()三个步骤。本文主要探讨av_write_frame函数如何写入音视频帧数据,包括音视频交错与音视频非交错两种情况。
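av_write_frame函数位于libavformat/mux.c,写音视频帧的流程如下:av_write_frame() → write_packets_common() → write_packet_common() → write_packet() → s->oformat->write_packet()(以mp4为例,即mov_write_packet() → mov_write_single_packet() → ff_mov_write_packet())。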
目录
1、av_write_frame
2、write_packets_common
3、write_packet_common
4、write_packet与interleaved_write_packet
5、s->oformat->write_packet
1、av_write_frame
av_write_frame的声明位于libavformat/avformat.h,具体如下:
/**
 * Write a packet to an output media file.
 *
 * This function passes the packet directly to the muxer, without any buffering
 * or reordering. The caller is responsible for correctly interleaving the
 * packets if the format requires it. Callers that want libavformat to handle
 * the interleaving should call av_interleaved_write_frame() instead of this
 * function.
 *
 * @param s media file handle
 * @param pkt The packet containing the data to be written. Note that unlike
 *            av_interleaved_write_frame(), this function does not take
 *            ownership of the packet passed to it (though some muxers may make
 *            an internal reference to the input packet).
 *            Passing NULL requests a flush: if the muxer sets
 *            AVFMT_ALLOW_FLUSH, its write_packet callback is invoked with a
 *            NULL packet; otherwise nothing is written and 1 is returned.
 * @return < 0 on error, = 0 if OK, 1 if flushed and there is no more data to flush
 *
 * @see av_interleaved_write_frame()
 */
int av_write_frame(AVFormatContext *s, AVPacket *pkt);
大致翻译为:写一个数据包到输出媒体文件。这个函数直接把数据包传递给封装器,不做任何缓存或重排序。如果封装格式需要交错,调用者要负责把数据包正确地交错排列。如果调用者希望由libavformat来处理交错,应该调用av_interleaved_write_frame()函数而不是当前这个函数。另外,与av_interleaved_write_frame()不同,本函数不会接管传入数据包的所有权。
下面来看看无交错写音视频av_write_frame()函数的实现,位于libavformat/mux.c:
/**
 * Non-interleaved write entry point.
 *
 * A NULL packet is a flush request: it is forwarded to the muxer's
 * write_packet callback only when the muxer sets AVFMT_ALLOW_FLUSH, otherwise
 * 1 is returned immediately.
 *
 * For a regular packet the caller keeps ownership of @p in, so the work is
 * done on the context's scratch packet (s->internal->pkt), which shares in's
 * payload instead of copying it. Packets flagged AV_PKT_FLAG_UNCODED_FRAME
 * are passed through as-is.
 *
 * @return the result of the muxer / write_packets_common(), or a negative
 *         AVERROR code if preparing the scratch packet fails
 */
int av_write_frame(AVFormatContext *s, AVPacket *in)
{
    AVPacket *pkt = s->internal->pkt;
    int ret;

    /* Flush request. */
    if (!in) {
        if (!(s->oformat->flags & AVFMT_ALLOW_FLUSH))
            return 1;

        ret = s->oformat->write_packet(s, NULL);
        flush_if_needed(s);
        /* A successful muxer call can still hide an I/O error on the AVIOContext. */
        if (ret >= 0 && s->pb && s->pb->error < 0)
            ret = s->pb->error;
        return ret;
    }

    if (!(in->flags & AV_PKT_FLAG_UNCODED_FRAME)) {
        /* We don't own in, so it must not be modified: build a shallow
         * alias in the scratch packet rather than duplicating the data. */
        av_packet_unref(pkt);
        pkt->buf  = NULL;
        pkt->data = in->data;
        pkt->size = in->size;

        ret = av_packet_copy_props(pkt, in);
        if (ret < 0)
            return ret;

        /* Keep a reference of our own on a refcounted payload. */
        if (in->buf && !(pkt->buf = av_buffer_ref(in->buf))) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }
    } else {
        /* Uncoded frames are written from the caller's packet directly. */
        pkt = in;
    }

    ret = write_packets_common(s, pkt, 0/*non-interleaved*/);

fail:
    /* In the uncoded-frame path pkt aliases in, so that packet is unreferenced here too. */
    av_packet_unref(pkt);
    return ret;
}
再看看有交错写音视频帧av_interleaved_write_frame()函数的实现:
/**
 * Interleaving write entry point.
 *
 * A NULL packet forwards a flush request (flush = 1) to
 * interleaved_write_packet(). Any other packet goes through
 * write_packets_common() in interleaved mode; if that fails, the packet is
 * unreferenced here before the error is returned.
 */
int av_interleaved_write_frame(AVFormatContext *s, AVPacket *pkt)
{
    int err;

    if (!pkt) {
        av_log(s, AV_LOG_TRACE, "av_interleaved_write_frame FLUSH\n");
        return interleaved_write_packet(s, NULL, 1/*flush*/);
    }

    err = write_packets_common(s, pkt, 1/*interleaved*/);
    if (err < 0)
        av_packet_unref(pkt);
    return err;
}
经过对比,内部都是调用write_packets_common()函数,区别在于av_write_frame()传参是无交错,而av_interleaved_write_frame()传参是有交错。
2、write_packets_common
接下来看看write_packets_common()函数的实现:
/**
 * Shared front end of av_write_frame() and av_interleaved_write_frame().
 *
 * Runs the packet through three checks, returning the first negative result,
 * then hands it either to the stream's bitstream-filter path or to the plain
 * write path.
 *
 * @param interleaved non-zero when the packet must go through the
 *                    interleaving queue, zero for a direct write
 */
static int write_packets_common(AVFormatContext *s, AVPacket *pkt, int interleaved)
{
    AVStream *stream;
    int err;

    /* Check the packet's stream_index and codec_type. */
    if ((err = check_packet(s, pkt)) < 0)
        return err;

    stream = s->streams[pkt->stream_index];

    /* Check the packet's pts and dts. */
    if ((err = prepare_input_packet(s, stream, pkt)) < 0)
        return err;

    /* Let s->oformat->check_bitstream() inspect the bitstream. */
    if ((err = check_bitstream(s, stream, pkt)) < 0)
        return err;

    /* With a bitstream filter attached, filter first and write the result;
     * otherwise write the packet the generic way. */
    return stream->internal->bsfc
           ? write_packets_from_bsfs(s, stream, pkt, interleaved)
           : write_packet_common(s, stream, pkt, interleaved);
}
由源码可知,主要是分4个步骤处理:
- 检查pkt的stream_index和codec_type;
- 检查pkt的pts和dts;
- 调用s->oformat->check_bitstream()检查码流;
- 调用write_packets_from_bsfs或write_packet_common写数据包;
3、write_packet_common
write_packets_from_bsfs()函数主要是经过bitstream filter处理,比如h264要处理startcode起始码。我们主要探讨write_packet_common()函数的实现:
/**
 * Write one packet that needs no bitstream filtering.
 *
 * Guesses the packet duration, optionally runs the legacy timestamp
 * computation, then dispatches on @p interleaved: direct write_packet() for
 * the non-interleaved case, interleaved_write_packet() otherwise.
 *
 * @return AVERROR(EINVAL) for an interleaved packet without dts when the
 *         muxer does not set AVFMT_NOTIMESTAMPS; otherwise the result of the
 *         selected write function
 */
static int write_packet_common(AVFormatContext *s, AVStream *st, AVPacket *pkt, int interleaved)
{
    int ret;

    /* Guess the duration of the packet. */
    guess_pkt_duration(s, st, pkt);

#if FF_API_COMPUTE_PKT_FIELDS2 && FF_API_LAVF_AVCTX
    ret = compute_muxer_pkt_fields(s, st, pkt);
    /* A failure here is ignored for muxers that do not use timestamps. */
    if (ret < 0 && !(s->oformat->flags & AVFMT_NOTIMESTAMPS))
        return ret;
#endif

    /* Non-interleaved: hand the packet straight to the muxer. */
    if (!interleaved)
        return write_packet(s, pkt);

    /* Interleaved: the packet is rejected without a dts unless the muxer sets AVFMT_NOTIMESTAMPS. */
    if (pkt->dts == AV_NOPTS_VALUE && !(s->oformat->flags & AVFMT_NOTIMESTAMPS))
        return AVERROR(EINVAL);

    return interleaved_write_packet(s, pkt, 0);
}
由此可见,主要根据interleaved标志位判断,如果为有交错就调用interleaved_write_packet()函数写数据包,如果为无交错就调用write_packet()函数写数据包。
4、write_packet与interleaved_write_packet
先看看interleaved_write_packet()函数的实现:
/**
 * Feed @p pkt (may be NULL) to interleave_packet() and write every packet it
 * hands back.
 *
 * The loop ends as soon as interleave_packet() returns a value <= 0, which is
 * returned unchanged, or as soon as write_packet() fails.
 */
static int interleaved_write_packet(AVFormatContext *s, AVPacket *pkt, int flush)
{
    AVPacket *input = pkt;

    while (1) {
        AVPacket out;
        int status = interleave_packet(s, &out, input, flush);

        if (status <= 0)
            return status;

        /* The input packet is passed only on the first iteration. */
        input = NULL;

        status = write_packet(s, &out);
        av_packet_unref(&out);
        if (status < 0)
            return status;
    }
}
由此可见,有交错写数据包先调用interleave_packet()函数进行交错排序,最终调用write_packet()函数写入数据包。接下来分析write_packet()函数的实现:
/* Add delta to each timestamp of pkt that is set (not AV_NOPTS_VALUE). */
static void shift_packet_timestamps(AVPacket *pkt, int64_t delta)
{
    if (pkt->dts != AV_NOPTS_VALUE)
        pkt->dts += delta;
    if (pkt->pts != AV_NOPTS_VALUE)
        pkt->pts += delta;
}

/**
 * Hand one packet to the muxer.
 *
 * Steps:
 *  - shift dts/pts by output_ts_offset, rescaled to the stream time base;
 *  - when avoid_negative_ts is enabled, shift dts/pts by the per-stream
 *    offset derived from the first usable timestamp;
 *  - call the muxer's write_uncoded_frame() or write_packet() callback;
 *  - on success, flush if needed, pick up a pending I/O error and bump the
 *    stream's nb_frames counter.
 *
 * @return the muxer's return value, or s->pb->error when the muxer succeeded
 *         but the I/O context reports an error
 */
static int write_packet(AVFormatContext *s, AVPacket *pkt)
{
    int ret;

    /* Apply output_ts_offset to dts and pts. */
    if (s->output_ts_offset) {
        AVStream *stream = s->streams[pkt->stream_index];

        shift_packet_timestamps(pkt, av_rescale_q(s->output_ts_offset,
                                                  AV_TIME_BASE_Q,
                                                  stream->time_base));
    }

    /* Correct negative timestamps. */
    if (s->avoid_negative_ts > 0) {
        AVStream *stream = s->streams[pkt->stream_index];
        int64_t shift    = stream->internal->mux_ts_offset;
        int64_t ref_ts   = s->internal->avoid_negative_ts_use_pts ? pkt->pts
                                                                  : pkt->dts;

        /* The global offset is set once, from the first timestamp that is
         * negative (or from any timestamp in MAKE_ZERO mode). */
        if (s->internal->offset == AV_NOPTS_VALUE && ref_ts != AV_NOPTS_VALUE &&
            (ref_ts < 0 || s->avoid_negative_ts == AVFMT_AVOID_NEG_TS_MAKE_ZERO)) {
            s->internal->offset          = -ref_ts;
            s->internal->offset_timebase = stream->time_base;
        }

        /* Derive this stream's offset from the global one while it is still zero. */
        if (!shift && s->internal->offset != AV_NOPTS_VALUE) {
            stream->internal->mux_ts_offset =
                av_rescale_q_rnd(s->internal->offset,
                                 s->internal->offset_timebase,
                                 stream->time_base,
                                 AV_ROUND_UP);
            shift = stream->internal->mux_ts_offset;
        }

        shift_packet_timestamps(pkt, shift);
    }

    if (pkt->flags & AV_PKT_FLAG_UNCODED_FRAME) {
        /* Uncoded frame: pkt->data holds an AVFrame pointer. */
        AVFrame **frame = (AVFrame **)pkt->data;
        av_assert0(pkt->size == sizeof(*frame));
        ret = s->oformat->write_uncoded_frame(s, pkt->stream_index, frame, 0);
    } else {
        /* Encoded packet. */
        ret = s->oformat->write_packet(s, pkt);
    }

    if (ret >= 0 && s->pb) {
        flush_if_needed(s);
        if (s->pb->error < 0)
            ret = s->pb->error;
    }

    if (ret < 0)
        return ret;

    s->streams[pkt->stream_index]->nb_frames++;
    return ret;
}
由源码可知,分3个步骤处理:
- 根据output_ts_offset来校正dts和pts时间戳;
- 校正负数的时间戳;
- 写入未压缩编码的数据包或写入编码的数据包;
5、s->oformat->write_packet
以mp4封装格式为例,位于libavformat/movenc.c,来看看mp4的AVOutputFormat:
/* Output-format descriptor of the "mp4" muxer: identification strings,
 * default codecs, capability flags and the callbacks reached through
 * s->oformat. */
AVOutputFormat ff_mp4_muxer = {
.name = "mp4",
.long_name = NULL_IF_CONFIG_SMALL("MP4 (MPEG-4 Part 14)"),
.mime_type = "video/mp4",
.extensions = "mp4",
.priv_data_size = sizeof(MOVMuxContext),
.audio_codec = AV_CODEC_ID_AAC,
/* Default video codec: H.264 when CONFIG_LIBX264_ENCODER is non-zero, MPEG-4 otherwise. */
.video_codec = CONFIG_LIBX264_ENCODER ?
AV_CODEC_ID_H264 : AV_CODEC_ID_MPEG4,
.init = mov_init,
.write_header = mov_write_header,
/* Called as s->oformat->write_packet() for every packet and, because
 * AVFMT_ALLOW_FLUSH is set below, also with a NULL packet on flush. */
.write_packet = mov_write_packet,
.write_trailer = mov_write_trailer,
.deinit = mov_free,
.flags = AVFMT_GLOBALHEADER | AVFMT_ALLOW_FLUSH | AVFMT_TS_NEGATIVE,
.codec_tag = mp4_codec_tags_list,
.check_bitstream = mov_check_bitstream,
.priv_class = &mp4_muxer_class,
};
此时write_packet函数指针指向mov_write_packet(),我们来探讨下mov_write_packet()函数实现:
/*
 * write_packet callback of the mp4 muxer (excerpt: part of the body is
 * elided at the "......" marker).
 *
 * - NULL packet: calls mov_flush_fragment(s, 1) and returns 1.
 * - Cover-image track: a reference to the packet is stored in
 *   trk->cover_image only while the stream has no frames yet; later packets
 *   return 0 without being stored.
 * - Any other packet: empty packets are passed straight to
 *   mov_write_single_packet(); otherwise pending subtitle end samples are
 *   written first, then the packet itself.
 */
static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
{
MOVMuxContext *mov = s->priv_data;
MOVTrack *trk;
// NULL packet: flush request
if (!pkt) {
mov_flush_fragment(s, 1);
return 1;
}
trk = &mov->tracks[pkt->stream_index];
if (is_cover_image(trk->st)) {
int ret;
// A frame was already counted for this stream: return 0 without storing the packet
if (trk->st->nb_frames >= 1) {
return 0;
}
if ((ret = av_packet_ref(trk->cover_image, pkt)) < 0)
return ret;
return 0;
} else {
int i;
if (!pkt->size)
return mov_write_single_packet(s, pkt); /* Passthrough. */
/*
 * Subtitles require special handling.
 *
 * 1) For full compliance, every track must have a sample at
 * dts == 0, which is rarely true for subtitles. So, as soon
 * as we see any packet with dts > 0, write an empty subtitle
 * at dts == 0 for any subtitle track with no samples in it.
 *
 * 2) For each subtitle track, check if the current packet's
 * dts is past the duration of the last subtitle sample. If
 * so, we now need to write an end sample for that subtitle.
 *
 * 3) See mov_write_trailer for how the final end sample is
 * handled.
 */
for (i = 0; i < mov->nb_streams; i++) {
// Note: this trk shadows the outer trk and refers to track i
MOVTrack *trk = &mov->tracks[i];
int ret;
if (trk->par->codec_id == AV_CODEC_ID_MOV_TEXT &&
trk->track_duration < pkt->dts &&
(trk->entry == 0 || !trk->last_sample_is_subtitle_end)) {
// Write the end-of-subtitle packet for this track
ret = mov_write_subtitle_end_packet(s, i, trk->track_duration);
if (ret < 0) return ret;
trk->last_sample_is_subtitle_end = 1;
}
}
......
// Write the single packet
return mov_write_single_packet(s, pkt);
}
}
由此可见,先判断pkt数据包是否为空,最终调用mov_write_single_packet()函数写单个数据包,函数实现如下:
/*
 * Validate one packet and pass it on to ff_mov_write_packet() (excerpt: the
 * processing between the check and the final call is elided at the "......"
 * marker).
 *
 * Returns the negative result of check_pkt() if the check fails, otherwise
 * the result of ff_mov_write_packet().
 */
static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt)
{
MOVMuxContext *mov = s->priv_data;
MOVTrack *trk = &mov->tracks[pkt->stream_index];
AVCodecParameters *par = trk->par;
int64_t frag_duration = 0;
int size = pkt->size;
// Validate the packet with check_pkt() (defined outside this excerpt).
// NOTE(review): pkt has already been dereferenced above, so this cannot be
// a NULL check; what exactly check_pkt() verifies is not visible here.
int ret = check_pkt(s, pkt);
if (ret < 0)
return ret;
......
return ff_mov_write_packet(s, pkt);
}
同样地,该函数先调用check_pkt()对pkt数据包进行检查(注意此前已经访问过pkt->stream_index,因此这里并不是判空),中间做一些逻辑处理,最终调用ff_mov_write_packet()函数来真正写数据包:
/*
 * Excerpt of the routine that writes one packet's payload to the output and
 * records it in the track's sample index (trk->cluster).
 *
 * The local declarations (mov, pb, trk, par, size, ret, samples_in_chunk,
 * offset, reformatted_data, ...) and some later steps are elided at the
 * "......" markers, so this listing is not compilable on its own.
 *
 * Visible steps, in order:
 *  1. cache codec configuration data in trk->vos_data;
 *  2. write the payload to pb, converting or encrypting it where needed;
 *  3. grow trk->cluster if it is full and fill in the new entry;
 *  4. update the track's start_dts / track_duration / end_pts bookkeeping;
 *  5. flag sync samples;
 *  6. free the temporary reformatted buffer and return ret.
 */
int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
{
......
// No vos data stored yet: if the codec parameters carry extradata, keep a
// zero-padded copy of it (skipped for AVCI tags and for DNxHD)
if (trk->vos_len == 0 && par->extradata_size > 0 &&
!TAG_IS_AVCI(trk->tag) &&
(par->codec_id != AV_CODEC_ID_DNXHD)) {
trk->vos_len = par->extradata_size;
trk->vos_data = av_malloc(trk->vos_len + AV_INPUT_BUFFER_PADDING_SIZE);
if (!trk->vos_data) {
ret = AVERROR(ENOMEM);
goto err;
}
memcpy(trk->vos_data, par->extradata, trk->vos_len);
memset(trk->vos_data + trk->vos_len, 0, AV_INPUT_BUFFER_PADDING_SIZE);
}
// Still no vos data for one of these codecs: keep a zero-padded copy of
// the packet payload instead
if ((par->codec_id == AV_CODEC_ID_DNXHD ||
par->codec_id == AV_CODEC_ID_H264 ||
par->codec_id == AV_CODEC_ID_HEVC ||
par->codec_id == AV_CODEC_ID_TRUEHD ||
par->codec_id == AV_CODEC_ID_AC3) && !trk->vos_len &&
!TAG_IS_AVCI(trk->tag)) {
/* copy frame to create needed atoms */
trk->vos_len = size;
trk->vos_data = av_malloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
if (!trk->vos_data) {
ret = AVERROR(ENOMEM);
goto err;
}
memcpy(trk->vos_data, pkt->data, size);
memset(trk->vos_data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
}
// AAC packet whose first 16 bits match 0xFFFx
// (NOTE(review): presumably an ADTS header - confirm): fail with -1 if the
// stream has no frames counted yet, otherwise only log a warning
if (par->codec_id == AV_CODEC_ID_AAC && pkt->size > 2 &&
(AV_RB16(pkt->data) & 0xfff0) == 0xfff0) {
if (!s->streams[pkt->stream_index]->nb_frames) {
return -1;
}
av_log(s, AV_LOG_WARNING, "aac bitstream error\n");
}
// Payload write. In the H.264 / HEVC / AV1 branches a valid hint-track
// index selects conversion into reformatted_data followed by avio_write();
// otherwise the conversion helper writes to pb directly.
if (par->codec_id == AV_CODEC_ID_H264 && trk->vos_len > 0
&& *(uint8_t *)trk->vos_data != 1 && !TAG_IS_AVCI(trk->tag)) {
/* from x264 or from bytestream H.264 */
/* NAL reformatting needed */
if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
ret = ff_avc_parse_nal_units_buf(pkt->data, &reformatted_data,
&size);
if (ret < 0)
return ret;
avio_write(pb, reformatted_data, size);
} else {
// Encrypted track (cenc.aes_ctr set): use the cenc variant of the NAL writer
if (trk->cenc.aes_ctr) {
size = ff_mov_cenc_avc_parse_nal_units(&trk->cenc, pb, pkt->data, size);
if (size < 0) {
ret = size;
goto err;
}
} else {
size = ff_avc_parse_nal_units(pb, pkt->data, pkt->size);
}
}
} else if (par->codec_id == AV_CODEC_ID_HEVC && trk->vos_len > 6 &&
(AV_RB24(trk->vos_data) == 1 || AV_RB32(trk->vos_data) == 1)) {
/* extradata is Annex B, assume the bitstream is too and convert it */
if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
ret = ff_hevc_annexb2mp4_buf(pkt->data, &reformatted_data,
&size, 0, NULL);
if (ret < 0)
return ret;
avio_write(pb, reformatted_data, size);
} else {
size = ff_hevc_annexb2mp4(pb, pkt->data, pkt->size, 0, NULL);
}
} else if (par->codec_id == AV_CODEC_ID_AV1) {
if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
ret = ff_av1_filter_obus_buf(pkt->data, &reformatted_data,
&size, &offset);
if (ret < 0)
return ret;
avio_write(pb, reformatted_data, size);
} else {
size = ff_av1_filter_obus(pb, pkt->data, pkt->size);
}
#if CONFIG_AC3_PARSER
} else if (par->codec_id == AV_CODEC_ID_EAC3) {
// handle_eac3() returns the number of bytes to write: negative is
// returned as an error, zero skips straight to the end label
size = handle_eac3(mov, pkt, trk);
if (size < 0)
return size;
else if (!size)
goto end;
avio_write(pb, pkt->data, size);
#endif
} else if (par->codec_id == AV_CODEC_ID_EIA_608) {
// The packet is scanned in 3-byte steps. Output size is 8 plus 2 bytes
// for every triplet whose first byte is 0xFC.
size = 8;
for (int i = 0; i < pkt->size; i += 3) {
if (pkt->data[i] == 0xFC) {
size += 2;
}
}
// Write the 32-bit size and the "cdat" fourcc, then the two trailing
// bytes of each 0xFC triplet
avio_wb32(pb, size);
ffio_wfourcc(pb, "cdat");
for (int i = 0; i < pkt->size; i += 3) {
if (pkt->data[i] == 0xFC) {
avio_w8(pb, pkt->data[i + 1]);
avio_w8(pb, pkt->data[i + 2]);
}
}
} else {
// All other codecs: write through the cenc helpers when cenc.aes_ctr is
// set, otherwise write the payload unchanged
if (trk->cenc.aes_ctr) {
if (par->codec_id == AV_CODEC_ID_H264 && par->extradata_size > 4) {
// NAL length size: low two bits of extradata byte 4, plus one
int nal_size_length = (par->extradata[4] & 0x3) + 1;
ret = ff_mov_cenc_avc_write_nal_units(s, &trk->cenc, nal_size_length, pb, pkt->data, size);
} else {
ret = ff_mov_cenc_write_packet(&trk->cenc, pb, pkt->data, size);
}
if (ret) {
goto err;
}
} else {
avio_write(pb, pkt->data, size);
}
}
// Index array full: grow it by MOV_INDEX_CLUSTER_SIZE entries. The result
// goes into a temporary first, so trk->cluster is untouched on failure.
if (trk->entry >= trk->cluster_capacity) {
unsigned new_capacity = trk->entry + MOV_INDEX_CLUSTER_SIZE;
void *cluster = av_realloc_array(trk->cluster, new_capacity, sizeof(*trk->cluster));
if (!cluster) {
ret = AVERROR(ENOMEM);
goto err;
}
trk->cluster = cluster;
trk->cluster_capacity = new_capacity;
}
// Fill in the index entry. pos is the current output position minus size,
// i.e. where the size bytes written above begin.
trk->cluster[trk->entry].pos = avio_tell(pb) - size;
trk->cluster[trk->entry].samples_in_chunk = samples_in_chunk;
trk->cluster[trk->entry].chunkNum = 0;
trk->cluster[trk->entry].size = size;
trk->cluster[trk->entry].entries = samples_in_chunk;
trk->cluster[trk->entry].dts = pkt->dts;
trk->cluster[trk->entry].pts = pkt->pts;
// First entry of a track that already has a start_dts
// (NOTE(review): presumably the first sample of a new fragment - confirm)
if (!trk->entry && trk->start_dts != AV_NOPTS_VALUE) {
if (!trk->frag_discont) {
// No discontinuity flagged: the entry's dts continues from start_dts + track_duration
trk->cluster[trk->entry].dts = trk->start_dts + trk->track_duration;
if ((mov->flags & FF_MOV_FLAG_DASH &&
!(mov->flags & (FF_MOV_FLAG_GLOBAL_SIDX | FF_MOV_FLAG_SKIP_SIDX))) ||
mov->mode == MODE_ISM)
pkt->pts = pkt->dts + trk->end_pts - trk->cluster[trk->entry].dts;
} else {
// Discontinuity flagged: recompute frag_start, reset end_pts, clear the flag
trk->frag_start = pkt->dts - trk->start_dts;
trk->end_pts = AV_NOPTS_VALUE;
trk->frag_discont = 0;
}
}
// First entry, no start_dts yet, no edit list, MAKE_ZERO mode: force start_dts and entry dts to 0
if (!trk->entry && trk->start_dts == AV_NOPTS_VALUE && !mov->use_editlist &&
s->avoid_negative_ts == AVFMT_AVOID_NEG_TS_MAKE_ZERO) {
trk->cluster[trk->entry].dts = trk->start_dts = 0;
}
// start_dts still unset: take it from this packet
if (trk->start_dts == AV_NOPTS_VALUE) {
trk->start_dts = pkt->dts;
if (trk->frag_discont) {
if (mov->use_editlist) {
trk->frag_start = pkt->pts;
trk->start_dts = pkt->dts - pkt->pts;
} else {
trk->frag_start = pkt->dts;
trk->start_dts = 0;
}
trk->frag_discont = 0;
} else if (pkt->dts && mov->moov_written)
av_log(s, AV_LOG_WARNING,
"Track %d starts with a nonzero dts %"PRId64", while the moov "
"already has been written.\n", pkt->stream_index, pkt->dts);
}
// Track duration: distance from start_dts to the end of this packet
trk->track_duration = pkt->dts - trk->start_dts + pkt->duration;
trk->last_sample_is_subtitle_end = 0;
// A packet without pts is treated as pts == dts
if (pkt->pts == AV_NOPTS_VALUE) {
pkt->pts = pkt->dts;
}
// Differing pts and dts set the MOV_TRACK_CTTS flag; cts stores the difference
if (pkt->dts != pkt->pts)
trk->flags |= MOV_TRACK_CTTS;
trk->cluster[trk->entry].cts = pkt->pts - pkt->dts;
trk->cluster[trk->entry].flags = 0;
if (trk->start_cts == AV_NOPTS_VALUE)
trk->start_cts = pkt->pts - pkt->dts;
// end_pts keeps the largest (entry dts + cts + duration) seen so far
if (trk->end_pts == AV_NOPTS_VALUE)
trk->end_pts = trk->cluster[trk->entry].dts +
trk->cluster[trk->entry].cts + pkt->duration;
else
trk->end_pts = FFMAX(trk->end_pts, trk->cluster[trk->entry].dts +
trk->cluster[trk->entry].cts +
pkt->duration);
// VC-1 and TrueHD packets go through their own frame parsers; for other
// codecs a packet flagged AV_PKT_FLAG_KEY is marked as a sync sample
if (par->codec_id == AV_CODEC_ID_VC1) {
mov_parse_vc1_frame(pkt, trk);
} else if (par->codec_id == AV_CODEC_ID_TRUEHD) {
mov_parse_truehd_frame(pkt, trk);
} else if (pkt->flags & AV_PKT_FLAG_KEY) {
if (mov->mode == MODE_MOV && par->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
trk->entry > 0) { // force sync sample for the first key frame
mov_parse_mpeg2_frame(pkt, &trk->cluster[trk->entry].flags);
if (trk->cluster[trk->entry].flags & MOV_PARTIAL_SYNC_SAMPLE)
trk->flags |= MOV_TRACK_STPS;
} else {
trk->cluster[trk->entry].flags = MOV_SYNC_SAMPLE;
}
if (trk->cluster[trk->entry].flags & MOV_SYNC_SAMPLE)
trk->has_keyframes++;
}
......
// Common exit: free reformatted_data unless it is the packet's own data pointer
end:
err:
if (pkt->data != reformatted_data)
av_free(reformatted_data);
return ret;
}
至此,av_write_frame()写音视频函数过程分析完毕。下一篇文章将与大家探讨av_write_trailer()函数的实现。