我们首先来大致看一下一个格式是怎么被用到的。
lavf 公共格式:
AVFormatContext {AVInputFormat *iformat; //输入格式
AVOutputFormat *oformat; //或者输出格式
void *priv_data; //私有上下文
AVIOContext *pb; //输入输出上下文
unsigned int nb_streams; //几路流
AVStream *streams[]; //流
char filename[1024]; //文件名字
AVPacketList *packet_buffer; //缓存还没有解码的包,比如用作AVFMT_FLAG_GENPTS
};
AVInputFormat{
const char *name; //格式的名字
const AVClass *priv_class; //格式的参数option等静态信息
int (*read_header)(struct AVFormatContext *);//读取文件头来初始化AVFormatContext
int (*read_packet)(struct AVFormatContext *, AVPacket *pkt);
int (*read_close)(struct AVFormatContext *);
};
AVStream{
int index; //反向索引,AVFormatContext.streams[]的下标
AVCodecContext *codec; //文件头里面的编解码信息
void *priv_data;
AVRational time_base; //时间单位,比如1/29.4秒
int64_t duration; //从比特率和文件大小来估计
int skip_to_keyframe; //read_frame_internal用来只返回关键帧
};
AVPacket{
AVBufferRef *buf; //指向存放数据的buffer, 不为空表示引用计数
int64_t pts; //解码呈现给用户的时间
int64_t dts; //包被解码的时间
uint8_t *data;
int size;
int stream_index; //反向索引
int duration; //等于下一个pts - 当前pts
int64_t pos; //在流里面第几个字节
};
输入设备被当成demuxer, demuxer又被表示成AVInputFormat。
pkt.duration? probe pes?
上述两个问题是想直接把PES作为AVPacket传给ffmpeg要解决的。
如果支持pes探测,则pes自带pts,dts, 则
duration=next_pts - this_pts.
如果只给ffmpeg ES,不带pts,dts,我测试的效果是音频和视频完全不同步。
测试probe audio.pes可以,但是video.pes不行。
假设我们写了一个ff_pes_demuxer,
"-f pes" 对应
AVInputFormat *file_iformat = av_find_input_format("pes");
省去了格式探测过程。
在compute_pkt_fields里面,
如果pkt->duration 为零,则通过r_frame_rate,time_base, pkt->size来估计;
否则用它去更新PacketList里面的dts/pts/duration。
用ffmpeg生成的hls.ts,分析packet的duration也是零,
按理说能正常播的话,就侧面证明这个字段在demuxing时,可以置为零。
增加格式pes
====
在libavformat/Makefile加一行:
OBJS-$(CONFIG_PES_DEMUXER) += pes.o
在libavformat/allformats.c加一行:
REGISTER_DEMUXER (PES, pes);
然后重新configure就可以了。
参数传递
====
AVFormat.OutputFormat.Class.Option
obj 必须指向顶层结构比如AVFormat等。
AVFormatContext *ofmt_ctx;
av_opt_set_int(obj = ofmt_ctx, "hls_wrap", 5, AV_OPT_SEARCH_CHILDREN);
另一个相关的是把参数集放到词典里:
AVInputFormat *file_iformat;
AVDictionary *format_opts = NULL;
av_dict_set(&format_opts, "codec_id", "0x15000", 0); // ./ffprobe audio.pes -codec_id 0x15000
file_iformat = av_find_input_format("pes"); // ./ffprobe -f pes
avformat_open_input(&ic, filename, file_iformat, &format_opts);
常用的codec_id有:
AV_CODEC_ID_MPEG2VIDEO = 2
AV_CODEC_ID_H264 = 28
AV_CODEC_ID_MP2 = 0x15000
公共接口:
avformat_open_input(AVFormatContext **ps, const char *filename,
AVInputFormat *fmt, AVDictionary **options)
主要是打开文件,探测格式:
avio_open2(&s->pb, filename);
av_probe_input_buffer2();
s->iformat->read_header(s);
avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options)
st = ic->streams[i]; //i \in [0, ic->nb_streams)
在ffm_read_header,pmt_cb里面,mpegts_set_stream_info-->
avpriv_set_pts_info(st, 33, 1, 90000);设置st->time_base,然后赋给
st->codec->time_base = st->time_base;
read_frame_internal(ic, &pkt1)-->ff_read_packet
对于一些没有文件头的格式,比如mpeg, 会动态的avformat_new_stream,
比如在pmt_cb和mpegts_push_data时。
codec = avcodec_find_decoder(st->codec->codec_id);
通常这些没有:
frame_size, sample_fmt, sample_rate, channels
width, pix_fmt,
打开avcodec_open2(st->codec, codec, options)来解码这些参数,
比如mpegaudiodec_template.c:decode_frame-->avpriv_mpegaudio_decode_header
mpeg12dec.c:decode_chunks-->
mpeg1_decode_sequence
mpeg_decode_postinit -->ff_set_dimensions
mpeg音频只需要读文件头就可以了,mpeg视频稍微复杂些,要去找SEQ_START_CODE,
h264就更复杂了,不是两句话就说的清楚的,请参考[2]
int av_read_frame(AVFormatContext *s, AVPacket *pkt)
返回一个流里面的下一帧,并猜测包的pts/dts/duration。
通常情形下,视频pkt包含完整一帧数据,音频pkt包含连续几帧数据。
异常情况下,pkt可能包含部分帧,帧的后面部分,帧间的无效数据。
比如mpegts.c:mpegts_push_data, pes头里面没有指定大小。
附我为ffmpeg写的第一个扩展:
#include "libavutil/opt.h"
#include "libavutil/avstring.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/crc.h"
#include "libavutil/dict.h"
#include "libavutil/mathematics.h"
#include "avformat.h"
#include "internal.h"
#include "avio_internal.h"
/* Largest PES header this demuxer reads: 9 fixed bytes plus the header-data
 * length, which pes_read_packet takes from a single byte (so at most 255). */
#define MAX_PES_HEADER_SIZE (9 + 255)
/* Allocation size pes_read_packet starts from; it is kept as the packet
 * capacity when the 16-bit PES length field is 0. */
#define MAX_PES_PAYLOAD_SIZE (200*1024)
/* Private state of the PES demuxer (sized via ff_pes_demuxer.priv_data_size). */
typedef struct {
    AVClass *class;           /* required as first member so priv_class options can be applied */
    enum AVCodecID codec_id;  /* AV_CODEC_ID_XX value, filled in through the "codec_id" option */
    int64_t last_dts;         /* DTS carried over from the previously returned packet */
    int64_t last_pts;         /* PTS carried over from the previously returned packet */
    int extended_stream_id;   /* stream id from PES extension 2, or -1 when absent */
} PesContext;
/**
 * Tell whether a 32-bit big-endian word is a PES packet start code.
 *
 * A start code is the 24-bit prefix 0x000001 followed by a stream_id byte.
 * Accepted stream ids are the whole audio range (0xc0..0xdf), the whole video
 * range (0xe0..0xef) and the special ids listed in the switch below.
 *
 * @param code four consecutive stream bytes, first byte most significant
 * @return 1 if code is an accepted PES start code, 0 otherwise
 */
static int is_pes_start_code(int32_t code)
{
    const uint32_t word      = (uint32_t)code;
    const unsigned stream_id = word & 0xff;

    if ((word >> 8) != 0x000001)
        return 0;

    /* Audio ids are 110x xxxx and video ids are 1110 xxxx, which together
     * form one contiguous range; accept every stream number, not only 0. */
    if (stream_id >= 0xc0 && stream_id <= 0xef)
        return 1;

    switch (stream_id) {
    case 0xbc: /* program_stream_map */
    case 0xbf: /* private_stream_2 */
    case 0xf0: /* ECM */
    case 0xf1: /* EMM */
    case 0xf2: /* DSMCC_stream */
    case 0xf8: /* H.222.1 type E stream */
    case 0xff: /* program_stream_directory */
        return 1;
    default:
        return 0;
    }
}
/**
 * Assemble the 33-bit timestamp stored in a 5-byte PES PTS/DTS field.
 *
 * @param buf first of the five timestamp bytes
 * @return the timestamp; bits 32..30 come from byte 0, bits 29..15 from
 *         bytes 1-2 and bits 14..0 from bytes 3-4 (each 16-bit group drops
 *         its lowest bit)
 */
static inline int64_t parse_pes_pts(const uint8_t *buf) {
    const int64_t top    = (int64_t)(buf[0] & 0x0e) << 29;
    const int64_t middle = (AV_RB16(buf + 1) >> 1) << 15;
    const int64_t bottom = AV_RB16(buf + 3) >> 1;

    return top | middle | bottom;
}
/**
 * Probe callback: scan the probe buffer for any accepted PES start code.
 *
 * Every 4-byte window of the buffer is tested, including the last one.
 *
 * @param p probe data; only p->buf and p->buf_size are read
 * @return AVPROBE_SCORE_EXTENSION + 1 if a start code is found, 0 otherwise
 *         (also 0 when the buffer is shorter than one start code)
 */
static int pes_read_probe(AVProbeData *p)
{
    const uint8_t *ptr, *last;

    /* Guard first so the pointer below never lands before the buffer. */
    if (p->buf_size < (int)sizeof(uint32_t))
        return 0;

    /* Address of the final position at which 4 bytes can still be read. */
    last = p->buf + p->buf_size - sizeof(uint32_t);
    for (ptr = p->buf; ptr <= last; ++ptr) {
        if (is_pes_start_code(AV_RB32(ptr)))
            return AVPROBE_SCORE_EXTENSION + 1;
    }
    return 0;
}
/**
 * Header callback: create the single stream this demuxer exposes.
 *
 * No bytes are consumed from the input. The stream's codec id comes from the
 * "codec_id" private option, and the timestamp memory is reset.
 *
 * @param s demuxer context; s->priv_data is the PesContext
 * @return 0 on success, AVERROR(ENOMEM) if the stream cannot be allocated
 */
static int pes_read_header(AVFormatContext *s)
{
    PesContext *pes = s->priv_data;
    AVStream *st    = avformat_new_stream(s, NULL);

    if (!st)
        return AVERROR(ENOMEM);

    st->codec->codec_id = pes->codec_id;
    /* Ids at or above AV_CODEC_ID_MP2 are labelled audio, everything below
     * video.
     * NOTE(review): any non-audio id numerically above AV_CODEC_ID_MP2 would
     * also be labelled audio - confirm this is acceptable. */
    st->codec->codec_type = pes->codec_id >= AV_CODEC_ID_MP2 ? AVMEDIA_TYPE_AUDIO
                                                            : AVMEDIA_TYPE_VIDEO;

    pes->last_pts = pes->last_dts = AV_NOPTS_VALUE;
    /* the parameters will be extracted from the compressed bitstream */
    return 0;
}
/**
 * Copy bytes from pb into buf until the next PES start code, EOF, or a full
 * buffer, whichever comes first.
 *
 * When a start code is found, the input is rewound by 4 bytes so that the
 * next read begins on the start code, and the 4 start-code bytes are not
 * counted in the result (they remain in buf just past the reported length).
 *
 * @param pb   input to read from, one byte at a time
 * @param buf  destination buffer
 * @param size in: capacity of buf in bytes; out: number of payload bytes stored
 * @return always 0
 */
static int read_to_next_sync(AVIOContext *pb, uint8_t *buf, int *size)
{
    uint8_t *ptr           = buf;
    uint8_t *const ptr_end = buf + *size;
    uint32_t window        = 0xffffffff; /* last four bytes read, oldest on top */
    int found              = 0;

    /* The capacity is checked before every single byte is stored, and the
     * rolling window also catches start codes that follow extra zero bytes
     * (e.g. 00 00 00 01 e0). */
    while (ptr < ptr_end && !url_feof(pb)) {
        const uint8_t byte = avio_r8(pb);

        /* avio_r8 yields 0 once EOF is hit; do not store that as payload. */
        if (url_feof(pb))
            break;

        *ptr++ = byte;
        window = (window << 8) | byte;
        if ((window >> 8) == 0x000001 && is_pes_start_code((int32_t)window)) {
            found = 1;
            break;
        }
    }

    if (found) {
        avio_seek(pb, -4, SEEK_CUR);
        *size = (int)(ptr - buf) - 4;
    } else {
        *size = (int)(ptr - buf);
    }
    return 0;
}
/**
 * Packet callback: return the payload of the PES packet at the current
 * position.
 *
 * If the input is positioned on a PES start code, the header is parsed for
 * PTS/DTS and the optional extended stream id. A non-zero PES length field
 * gives the exact payload size; a zero length field means the payload runs
 * until the next start code. If the input is not positioned on a start code,
 * the bytes up to the next start code are returned as-is.
 *
 * Packets that carry no timestamps of their own inherit the PTS/DTS of the
 * previously returned packet.
 *
 * @param s   demuxer context; s->priv_data is the PesContext
 * @param pkt packet to fill; allocated here, released here on failure
 * @return 0 on success, AVERROR_EOF when fewer than 9 header bytes remain,
 *         AVERROR_INVALIDDATA when the header is larger than the declared
 *         packet, AVERROR(ENOMEM) on allocation failure, or the error
 *         returned by the I/O layer
 */
static int pes_read_packet(AVFormatContext *s, AVPacket *pkt)
{
    PesContext *pes = s->priv_data;
    AVIOContext *pb = s->pb;
    int64_t pts = pes->last_pts, dts = pes->last_dts;
    int64_t start, pos, seek_ret;
    int size = MAX_PES_PAYLOAD_SIZE;
    int header_size, total_size = 0;
    int ret;
    uint8_t buf[MAX_PES_HEADER_SIZE] = "";

    start = pos = avio_tell(pb);
    header_size = avio_read(pb, buf, sizeof(buf));
    if (header_size < 9)
        return header_size < 0 ? header_size : AVERROR_EOF;

    if (is_pes_start_code(AV_RB32(buf))) {
        const unsigned int flags  = buf[7];
        const int pes_header_size = buf[8] + 9;
        const uint8_t *r          = buf + 9;

        total_size = AV_RB16(buf + 4);

        if ((flags & 0xc0) == 0x80) {          /* PTS only */
            dts = pts = parse_pes_pts(r);
            r += 5;
        } else if ((flags & 0xc0) == 0xc0) {   /* PTS followed by DTS */
            pts = parse_pes_pts(r);
            r += 5;
            dts = parse_pes_pts(r);
            r += 5;
        }

        pes->extended_stream_id = -1;
        if (flags & 0x01) { /* PES extension */
            unsigned int pes_ext = *r++;
            /* Skip PES private data, program packet sequence counter and P-STD buffer */
            unsigned int skip = (pes_ext >> 4) & 0xb;
            skip += skip & 0x9;
            r += skip;
            if ((pes_ext & 0x41) == 0x01 &&
                (r + 2) <= (buf + pes_header_size)) {
                /* PES extension 2 */
                if ((r[0] & 0x7f) > 0 && (r[1] & 0x80) == 0)
                    pes->extended_stream_id = r[1];
            }
        }

        pos += pes_header_size;

        if (total_size) {
            /* The length field counts the bytes after the first 6. */
            size = total_size + 6 - pes_header_size;
            if (size < 0)
                return AVERROR_INVALIDDATA;
        }
    }

    if (av_new_packet(pkt, size) < 0)
        return AVERROR(ENOMEM);

    /* The header read above overshot; go back to the first payload byte. */
    seek_ret = avio_seek(pb, pos, SEEK_SET);
    if (seek_ret < 0) {
        av_free_packet(pkt);
        return (int)seek_ret;
    }

    if (total_size) {
        ret = avio_read(pb, pkt->data, size);
        if (ret < 0) {
            av_free_packet(pkt);
            return ret;
        }
        /* Truncated input: expose only the bytes that were really read. */
        av_shrink_packet(pkt, ret);
    } else {
        int got = size;
        read_to_next_sync(pb, pkt->data, &got);
        av_shrink_packet(pkt, got);
    }

    pkt->flags &= ~AV_PKT_FLAG_CORRUPT;
    pkt->stream_index = 0;
    pkt->pos = start;
    pkt->pts = pts;
    pkt->dts = dts;
    pes->last_pts = pts;
    pes->last_dts = dts;
    return 0;
}
/* Private options of the PES demuxer; the table ends with a NULL entry. */
static const AVOption options[] = {
    {
        "codec_id",
        "AV_CODEC_ID_XX",
        offsetof(PesContext, codec_id),   /* stored directly in the private context */
        AV_OPT_TYPE_INT,
        { .i64 = 0 },
        0,
        INT_MAX,
        AV_OPT_FLAG_DECODING_PARAM
    },
    { NULL },
};
/* AVClass that exposes the option table above for the demuxer's private context. */
static const AVClass demuxer_class = {
    .class_name = "pes class",
    .category   = AV_CLASS_CATEGORY_DEMUXER,
    .version    = LIBAVUTIL_VERSION_INT,
    .item_name  = av_default_item_name,
    .option     = options,
};
/* Demuxer descriptor for the "pes" format; ties the callbacks, private
 * context size and option class together. */
AVInputFormat ff_pes_demuxer = {
    .name           = "pes",
    .long_name      = NULL_IF_CONFIG_SMALL("Packeted Elementary Stream"),
    .extensions     = "pes",
    .flags          = AVFMT_GENERIC_INDEX,
    .priv_data_size = sizeof(PesContext),
    .priv_class     = &demuxer_class,
    .read_probe     = pes_read_probe,
    .read_header    = pes_read_header,
    .read_packet    = pes_read_packet,
};
测试
录制的流可以播放,音视频正常同步, 不过场景变化快时有些许马赛克。
TODO
[mpeg @ 0x1a5ca10] buffer underflow st=1 bufi=3440 size=6912
[mpeg @ 0x1a5ca10] packet too large, ignoring buffer limits to mux it
[mpeg @ 0x1a5ca10] buffer underflow st=1 bufi=3440 size=6912
参考
[2] http://stackoverflow.com/questions/6394874/fetching-the-dimensions-of-a-h264video-stream