增加一个 AVFormat 格式:PES

最新推荐文章于 2022-09-23 00:55:49 发布

芦笛

最新推荐文章于 2022-09-23 00:55:49 发布

阅读量1.1k

点赞数

分类专栏： format 文章标签： codec ffmpeg

本文链接：https://blog.csdn.net/deyangliu/article/details/39481697

版权

format 专栏收录该内容

8 篇文章 0 订阅

订阅专栏

我们首先来看大致一个格式是怎么用到的。

lavf 公共格式:

AVFormatContext {
AVInputFormat *iformat; //输入格式
AVOutputFormat *oformat; //或者输出格式
void *priv_data; //私有上下文
AVIOContext *pb; //输入输出上下文
unsigned int nb_streams; //几路流
AVStream *streams[]; //流
char filename[1024]; //文件名字
AVPacketList *packet_buffer; //缓存还没有解码的包，比如用作AVFMT_FLAG_GENPTS
};

AVInputFormat{
const char *name; //格式的名字
const AVClass *priv_class; //格式的参数option等静态信息
int (*read_header)(struct AVFormatContext *);//读取文件头来初始化AVFormatContext
int (*read_packet)(struct AVFormatContext *, AVPacket *pkt);
int (*read_close)(struct AVFormatContext *);
};

AVStream{
int index; //反向索引，AVFormatContext.streams[]的下标
AVCodecContext *codec; //文件头里面的编解码信息
void *priv_data;
AVRational time_base; //时间单位，比如1/29.4秒
int64_t duration; //从比特率和文件大小来估计
int skip_to_keyframe; //read_frame_internal用来只返回关键帧
};

AVPacket{
AVBufferRef *buf; //指向存放数据的buffer, 不为空表示引用计数
int64_t pts; //解码呈现给用户的时间
int64_t dts; //包被解码的时间
uint8_t *data;
int size;
int stream_index; //反向索引
int duration; //等于下一个pts - 当前pts
int64_t pos; //在流里面第几个字节
};

输入设备被当成demuxer, demuxer又被表示成AVInputFormat。

pkt.duration？ probe pes?
上述两个问题是想直接把PES作为AVPacket传给ffmpeg要解决的。
如果支持pes探测，则pes自带pts,dts, 则

duration=next_pts - this_pts.
如果只给ffmpeg ES，不带pts,dts,我测试的效果是音频和视频完全不同步。
测试probe audio.pes可以，但是video.pes不行。

假设我们写了一个ff_pes_demuxer,
"-f pes" 对应
AVInputFormat *file_iformat = av_find_input_format("pes");
省去了格式探测过程。

在compute_pkt_fields里面，
如果pkt->duration 为零，则通过r_frame_rate，time_base, pkt->size来估计；
否则用它去更新PacketList里面的dts/pts/duration。
用ffmpeg生成的hls.ts，分析packet的duration也是零，
按理说能正常播的话，就侧面证明这个字段在demuxing时，可以置为零。

增加格式pes
====
在libavformat/Makefile加一行:
OBJS-$(CONFIG_PES_DEMUXER) += pes.o
在libavformat/allformats.c加一行:
REGISTER_DEMUXER (PES, pes);
然后重新configure就可以了。

参数传递
====
AVFormat.OutputFormat.Class.Option
obj 必须指向顶层结构比如AVFormat等。

AVFormatContext *ofmt_ctx;
av_opt_set_int(obj = ofmt_ctx, "hls_wrap", 5, AV_OPT_SEARCH_CHILDREN);

另一个相关的是把参数集放到词典里:
AVInputFormat *file_iformat;
AVDictionary *format_opts = NULL;
av_dict_set(&format_opts, "codec_id", "0x15000", 0); // ./ffprobe audio.pes -codec_id 0x15000
file_iformat = av_find_input_format("pes"); // ./ffprobe -f pes
avformat_open_input(&ic, filename, file_iformat, &format_opts);
常用的codec_id有:
AV_CODEC_ID_MPEG2VIDEO = 2
AV_CODEC_ID_H264 = 28
AV_CODEC_ID_MP2 = 0x15000

公共接口:
avformat_open_input(AVFormatContext **ps, const char *filename,
AVInputFormat *fmt, AVDictionary **options)
主要是打开文件，探测格式:
avio_open2(&s->pb, filename);
av_probe_input_buffer2();
s->iformat->read_header(s);
avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options)

st = ic->streams[i]; //i \in [0, ic->nb_streams)
在ffm_read_header，pmt_cb里面，mpegts_set_stream_info-->
avpriv_set_pts_info(st, 33, 1, 90000);设置st->time_base，然后赋给
st->codec->time_base = st->time_base;

read_frame_internal(ic, &pkt1)-->ff_read_packet
对于一些没有文件头的格式，比如mpeg, 会动态的avformat_new_stream，
比如在pmt_cb和mpegts_push_data时。

codec = avcodec_find_decoder(st->codec->codec_id);
通常这些没有：
frame_size, sample_fmt, sample_rate, channels
width, pix_fmt,
打开avcodec_open2(st->codec, codec, options)来解码这些参数，
比如mpegaudiodec_template.c:decode_frame-->avpriv_mpegaudio_decode_header
mpeg12dec.c:decode_chunks-->
mpeg1_decode_sequence
mpeg_decode_postinit -->ff_set_dimensions
mpeg音频只需要读文件头就可以了，mpeg视频稍微复杂些，要去找SEQ_START_CODE，
h264就更复杂了，不是两句话就说的清楚的，请参考[2]

int av_read_frame(AVFormatContext *s, AVPacket *pkt)
返回一个流里面的下一帧，并猜测包的pts/dts/duration。
通常情形下，视频pkt包含完整一帧数据，音频pkt包含连续几帧数据。
异常情况下，pkt可能包含部分帧，帧的后面部分，帧间的无效数据。

比如mpegts.c:mpegts_push_data, pes头里面没有指定大小。

附我为ffmpeg写的第一个扩展：


#include "libavutil/opt.h"
#include "libavutil/avstring.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/crc.h"
#include "libavutil/dict.h"
#include "libavutil/mathematics.h"
#include "avformat.h"
#include "internal.h"
#include "avio_internal.h"

/* Largest PES header handled by this demuxer: 9 fixed bytes plus the value of
 * the one-byte header-data-length field (at most 255). */
#define MAX_PES_HEADER_SIZE (9 + 255)
/* Size of the packet buffer allocated when the PES header carries no length. */
#define MAX_PES_PAYLOAD_SIZE (200*1024)
/* Private state of the PES demuxer (stored in AVFormatContext.priv_data). */
typedef struct PesContext {
	/* Needed for priv_class options; must remain the first member. */
	AVClass *class;
	/* AV_CODEC_ID_XX of the single stream, set through the "codec_id" option. */
	enum AVCodecID codec_id;
	/* DTS carried by the most recently read PES packet. */
	int64_t last_dts;
	/* PTS carried by the most recently read PES packet. */
	int64_t last_pts;
	/* Stream id taken from PES extension 2, or -1 when none was found. */
	int extended_stream_id;
} PesContext;

/**
 * Tell whether a 32-bit word is a PES packet start code recognised by this
 * demuxer.
 *
 * A start code is the prefix 0x000001 followed by a one-byte stream id.
 * Audio streams occupy the whole id range 0xc0-0xdf and video streams the
 * range 0xe0-0xef, so every id in those ranges is accepted, not only the
 * first stream of each kind.
 *
 * @param code four consecutive bytes of the stream, read big-endian
 * @return 1 if code is a recognised PES start code, 0 otherwise
 */
static int is_pes_start_code(int32_t code)
{
	const uint32_t word = (uint32_t)code;
	const unsigned int stream_id = word & 0xff;

	if ((word >> 8) != 0x000001)
		return 0;

	if (stream_id >= 0xc0 && stream_id <= 0xdf) /* audio stream 0..31 */
		return 1;
	if (stream_id >= 0xe0 && stream_id <= 0xef) /* video stream 0..15 */
		return 1;

	switch (stream_id) {
	case 0xbc: /* program_stream_map */
	case 0xbf: /* private_stream_2 */
	case 0xf0: /* ECM */
	case 0xf1: /* EMM */
	case 0xf2: /* DSMCC_stream */
	case 0xf8: /* H.222.1 type E stream */
	case 0xff: /* program_stream_directory */
		return 1;
	default:
		return 0;
	}
}

/**
 * Decode a 33-bit timestamp from the 5-byte PTS/DTS field of a PES header.
 *
 * @param buf pointer to the first of the five timestamp bytes
 * @return the timestamp assembled from its three pieces
 */
static inline int64_t parse_pes_pts(const uint8_t *buf) {
    /* bits 32..30 sit in bits 3..1 of the first byte */
    const int64_t top    = (int64_t)(buf[0] & 0x0e) << 29;
    /* bits 29..15: upper 15 bits of the 16-bit word at offset 1 */
    const int     middle = (AV_RB16(buf + 1) >> 1) << 15;
    /* bits 14..0: upper 15 bits of the 16-bit word at offset 3 */
    const int     bottom = AV_RB16(buf + 3) >> 1;

    return top | middle | bottom;
}

/**
 * Probe callback: look for a PES start code anywhere in the probe buffer.
 *
 * Every 4-byte window that lies completely inside the buffer is examined,
 * including the last one. Buffers shorter than four bytes never match.
 *
 * @param p probe data (buffer and its size)
 * @return AVPROBE_SCORE_EXTENSION + 1 when a start code is found, else 0
 */
static int pes_read_probe(AVProbeData *p)
{
	int i;

	/* i + 4 <= buf_size keeps the 32-bit read inside the buffer and is
	 * simply false for buffers shorter than a start code. */
	for (i = 0; i + 4 <= p->buf_size; i++) {
		if (is_pes_start_code(AV_RB32(p->buf + i)))
			return AVPROBE_SCORE_EXTENSION + 1;
	}
	return 0;
}

/**
 * Header callback: create the single stream carried by the PES file.
 *
 * A bare PES file has no header, so nothing is read from the input here.
 * The codec id comes from the "codec_id" private option. The media type is
 * looked up from that id, so PCM audio and subtitle ids are classified
 * correctly. When the lookup gives no answer, the previous rule is kept:
 * ids at or above AV_CODEC_ID_MP2 are audio and the rest are video.
 * The remaining codec parameters are left for the decoder to extract from
 * the compressed bitstream.
 *
 * @param s demuxer context
 * @return 0 on success, AVERROR(ENOMEM) if the stream cannot be allocated
 */
static int pes_read_header(AVFormatContext *s)
{
	PesContext *pes = s->priv_data;
	AVStream *st;
	enum AVMediaType type;

	st = avformat_new_stream(s, NULL);
	if (!st)
		return AVERROR(ENOMEM);

	type = avcodec_get_type(pes->codec_id);
	if (type == AVMEDIA_TYPE_UNKNOWN)
		type = pes->codec_id >= AV_CODEC_ID_MP2 ? AVMEDIA_TYPE_AUDIO
		                                        : AVMEDIA_TYPE_VIDEO;
	st->codec->codec_type = type;
	st->codec->codec_id   = pes->codec_id;

	/* no timestamp has been seen yet */
	pes->last_pts = pes->last_dts = AV_NOPTS_VALUE;
	return 0;
}

/**
 * Copy bytes from pb into buf until the next PES start code, the end of the
 * buffer or the end of the input, whichever comes first.
 *
 * The input is consumed one byte at a time while the last four bytes are
 * kept in a rolling 32-bit word. A start code is therefore found whatever
 * precedes it (for example 00 00 00 01 e0), and each byte is bounds-checked
 * before it is stored, so nothing is written past buf + *size.
 *
 * When a start code is found, the input is rewound to its first byte and the
 * start code itself is not counted in the returned size.
 *
 * @param pb   input to read from
 * @param buf  destination buffer
 * @param size in: capacity of buf in bytes; out: number of payload bytes
 *             stored in buf
 * @return always 0
 */
static int read_to_next_sync(AVIOContext *pb, uint8_t *buf, int *size)
{
	uint8_t *ptr = buf;
	uint8_t *const ptr_end = buf + *size;
	/* Seeded with all ones so the first three bytes alone can never form
	 * the 0x000001 prefix. */
	uint32_t window = 0xffffffff;
	int found = 0;

	while (ptr < ptr_end) {
		const int byte = avio_r8(pb);

		/* NOTE(review): avio_r8() is expected to yield a filler 0 once the
		 * input is exhausted; such a byte is not stored as payload. */
		if (url_feof(pb))
			break;

		*ptr++ = (uint8_t)byte;
		window = (window << 8) | (uint8_t)byte;
		if (is_pes_start_code((int32_t)window)) {
			found = 1;
			break;
		}
	}

	if (found) {
		/* leave the start code in the input for the next packet */
		avio_seek(pb, -4, SEEK_CUR);
		*size = ptr - buf - 4;
	} else {
		*size = ptr - buf;
	}

	return 0;
}

/**
 * Packet callback: read one PES packet and return its payload in pkt.
 *
 * Up to MAX_PES_HEADER_SIZE bytes are read and, if they begin with a PES
 * start code, parsed for the packet length, the PTS/DTS fields and the
 * optional extended stream id. The input is then repositioned just after the
 * header (or at the original position if no start code was found) and the
 * payload is read:
 *  - if the header gives a length, exactly that many payload bytes are read;
 *  - otherwise bytes are read up to the next start code, limited to
 *    MAX_PES_PAYLOAD_SIZE.
 *
 * A packet without timestamps inherits those of the previous packet.
 *
 * @param s   demuxer context
 * @param pkt packet to fill; on failure it owns no data
 * @return 0 on success, AVERROR_EOF at the end of the input, or another
 *         negative AVERROR code on failure
 */
static int pes_read_packet(AVFormatContext *s, AVPacket *pkt)
{
	PesContext *pes = s->priv_data;
	AVIOContext *pb = s->pb;
	/* each timestamp defaults to its own previous value */
	int64_t pts = pes->last_pts, dts = pes->last_dts;
	int64_t start_pos, payload_pos, seek_ret;
	int header_size, pes_header_size = 0, total_size = 0;
	int size = MAX_PES_PAYLOAD_SIZE;
	int ret;
	unsigned int flags, pes_ext, skip;
	uint8_t *r, buf[MAX_PES_HEADER_SIZE] = "";

	start_pos = payload_pos = avio_tell(pb);
	header_size = avio_read(pb, buf, sizeof(buf));
	if (header_size < 9)
		return header_size < 0 ? header_size : AVERROR_EOF;

	if (is_pes_start_code(AV_RB32(buf))) {
		total_size      = AV_RB16(buf + 4); /* bytes 4,5: packet length */
		flags           = AV_RB8(buf + 7);  /* byte 7: PTS/DTS and extension flags */
		pes_header_size = AV_RB8(buf + 8) + 9; /* byte 8: header data length */
		r = buf + 9;
		if ((flags & 0xc0) == 0x80) {
			dts = pts = parse_pes_pts(r);
			r += 5;
		} else if ((flags & 0xc0) == 0xc0) {
			pts = parse_pes_pts(r);
			r += 5;
			dts = parse_pes_pts(r);
			r += 5;
		}
		pes->extended_stream_id = -1;
		if (flags & 0x01) { /* PES extension */
			pes_ext = *r++;
			/* Skip PES private data, program packet sequence counter and P-STD buffer */
			skip  = (pes_ext >> 4) & 0xb;
			skip += skip & 0x9;
			r += skip;
			if ((pes_ext & 0x41) == 0x01 &&
					(r + 2) <= (buf + pes_header_size)) {
				/* PES extension 2 */
				if ((r[0] & 0x7f) > 0 && (r[1] & 0x80) == 0)
					pes->extended_stream_id = r[1];
			}
		}
		payload_pos += pes_header_size;
	}

	if (total_size) {
		/* the length field counts everything after the first 6 bytes */
		size = total_size + 6 - pes_header_size;
		if (size < 0)
			return AVERROR_INVALIDDATA;
	}

	ret = av_new_packet(pkt, size);
	if (ret < 0)
		return ret;

	seek_ret = avio_seek(pb, payload_pos, SEEK_SET);
	if (seek_ret < 0) {
		av_free_packet(pkt);
		return (int)seek_ret;
	}

	if (total_size) {
		ret = avio_read(pb, pkt->data, size);
		if (ret < 0 || (ret == 0 && size > 0)) {
			av_free_packet(pkt);
			return ret < 0 ? ret : AVERROR_EOF;
		}
		/* truncated input: keep only the bytes that were really read */
		av_shrink_packet(pkt, ret);
	} else {
		int got = size;

		read_to_next_sync(pb, pkt->data, &got);
		av_shrink_packet(pkt, got);
	}

	pkt->flags &= ~AV_PKT_FLAG_CORRUPT;
	pkt->stream_index = 0;
	pkt->pos = start_pos;
	pkt->pts = pts;
	pkt->dts = dts;
	pes->last_pts = pts;
	pes->last_dts = dts;

	return 0;
}


/* Private options of the PES demuxer; the list ends with a NULL entry. */
static const AVOption options[] = {
    {
        /* codec carried by the stream, stored in PesContext.codec_id */
        .name        = "codec_id",
        .help        = "AV_CODEC_ID_XX",
        .offset      = offsetof(PesContext, codec_id),
        .type        = AV_OPT_TYPE_INT,
        .default_val = { .i64 = 0 },
        .min         = 0,
        .max         = INT_MAX,
        .flags       = AV_OPT_FLAG_DECODING_PARAM,
    },
    { NULL },
};

/* AVClass describing the demuxer's private context; it exposes the options
 * table above. */
static const AVClass demuxer_class = {
    .class_name = "pes class",
    .category   = AV_CLASS_CATEGORY_DEMUXER,
    .version    = LIBAVUTIL_VERSION_INT,
    .item_name  = av_default_item_name,
    .option     = options,
};

/* Demuxer definition for the "pes" input format. */
AVInputFormat ff_pes_demuxer = {
    /* identification */
    .name           = "pes",
    .long_name      = NULL_IF_CONFIG_SMALL("Packeted Elementary Stream"),
    .extensions     = "pes",
    .flags          = AVFMT_GENERIC_INDEX,
    /* private context and its options */
    .priv_data_size = sizeof(PesContext),
    .priv_class     = &demuxer_class,
    /* callbacks */
    .read_probe     = pes_read_probe,
    .read_header    = pes_read_header,
    .read_packet    = pes_read_packet,
};

测试

录制的流可以播放，音视频正常同步，不过场景变化快时有些许马赛克。

TODO

[mpeg @ 0x1a5ca10] buffer underflow st=1 bufi=3440 size=6912
[mpeg @ 0x1a5ca10] packet too large, ignoring buffer limits to mux it
[mpeg @ 0x1a5ca10] buffer underflow st=1 bufi=3440 size=6912

参考

[2] http://stackoverflow.com/questions/6394874/fetching-the-dimensions-of-a-h264video-stream